From a610dd723a233aed1abe31aa32e3137b23b5f983 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 13 Oct 2017 12:28:26 +0000
Subject: OpenBlas: preparing for dgemm use
---
src/fastblas.cpp | 212 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 212 insertions(+)
create mode 100644 src/fastblas.cpp
(limited to 'src/fastblas.cpp')
diff --git a/src/fastblas.cpp b/src/fastblas.cpp
new file mode 100644
index 0000000..38ca326
--- /dev/null
+++ b/src/fastblas.cpp
@@ -0,0 +1,212 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011-2017, Xiang Zhou
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+#include "gsl/gsl_matrix.h"
+#include
+#include
+#include
+#include
+#include "debug.h"
+#include "fastblas.h"
+#include "mathfunc.h"
+
+using namespace std;
+
+/*
+ Reasonably fast function to copy data from standard C array into
+ gsl_matrix. Avoid it for performance critical sections.
+*/
+gsl_matrix *fast_copy(gsl_matrix *m, const double *mem) {
+ auto rows = m->size1;
+ auto cols = m->size2;
+ if (is_strict_mode()) { // slower correct version
+ for (auto r=0; rsize == cols);
+ assert(v->block->size == cols);
+ assert(v->stride == 1);
+ memcpy(v->block->data,&mem[r*cols],cols*sizeof(double));
+ gsl_matrix_set_row(m,r,v);
+ }
+ }
+ return m;
+}
+
+/*
+ Helper function fast_cblas_dgemm runs the local dgemm
+*/
+void fast_cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order,
+ OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
+ OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
+ OPENBLAS_CONST blasint M,
+ OPENBLAS_CONST blasint N,
+ OPENBLAS_CONST blasint K,
+ OPENBLAS_CONST double alpha,
+ OPENBLAS_CONST double *A,
+ OPENBLAS_CONST blasint lda,
+ OPENBLAS_CONST double *B,
+ OPENBLAS_CONST blasint ldb,
+ OPENBLAS_CONST double beta,
+ double *C,
+ OPENBLAS_CONST blasint ldc) {
+#ifndef NDEBUG
+ size_t i,j;
+ if (is_debug_mode()) {
+ printf (" Top left corner of matrix A: \n");
+ for (i=0; isize1;
+ auto colsA = A->size2;
+ blasint M = A->size1;
+ blasint K = B->size1;
+ assert(K == colsA);
+ blasint N = B->size2;
+ // cout << M << "," << N "," << K << endl;
+ // Layout = CblasRowMajor: Trans: K , NoTrans M
+ blasint lda = (transA==CblasNoTrans ? K : M );
+ blasint ldb = (transB==CblasNoTrans ? N : K );
+ blasint ldc = N;
+ cout << rowsA << endl;
+ assert(transA == CblasNoTrans);
+ assert(transB == CblasNoTrans);
+ assert(rowsA == 2000);
+ assert(colsA == 200);
+ assert(lda == K);
+ assert(ldb == N);
+ assert(ldc == N);
+ assert(A->size2 == A->tda);
+ assert(B->size2 == B->tda);
+ assert(C->size2 == C->tda);
+
+ // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+ // m, n, k, alpha, A, k, B, n, beta, C, n);
+ // m = 2000, k = 200, n = 1000;
+ assert(M==2000);
+ assert(K==200);
+ assert(N==1000);
+
+ auto k = K;
+ auto m = M;
+ auto n = N;
+
+ fast_cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha,
+ /* A */ A->data,
+ /* lda */ lda,
+ /* B */ B->data,
+ /* ldb */ ldb,
+ /* beta */ beta,
+ /* C */ C->data, ldc);
+}
+
+/*
+ Use the fasted/supported way to call BLAS dgemm
+*/
+
+void fast_dgemm(const char *TransA, const char *TransB, const double alpha,
+ const gsl_matrix *A, const gsl_matrix *B, const double beta,
+ gsl_matrix *C) {
+ fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C);
+}
--
cgit v1.2.3