/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "gsl/gsl_matrix.h" #include #include #include #include #include "debug.h" #include "fastblas.h" #include "mathfunc.h" #ifndef NDEBUG #include "eigenlib.h" #endif using namespace std; /* Reasonably fast function to copy data from standard C array into gsl_matrix. Avoid it for performance critical sections. */ gsl_matrix *fast_copy(gsl_matrix *m, const double *mem) { auto rows = m->size1; auto cols = m->size2; if (is_strict_mode()) { // slower correct version for (auto r=0; rsize == cols); assert(v->block->size == cols); assert(v->stride == 1); memcpy(v->block->data,&mem[r*cols],cols*sizeof(double)); gsl_matrix_set_row(m,r,v); } } return m; } /* Helper function fast_cblas_dgemm runs the local dgemm */ void fast_cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc) { #ifndef NDEBUG size_t i,j; if (is_debug_mode()) { printf (" Top left corner of matrix A: \n"); for (i=0; isize1; auto colsA = A->size2; blasint M = A->size1; blasint K = B->size1; assert(K == colsA); blasint N = B->size2; // cout << M << "," << N "," << K << endl; // Layout = CblasRowMajor: Trans: K , NoTrans M blasint lda = (transA==CblasNoTrans ? K : M ); blasint ldb = (transB==CblasNoTrans ? N : K ); blasint ldc = N; fast_cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, /* A */ A->data, /* lda */ lda, /* B */ B->data, /* ldb */ ldb, /* beta */ beta, /* C */ C->data, ldc); } /* Use the fasted/supported way to call BLAS dgemm */ void fast_dgemm(const char *TransA, const char *TransB, const double alpha, const gsl_matrix *A, const gsl_matrix *B, const double beta, gsl_matrix *C) { fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C); #ifndef NDEBUG if (is_strict_mode() && !is_no_check_mode()) { // ---- validate with original implementation gsl_matrix *C1 = gsl_matrix_alloc(C->size1,C->size2); eigenlib_dgemm(TransA,TransB,alpha,A,B,beta,C1); enforce_msg(gsl_matrix_equal(C,C1),"dgemm outcomes are not equal for fast & eigenlib"); } #endif }