/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright © 2011-2017, Xiang Zhou
Copyright © 2017, Peter Carbonetto
Copyright © 2017, Pjotr Prins
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include "gsl/gsl_matrix.h"
#include // std::min
#include
#include
#include
#include
#include "debug.h"
#include "fastblas.h"
#include "mathfunc.h"
#include
#include "eigenlib.h"
using namespace std;
/*
Reasonably fast function to copy data from standard C array into
gsl_matrix. Avoid it for performance critical sections.
*/
gsl_matrix *fast_copy(gsl_matrix *m, const double *mem) {
auto rows = m->size1;
auto cols = m->size2;
if (is_strict_mode()) { // slower correct version
for (size_t r=0; rsize == cols);
assert(v->block->size == cols);
assert(v->stride == 1);
memcpy(v->block->data,&mem[r*cols],cols*sizeof(double));
gsl_matrix_set_row(m,r,v);
}
gsl_vector_free(v);
}
return m;
}
/*
Helper function fast_cblas_dgemm runs the local dgemm
*/
void fast_cblas_dgemm(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB,
const size_t M,
const size_t N,
const size_t K,
const double alpha,
const double *A,
const size_t lda,
const double *B,
const size_t ldb,
const double beta,
double *C,
const size_t ldc) {
#ifndef NDEBUG
if (is_debug_mode()) {
#ifdef DISABLED
size_t i,j;
printf (" Top left corner of matrix A: \n");
for (i=0; i0);
enforce(N>0);
enforce(K>0);
// check_int_mult_overflow(560000,8000); // fails on default int (32-bits)
check_int_mult_overflow(M,K);
check_int_mult_overflow(N,K);
check_int_mult_overflow(M,N);
cblas_dgemm(Order,TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
#ifndef NDEBUG
#ifdef DISABLED
if (is_debug_mode()) {
printf (" Top left corner of matrix A (cols=k %i, rows=m %i): \n",K,M);
for (i=0; isize1;
const size_t N = C->size2;
const size_t MA = (transA == CblasNoTrans) ? A->size1 : A->size2;
const size_t NA = (transA == CblasNoTrans) ? A->size2 : A->size1;
const size_t MBx = (transB == CblasNoTrans) ? B->size1 : B->size2;
const size_t NB = (transB == CblasNoTrans) ? B->size2 : B->size1;
if (M == MA && N == NB && NA == MBx) { /* [MxN] = [MAxNA][MBxNB] */
auto K = NA;
// Check for (integer) overflows
enforce(M>0);
enforce(N>0);
enforce(K>0);
// check_int_mult_overflow(560000,8000);
check_int_mult_overflow(M,K);
check_int_mult_overflow(N,K);
check_int_mult_overflow(M,N);
cblas_dgemm (CblasRowMajor, transA, transB, M, N, NA,
alpha, A->data, A->tda, B->data, B->tda, beta,
C->data, C->tda);
} else {
fail_msg("Range error in dgemm");
}
}
/*
Use the fast/supported way to call BLAS dgemm
*/
void fast_dgemm(const char *TransA, const char *TransB, const double alpha,
const gsl_matrix *A, const gsl_matrix *B, const double beta,
gsl_matrix *C) {
fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C);
#ifdef DISABLE
if (is_check_mode()) {
// ---- validate with original implementation
gsl_matrix *C1 = gsl_matrix_alloc(C->size1,C->size2);
eigenlib_dgemm(TransA,TransB,alpha,A,B,beta,C1);
enforce_msg(gsl_matrix_equal(C,C1),"dgemm outcomes are not equal for fast & eigenlib");
gsl_matrix_free(C1);
}
#endif
}
void fast_eigen_dgemm(const char *TransA, const char *TransB, const double alpha,
const gsl_matrix *A, const gsl_matrix *B, const double beta,
gsl_matrix *C) {
if (is_legacy_mode())
eigenlib_dgemm(TransA,TransB,alpha,A,B,beta,C);
else
fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C);
}