diff options
author | Pjotr Prins | 2018-09-27 09:35:13 +0000 |
---|---|---|
committer | Pjotr Prins | 2018-09-27 09:35:13 +0000 |
commit | c08e633d2f858b35d2939d92c8a1b82d36168944 (patch) | |
tree | 82f7b8785f7af041c48934e89983ef47a8a3b374 | |
parent | 5d74d45b5ade5339bca6090bead2d6b37a70fb63 (diff) | |
download | pangemma-c08e633d2f858b35d2939d92c8a1b82d36168944.tar.gz |
Inlining GSL calls
-rw-r--r-- | INSTALL.md | 4 | ||||
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | src/fastblas.cpp | 3 | ||||
-rw-r--r-- | src/fastblas.h | 3 | ||||
-rw-r--r-- | src/lapack.cpp | 4 | ||||
-rw-r--r-- | test/performance/releases.org | 61 |
6 files changed, 74 insertions, 3 deletions
@@ -65,6 +65,10 @@ and for the Clang edition make clean make GUIX=$GUIX_ENVIRONMENT CXX=clang++ check -j 4 +To test with another dependency, e.g. GSLv1 + + env GUIX_PACKAGE_PATH=../guix-bioinformatics/ guix environment -C guix --ad-hoc gcc gdb gfortran:lib gsl1 eigen openblas zlib bash ld-wrapper perl + #### GNU Guix reproducible build system One of the challenges of developing software is dealing with @@ -70,7 +70,7 @@ WITH_GSLCBLAS = # Force linking gslcblas (if OpenBlas OPENBLAS_LEGACY = # Using older OpenBlas FORCE_STATIC = # Static linking of libraries # GCC_FLAGS = -Wall -O3 -std=gnu++11 # extra flags -Wl,--allow-multiple-definition -GCC_FLAGS = -pthread -Wall -std=gnu++11 # extra flags -Wl,--allow-multiple-definition +GCC_FLAGS = -DHAVE_INLINE -pthread -Wall -std=gnu++11 # extra flags -Wl,--allow-multiple-definition TRAVIS_CI = # used by TRAVIS for testing GSL_INCLUDE_PATH = diff --git a/src/fastblas.cpp b/src/fastblas.cpp index 362027c..de63e08 100644 --- a/src/fastblas.cpp +++ b/src/fastblas.cpp @@ -30,6 +30,9 @@ #include <string.h> #include "eigenlib.h" +const char *FastblasTrans = "T"; +const char *FastblasNoTrans = "N"; + using namespace std; /* diff --git a/src/fastblas.h b/src/fastblas.h index 6000983..343a73a 100644 --- a/src/fastblas.h +++ b/src/fastblas.h @@ -27,6 +27,9 @@ gsl_matrix *fast_copy(gsl_matrix *m, const double *mem); +extern const char *FastblasTrans; +extern const char *FastblasNoTrans; + void fast_dgemm(const char *TransA, const char *TransB, const double alpha, const gsl_matrix *A, const gsl_matrix *B, const double beta, gsl_matrix *C); diff --git a/src/lapack.cpp b/src/lapack.cpp index bf73338..eb5b16b 100644 --- a/src/lapack.cpp +++ b/src/lapack.cpp @@ -319,8 +319,8 @@ void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) { // introductory textbook on numerical linear algebra for details). void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_inverse) { // debug_msg("entering"); - auto det = LULndet(LU); - enforce_msg(det != 1.0,"LU determinant is zero -> LU is not invertable"); + if (is_check_mode()) + LULndet(LU); enforce_gsl(gsl_linalg_LU_invert(LU, p, ret_inverse)); } diff --git a/test/performance/releases.org b/test/performance/releases.org index f1772dc..4e70c14 100644 --- a/test/performance/releases.org +++ b/test/performance/releases.org @@ -125,6 +125,8 @@ user 0m13.656s sys 0m1.584s #+END_SRC +Multivariate analysis is still slow. + #+BEGIN_SRC bash time ./bin/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check GEMMA 0.98 (2018-09-26) by Xiang Zhou and team (C) 2012-2018 @@ -146,6 +148,65 @@ user 0m15.788s sys 0m3.036s #+END_SRC +Full multivariate analysis is still slow. Mostly because of CalcQi - see above profiling. + +#+BEGIN_SRC bash +time ./bin/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 2 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check +GEMMA 0.98 (2018-09-26) by Xiang Zhou and team (C) 2012-2018 +Reading Files ... +## number of total individuals = 1940 +## number of analyzed individuals = 757 +## number of covariates = 1 +## number of phenotypes = 2 +## number of total SNPs/var = 12226 +## number of analyzed SNPs = 10775 +Start Eigen-Decomposition... +REMLE estimate for Vg in the null model: +1.3210 +1.3210 1.3210 +se(Vg): +0.8217 +0.7152 0.7198 +REMLE estimate for Ve in the null model: +0.3257 +0.3257 0.3257 +se(Ve): +1.9191 +2.6491 1.9101 +REMLE likelihood = 0.0000 +MLE estimate for Vg in the null model: +1.3215 +1.3215 1.3215 +se(Vg): +0.8217 +0.7152 0.7198 +MLE estimate for Ve in the null model: +0.3249 +0.3249 0.3249 +se(Ve): +1.9191 +2.6491 1.9101 +MLE likelihood = 0.0000 +================================================== 100% +**** INFO: Done. + +real 1m38.057s +user 1m39.320s +sys 0m2.116s +#+END_SRC + +using GSL inline functions improved it a bit to + +#+BEGIN_SRC +real 1m29.697s +user 1m30.896s +sys 0m2.148s +#+END_SRC + +otherwise the obvious way to improve things is to rejig these CalcXHiY, CalcQi and +CalcSigma functions. + + * GEMMA 0.98-pre #+BEGIN_SRC bash |