about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2025-11-26 12:01:37 +0100
committerPjotr Prins2025-11-26 12:01:37 +0100
commit806a01c27a1fd366c120820de0c38caea1d4cdcd (patch)
treefae0f2e33453c77f4fdc58c952bda397892c984a
parentc9824193dd3fd6ff398942885f110b27d8b646a0 (diff)
downloadpangemma-806a01c27a1fd366c120820de0c38caea1d4cdcd.tar.gz
openblas speed on par
-rw-r--r--guix.scm6
-rw-r--r--premake5.lua6
-rw-r--r--src/gemma_io.cpp19
-rw-r--r--src/lmm.cpp7
-rw-r--r--test/performance/releases.org21
5 files changed, 53 insertions, 6 deletions
diff --git a/guix.scm b/guix.scm
index 23b7583..b46b832 100644
--- a/guix.scm
+++ b/guix.scm
@@ -80,6 +80,9 @@
       #:make-flags
       #~(list (string-append "PREFIX=" #$output)
               (string-append "CFLAGS=-O3 -g -Wno-incompatible-pointer-types -Wno-error=implicit-function-declaration")
+              "COPT="
+              "COMMON_OPT="
+              "DYNAMIC_ARCH="
               "SHELL=bash"
               "MAKE_NB_JOBS=0"          ;use jobserver for submakes
 
@@ -103,7 +106,8 @@
                          (target-aarch64?))
                      ;; Dynamic older enables a few extra CPU architectures
                      ;; on x86_64 that were released before 2010.
-                     '("DYNAMIC_ARCH=1" "DYNAMIC_OLDER=1" "TARGET=GENERIC"))
+                     '("DYNAMIC_ARCH=1" "TARGET=GENERIC"))
+                     ;; '("DYNAMIC_ARCH=" "TARGET_CORE=ZEN"))
                     ;; On some of these architectures the CPU type can't be detected.
                     ;; We list the oldest CPU core we want to have support for.
                     ;; On MIPS we force the "SICORTEX" TARGET, as for the other
diff --git a/premake5.lua b/premake5.lua
index bb06cbd..1f70694 100644
--- a/premake5.lua
+++ b/premake5.lua
@@ -1,15 +1,15 @@
 -- Build with
 --
 --   make clean && rm build -rf
---   premake5 gmake2 && make verbose=1 gemmalib -j 8
+--   premake5 gmake && make verbose=1 gemmalib -j 8
 --
 -- Including bin
 --
---   premake5 gmake2 && make verbose=1 config=debug -j 8 && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Debug/gemma
+--   premake5 gmake && make verbose=1 config=debug -j 8 && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Debug/gemma
 --
 -- Or
 --
---   premake5 gmake2 && make verbose=1 config=release -j 8 gemma && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma
+--   premake5 gmake && make verbose=1 config=release -j 8 gemma && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma
 --
 -- Run
 --
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index 7d1f0ca..3bec483 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -153,6 +153,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) {
 // Read SNP file. A single column of SNP names.
 bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
   debug_msg("enter ReadFile_snps");
+  checkpoint("read-file-snps",file_snps);
   setSnps.clear();
 
   igzstream infile(file_snps.c_str(), igzstream::in);
@@ -182,6 +183,7 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
 bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
   debug_msg("entered");
   setSnps.clear();
+  checkpoint("read-file-header",file_snps);
 
   igzstream infile(file_snps.c_str(), igzstream::in);
   if (!infile) {
@@ -239,6 +241,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
 // Read log file.
 bool ReadFile_log(const string &file_log, double &pheno_mean) {
   debug_msg("ReadFile_log");
+  checkpoint("read-file-log",file_log);
   ifstream infile(file_log.c_str(), ifstream::in);
   if (!infile) {
     cout << "error! fail to open log file: " << file_log << endl;
@@ -282,6 +285,8 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
                    map<string, long int> &mapRS2bp,
                    map<string, double> &mapRS2cM) {
   debug_msg("ReadFile_anno");
+  checkpoint("read-file-anno",file_anno);
+
   mapRS2chr.clear();
   mapRS2bp.clear();
 
@@ -345,6 +350,7 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
 bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
                      vector<double> &pheno, const int &p_column) {
   debug_msg("entered");
+  checkpoint("read-file-column",file_pheno);
   indicator_idv.clear();
   pheno.clear();
 
@@ -389,6 +395,7 @@ bool ReadFile_pheno(const string &file_pheno,
                     vector<vector<double>> &pheno,
                     const vector<size_t> &p_column) {
   debug_msg("entered");
+  checkpoint("read-file-pheno",file_pheno);
   indicator_pheno.clear();
   pheno.clear();
 
@@ -448,6 +455,7 @@ bool ReadFile_pheno(const string &file_pheno,
 bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
                   vector<vector<double>> &cvt, size_t &n_cvt) {
   debug_msg("entered");
+  checkpoint("read-file-cvt",file_cvt);
   indicator_cvt.clear();
 
   ifstream infile(file_cvt.c_str(), ifstream::in);
@@ -515,6 +523,7 @@ bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
 
 // Read .bim file.
 bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
+  checkpoint("read-file-bim",file_bim);
   debug_msg("entered");
   snpInfo.clear();
 
@@ -563,6 +572,7 @@ bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
 bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
                   vector<vector<double>> &pheno, map<string, int> &mapID2num,
                   const vector<size_t> &p_column) {
+  checkpoint("read-file-fam",file_fam);
   debug_msg("entered");
   indicator_pheno.clear();
   pheno.clear();
@@ -686,6 +696,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
                    map<string, long int> &mapRS2bp,
                    map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
                    size_t &ns_test) {
+  checkpoint("read-file-geno",file_geno);
   debug_msg("entered");
   indicator_snp.clear();
   snpInfo.clear();
@@ -924,6 +935,7 @@ bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
                   const double &maf_level, const double &miss_level,
                   const double &hwe_level, const double &r2_level,
                   size_t &ns_test) {
+  checkpoint("read-file-bed",file_bed);
   debug_msg("entered");
   indicator_snp.clear();
   size_t ns_total = snpInfo.size();
@@ -1232,6 +1244,7 @@ void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv,
 void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
                   map<string, int> &mapID2num, const size_t k_mode, bool &error,
                   gsl_matrix *G) {
+  checkpoint("read-file-kin",file_kin);
   debug_msg("entered");
   igzstream infile(file_kin.c_str(), igzstream::in);
   if (!infile) {
@@ -1792,6 +1805,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
 bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                    const bool calc_K) {
+  checkpoint("read-file-geno",file_geno);
   debug_msg("entered");
   igzstream infile(file_geno.c_str(), igzstream::in);
   if (!infile) {
@@ -1900,6 +1914,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
                    vector<vector<unsigned char>> &Xt, gsl_matrix *K,
                    const bool calc_K, const size_t ni_test,
                    const size_t ns_test) {
+  checkpoint("read-file-geno",file_geno);
   debug_msg("entered");
   igzstream infile(file_geno.c_str(), igzstream::in);
   if (!infile) {
@@ -2008,6 +2023,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                   const bool calc_K) {
+  checkpoint("read-file-bed",file_bed);
   debug_msg("entered");
   ifstream infile(file_bed.c_str(), ios::binary);
   if (!infile) {
@@ -2141,6 +2157,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
                   gsl_matrix *K, const bool calc_K, const size_t ni_test,
                   const size_t ns_test) {
+  checkpoint("read-file-bed",file_bed);
   debug_msg("entered");
   ifstream infile(file_bed.c_str(), ios::binary);
   if (!infile) {
@@ -2277,6 +2294,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
 
 bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
                   map<string, double> &mapRS2est) {
+  checkpoint("read-file-est",file_est);
   debug_msg("entered");
   mapRS2est.clear();
 
@@ -2361,6 +2379,7 @@ bool CountFileLines(const string &file_input, size_t &n_lines) {
 // Read gene expression file.
 bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
                    vector<SNPINFO> &snpInfo, size_t &ng_total) {
+  checkpoint("read-file-gene",file_gene);
   debug_msg("entered");
   vec_read.clear();
   ng_total = 0;
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 87b9e1e..fb3a255 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -55,6 +55,7 @@
 using namespace std;
 
 void LMM::CopyFromParam(PARAM &cPar) {
+  checkpoint_nofile("lmm-copy-from-param");
   a_mode = cPar.a_mode;
   d_pace = cPar.d_pace;
 
@@ -91,6 +92,7 @@ void LMM::CopyFromParam(PARAM &cPar) {
 }
 
 void LMM::CopyToParam(PARAM &cPar) {
+  checkpoint_nofile("lmm-copy-to-param");
   cPar.time_UtX = time_UtX;
   cPar.time_opt = time_opt;
 
@@ -105,6 +107,7 @@ void LMM::WriteFiles() {
 
   file_str = path_out + "/" + file_out;
   file_str += ".assoc.txt";
+  checkpoint("lmm-write-files",file_str.c_str());
 
   ofstream outfile(file_str.c_str(), ofstream::out);
   if (!outfile) {
@@ -1668,6 +1671,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
   bool process_gwasnps = gwasnps.size();
   if (process_gwasnps)
     debug_msg("Analyze subset of SNPs (LOCO)");
+  checkpoint_nofile("start-lmm-analyze");
 
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
@@ -1704,6 +1708,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
   auto batch_compute = [&](size_t l) { // using a C++ closure
     // Compute SNPs in batch, note the computations are independent per SNP
     // debug_msg("enter batch_compute");
+    checkpoint_nofile("\nstart-batch_compute");
     gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
     gsl_matrix_view UtXlarge_sub =
         gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
@@ -1763,6 +1768,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
       sumStat.push_back(SNPs);
     }
     // debug_msg("exit batch_compute");
+    checkpoint_nofile("end-batch_compute");
   };
 
   const auto num_snps = indicator_snp.size();
@@ -1847,6 +1853,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
   ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1);
   // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl;
   cout << endl;
+  checkpoint_nofile("end-lmm-analyze");
 
   gsl_vector_safe_free(x);
   gsl_vector_safe_free(x_miss);
diff --git a/test/performance/releases.org b/test/performance/releases.org
index 4cc92f1..792cb2e 100644
--- a/test/performance/releases.org
+++ b/test/performance/releases.org
@@ -1,6 +1,7 @@
 * GEMMA performance stats
 
-** GEMMA 0.98.5
+** GEMMA 1.00-pre1
+
 
 Measurements taken on a recent AMD Ryzen 7 3700X 8-Core Processor @2.195GHz.
 
@@ -8,7 +9,7 @@ We are facing a time regression.
 
 premake5 gmake2 && make verbose=1 config=release -j 8 gemma && time LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check -debug
 
-With openblas 0.3.21 we go a bit faster. Still behind though, there is room for tweaking. But I want to run some bigger files first.
+With openblas 0.3.21 we go a bit faster. Still 10% behind though, there is room for tweaking. But I want to run some bigger files first.
 
 #+begin_src sh
 Pangemma --- GEMMA 0.98.5 compatible executable 1.0.0 (2025-11-22) with guile 3.0.9 by Xiang Zhou, Pjotr Prins and team (C) 2012-2025
@@ -28,6 +29,7 @@ user    0m13.168s
 sys     0m5.919s
 #+end_src sh
 
+Before it was
 
 #+begin_src sh
 Pangemma --- GEMMA 0.98.5 compatible executable 1.0.0 (2025-11-22) with guile 3.0.9 by Xiang Zhou, Pjotr Prins and team (C) 2012-2025
@@ -67,6 +69,21 @@ this led me to try the newer openblas on the older gemma - and indeed, the regre
 
 Well, at least I found the problem. Time for a special openblas build like I used to do.
 
+
+*** Bigger run
+
+We translate this 10Gb (gzip compressed) job from our pangenome precompute
+
+```
+/bin/gemma -loco 3 -k /export2/data/wrk/services/gemma-wrapper/tmp/tmp/panlmm/93f6b39ec06c09fb9ba9ca628b5fb990921b6c60.3.cXX.txt.cXX.txt -o a3248cec40b3fe6b9e8672352b3ab2d7280c426c.3.assoc.txt -p pheno.json.txt -g pangenome-13M-genotypes.txt -a snps-matched.txt -lmm 9 -maf 0.1 -n 2 -outdir /export2/data/wrk/services/gemma-wrapper/tmp/tmp/panlmm/d20251126-4190721-c8bbo8
+```
+
+to
+
+```
+time LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -g tmp/pangenome-13M-genotypes.txt -p tmp/pheno.json.txt -n 1 -a tmp/snps-matched.txt -k tmp/93f6b39ec06c09fb9ba9ca628b5fb990921b6c60.3.cXX.txt.cXX.txt -lmm 9 -no-check
+```
+
 ** GEMMA 0.98.5-pre1
 
 Measurements taken on a recent AMD Ryzen 7 3700X 8-Core Processor @2.195GHz.