diff options
| author | Pjotr Prins | 2025-11-26 12:01:37 +0100 |
|---|---|---|
| committer | Pjotr Prins | 2025-11-26 12:01:37 +0100 |
| commit | 806a01c27a1fd366c120820de0c38caea1d4cdcd (patch) | |
| tree | fae0f2e33453c77f4fdc58c952bda397892c984a | |
| parent | c9824193dd3fd6ff398942885f110b27d8b646a0 (diff) | |
| download | pangemma-806a01c27a1fd366c120820de0c38caea1d4cdcd.tar.gz | |
openblas speed on par
| -rw-r--r-- | guix.scm | 6 | ||||
| -rw-r--r-- | premake5.lua | 6 | ||||
| -rw-r--r-- | src/gemma_io.cpp | 19 | ||||
| -rw-r--r-- | src/lmm.cpp | 7 | ||||
| -rw-r--r-- | test/performance/releases.org | 21 |
5 files changed, 53 insertions, 6 deletions
diff --git a/guix.scm b/guix.scm index 23b7583..b46b832 100644 --- a/guix.scm +++ b/guix.scm @@ -80,6 +80,9 @@ #:make-flags #~(list (string-append "PREFIX=" #$output) (string-append "CFLAGS=-O3 -g -Wno-incompatible-pointer-types -Wno-error=implicit-function-declaration") + "COPT=" + "COMMON_OPT=" + "DYNAMIC_ARCH=" "SHELL=bash" "MAKE_NB_JOBS=0" ;use jobserver for submakes @@ -103,7 +106,8 @@ (target-aarch64?)) ;; Dynamic older enables a few extra CPU architectures ;; on x86_64 that were released before 2010. - '("DYNAMIC_ARCH=1" "DYNAMIC_OLDER=1" "TARGET=GENERIC")) + '("DYNAMIC_ARCH=1" "TARGET=GENERIC")) + ;; '("DYNAMIC_ARCH=" "TARGET_CORE=ZEN")) ;; On some of these architectures the CPU type can't be detected. ;; We list the oldest CPU core we want to have support for. ;; On MIPS we force the "SICORTEX" TARGET, as for the other diff --git a/premake5.lua b/premake5.lua index bb06cbd..1f70694 100644 --- a/premake5.lua +++ b/premake5.lua @@ -1,15 +1,15 @@ -- Build with -- -- make clean && rm build -rf --- premake5 gmake2 && make verbose=1 gemmalib -j 8 +-- premake5 gmake && make verbose=1 gemmalib -j 8 -- -- Including bin -- --- premake5 gmake2 && make verbose=1 config=debug -j 8 && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Debug/gemma +-- premake5 gmake && make verbose=1 config=debug -j 8 && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Debug/gemma -- -- Or -- --- premake5 gmake2 && make verbose=1 config=release -j 8 gemma && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma +-- premake5 gmake && make verbose=1 config=release -j 8 gemma && LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -- -- Run -- diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 7d1f0ca..3bec483 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -153,6 +153,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) { // Read SNP file. A single column of SNP names. bool ReadFile_snps(const string file_snps, set<string> &setSnps) { debug_msg("enter ReadFile_snps"); + checkpoint("read-file-snps",file_snps); setSnps.clear(); igzstream infile(file_snps.c_str(), igzstream::in); @@ -182,6 +183,7 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) { bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { debug_msg("entered"); setSnps.clear(); + checkpoint("read-file-header",file_snps); igzstream infile(file_snps.c_str(), igzstream::in); if (!infile) { @@ -239,6 +241,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { // Read log file. bool ReadFile_log(const string &file_log, double &pheno_mean) { debug_msg("ReadFile_log"); + checkpoint("read-file-log",file_log); ifstream infile(file_log.c_str(), ifstream::in); if (!infile) { cout << "error! fail to open log file: " << file_log << endl; @@ -282,6 +285,8 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM) { debug_msg("ReadFile_anno"); + checkpoint("read-file-anno",file_anno); + mapRS2chr.clear(); mapRS2bp.clear(); @@ -345,6 +350,7 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr, bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv, vector<double> &pheno, const int &p_column) { debug_msg("entered"); + checkpoint("read-file-column",file_pheno); indicator_idv.clear(); pheno.clear(); @@ -389,6 +395,7 @@ bool ReadFile_pheno(const string &file_pheno, vector<vector<double>> &pheno, const vector<size_t> &p_column) { debug_msg("entered"); + checkpoint("read-file-pheno",file_pheno); indicator_pheno.clear(); pheno.clear(); @@ -448,6 +455,7 @@ bool ReadFile_pheno(const string &file_pheno, bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt, vector<vector<double>> &cvt, size_t &n_cvt) { debug_msg("entered"); + checkpoint("read-file-cvt",file_cvt); indicator_cvt.clear(); ifstream infile(file_cvt.c_str(), ifstream::in); @@ -515,6 +523,7 @@ bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt, // Read .bim file. bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) { + checkpoint("read-file-bim",file_bim); debug_msg("entered"); snpInfo.clear(); @@ -563,6 +572,7 @@ bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) { bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, vector<vector<double>> &pheno, map<string, int> &mapID2num, const vector<size_t> &p_column) { + checkpoint("read-file-fam",file_fam); debug_msg("entered"); indicator_pheno.clear(); pheno.clear(); @@ -686,6 +696,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, size_t &ns_test) { + checkpoint("read-file-geno",file_geno); debug_msg("entered"); indicator_snp.clear(); snpInfo.clear(); @@ -924,6 +935,7 @@ bool ReadFile_bed(const string &file_bed, const set<string> &setSnps, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test) { + checkpoint("read-file-bed",file_bed); debug_msg("entered"); indicator_snp.clear(); size_t ns_total = snpInfo.size(); @@ -1232,6 +1244,7 @@ void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv, void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G) { + checkpoint("read-file-kin",file_kin); debug_msg("entered"); igzstream infile(file_kin.c_str(), igzstream::in); if (!infile) { @@ -1792,6 +1805,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { + checkpoint("read-file-geno",file_geno); debug_msg("entered"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { @@ -1900,6 +1914,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, vector<vector<unsigned char>> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test) { + checkpoint("read-file-geno",file_geno); debug_msg("entered"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { @@ -2008,6 +2023,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { + checkpoint("read-file-bed",file_bed); debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { @@ -2141,6 +2157,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test) { + checkpoint("read-file-bed",file_bed); debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { @@ -2277,6 +2294,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, bool ReadFile_est(const string &file_est, const vector<size_t> &est_column, map<string, double> &mapRS2est) { + checkpoint("read-file-est",file_est); debug_msg("entered"); mapRS2est.clear(); @@ -2361,6 +2379,7 @@ bool CountFileLines(const string &file_input, size_t &n_lines) { // Read gene expression file. bool ReadFile_gene(const string &file_gene, vector<double> &vec_read, vector<SNPINFO> &snpInfo, size_t &ng_total) { + checkpoint("read-file-gene",file_gene); debug_msg("entered"); vec_read.clear(); ng_total = 0; diff --git a/src/lmm.cpp b/src/lmm.cpp index 87b9e1e..fb3a255 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -55,6 +55,7 @@ using namespace std; void LMM::CopyFromParam(PARAM &cPar) { + checkpoint_nofile("lmm-copy-from-param"); a_mode = cPar.a_mode; d_pace = cPar.d_pace; @@ -91,6 +92,7 @@ void LMM::CopyFromParam(PARAM &cPar) { } void LMM::CopyToParam(PARAM &cPar) { + checkpoint_nofile("lmm-copy-to-param"); cPar.time_UtX = time_UtX; cPar.time_opt = time_opt; @@ -105,6 +107,7 @@ void LMM::WriteFiles() { file_str = path_out + "/" + file_out; file_str += ".assoc.txt"; + checkpoint("lmm-write-files",file_str.c_str()); ofstream outfile(file_str.c_str(), ofstream::out); if (!outfile) { @@ -1668,6 +1671,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, bool process_gwasnps = gwasnps.size(); if (process_gwasnps) debug_msg("Analyze subset of SNPs (LOCO)"); + checkpoint_nofile("start-lmm-analyze"); // Calculate basic quantities. size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; @@ -1704,6 +1708,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, auto batch_compute = [&](size_t l) { // using a C++ closure // Compute SNPs in batch, note the computations are independent per SNP // debug_msg("enter batch_compute"); + checkpoint_nofile("\nstart-batch_compute"); gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l); gsl_matrix_view UtXlarge_sub = gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l); @@ -1763,6 +1768,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, sumStat.push_back(SNPs); } // debug_msg("exit batch_compute"); + checkpoint_nofile("end-batch_compute"); }; const auto num_snps = indicator_snp.size(); @@ -1847,6 +1853,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1); // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl; cout << endl; + checkpoint_nofile("end-lmm-analyze"); gsl_vector_safe_free(x); gsl_vector_safe_free(x_miss); diff --git a/test/performance/releases.org b/test/performance/releases.org index 4cc92f1..792cb2e 100644 --- a/test/performance/releases.org +++ b/test/performance/releases.org @@ -1,6 +1,7 @@ * GEMMA performance stats -** GEMMA 0.98.5 +** GEMMA 1.00-pre1 + Measurements taken on a recent AMD Ryzen 7 3700X 8-Core Processor @2.195GHz. @@ -8,7 +9,7 @@ We are facing a time regression. premake5 gmake2 && make verbose=1 config=release -j 8 gemma && time LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check -debug -With openblas 0.3.21 we go a bit faster. Still behind though, there is room for tweaking. But I want to run some bigger files first. +With openblas 0.3.21 we go a bit faster. Still 10% behind though, there is room for tweaking. But I want to run some bigger files first. #+begin_src sh Pangemma --- GEMMA 0.98.5 compatible executable 1.0.0 (2025-11-22) with guile 3.0.9 by Xiang Zhou, Pjotr Prins and team (C) 2012-2025 @@ -28,6 +29,7 @@ user 0m13.168s sys 0m5.919s #+end_src sh +Before it was #+begin_src sh Pangemma --- GEMMA 0.98.5 compatible executable 1.0.0 (2025-11-22) with guile 3.0.9 by Xiang Zhou, Pjotr Prins and team (C) 2012-2025 @@ -67,6 +69,21 @@ this led me to try the newer openblas on the older gemma - and indeed, the regre Well, at least I found the problem. Time for a special openblas build like I used to do. + +*** Bigger run + +We translate this 10Gb (gzip compressed) job from our pangenome precompute + +``` +/bin/gemma -loco 3 -k /export2/data/wrk/services/gemma-wrapper/tmp/tmp/panlmm/93f6b39ec06c09fb9ba9ca628b5fb990921b6c60.3.cXX.txt.cXX.txt -o a3248cec40b3fe6b9e8672352b3ab2d7280c426c.3.assoc.txt -p pheno.json.txt -g pangenome-13M-genotypes.txt -a snps-matched.txt -lmm 9 -maf 0.1 -n 2 -outdir /export2/data/wrk/services/gemma-wrapper/tmp/tmp/panlmm/d20251126-4190721-c8bbo8 +``` + +to + +``` +time LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -g tmp/pangenome-13M-genotypes.txt -p tmp/pheno.json.txt -n 1 -a tmp/snps-matched.txt -k tmp/93f6b39ec06c09fb9ba9ca628b5fb990921b6c60.3.cXX.txt.cXX.txt -lmm 9 -no-check +``` + ** GEMMA 0.98.5-pre1 Measurements taken on a recent AMD Ryzen 7 3700X 8-Core Processor @2.195GHz. |
