diff options
| author | Pjotr Prins | 2025-12-02 09:04:28 +0100 |
|---|---|---|
| committer | Pjotr Prins | 2025-12-02 09:04:28 +0100 |
| commit | a1e252bfc7fc874fb4c442288b4b33e232d5002e (patch) | |
| tree | 240d7bd5910ee39f273221265b2064f7ec60fcd5 | |
| parent | db417c280c570dc153ec72b3874b11c23761f969 (diff) | |
| download | pangemma-a1e252bfc7fc874fb4c442288b4b33e232d5002e.tar.gz | |
Started generating output
| -rw-r--r-- | src/gemma.cpp | 3 | ||||
| -rw-r--r-- | src/lmm.cpp | 50 | ||||
| -rw-r--r-- | test/performance/releases.org | 12 |
3 files changed, 43 insertions, 22 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index 69e338a..c7f7ab6 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -2842,9 +2842,10 @@ void GEMMA::BatchRun(PARAM &cPar) { gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and its transformed write(&Y_col.vector, "Y_col"); - if (cPar.is_mdb) + if (cPar.is_mdb) { cLmm.mdb_calc_gwa(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, cPar.setGWASnps); + } else if (!cPar.file_bfile.empty()) { // PLINK analysis diff --git a/src/lmm.cpp b/src/lmm.cpp index 1319d7c..1e5e229 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -108,11 +108,11 @@ void LMM::WriteFiles() { file_str = path_out + "/" + file_out; file_str += ".assoc.txt"; - checkpoint("lmm-write-files",file_str.c_str()); + checkpoint("lmm-write-files",file_str); ofstream outfile(file_str.c_str(), ofstream::out); if (!outfile) { - cout << "error writing file: " << file_str.c_str() << endl; + cout << "error writing file: " << file_str << endl; return; } @@ -206,21 +206,29 @@ void LMM::WriteFiles() { common_header(); - size_t t = 0; - for (size_t i = 0; i < snpInfo.size(); ++i) { - if (indicator_snp[i] == 0) - continue; - auto snp = snpInfo[i].rs_number; - if (process_gwasnps && setGWASnps.count(snp) == 0) - continue; - // cout << t << endl; - outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" - << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" - << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" - << fixed << setprecision(3) << snpInfo[i].maf << "\t"; - - sumstats(sumStat[t]); - t++; + if (snpInfo.size()) { + size_t t = 0; + for (size_t i = 0; i < snpInfo.size(); ++i) { + if (indicator_snp[i] == 0) + continue; + auto snp = snpInfo[i].rs_number; + if (process_gwasnps && setGWASnps.count(snp) == 0) + continue; + // cout << t << endl; + outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" + << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" + << fixed << setprecision(3) << snpInfo[i].maf << "\t"; + + sumstats(sumStat[t]); + t++; + } + } + else + { + for (auto &s : sumStat) { + sumstats(s); + } } } @@ -2008,7 +2016,7 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, continue; auto snp = get<1>(tup); auto gs = get<2>(tup); - cout << t << " SNP: " << snp << endl; + // cout << t << " SNP: " << snp << endl; // check whether SNP is included in gwasnps (used by LOCO) /* @@ -2061,8 +2069,8 @@ void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp, if (c % msize) batch_compute(c % msize); ProgressBar("Reading SNPS", num_markers - 1, num_markers - 1); - // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl; cout << endl; + cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl; checkpoint_nofile("end-lmm-mdb-analyze"); gsl_vector_safe_free(x); @@ -2125,7 +2133,7 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, if (result) { size_t num_floats = value.size() / sizeof(float); - cout << string(key) << ":" << num_floats << "," << ni_total << " " << endl; + // cout << string(key) << ":" << num_floats << "," << ni_total << " " << endl; assert(num_floats == ni_total); const float* gsbuf = reinterpret_cast<const float*>(value.data()); auto snp = string(key); @@ -2135,7 +2143,7 @@ void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval, for (size_t i = 0; i < size; ++i) { gs.push_back(static_cast<double>(gsbuf[i])); } - cout << "!!!!" << size << snp << ": " << gs[0] << "," << gs[1] << "," << gs[2] << "," << gs[3] << endl; + // cout << "!!!!" << size << snp << ": " << gs[0] << "," << gs[1] << "," << gs[2] << "," << gs[3] << endl; } // return { result,snp,gs }; return make_tuple(result, snp, gs); diff --git a/test/performance/releases.org b/test/performance/releases.org index 082cdde..0b1c638 100644 --- a/test/performance/releases.org +++ b/test/performance/releases.org @@ -5,6 +5,18 @@ Measurements taken on a recent AMD Ryzen 7 3700X 8-Core Processor @2.195GHz. +Introducing mdb genotype format led to a 30% speed increase on the small mouse set: + +#+begin_src sh +real 0m6.403s +user 0m11.529s +sys 0m6.325s +#+end_src sh + +that may not look like much, but we are only starting! + +** Picking up the pieces + We are facing a time regression. premake5 gmake2 && make verbose=1 config=release -j 8 gemma && time LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check |
