diff options
-rw-r--r-- | doc/code/pangemma.md | 13 | ||||
-rw-r--r-- | src/checkpoint.cpp | 4 | ||||
-rw-r--r-- | src/checkpoint.h | 3 | ||||
-rw-r--r-- | src/gemma.cpp | 2 | ||||
-rw-r--r-- | src/gemma_io.cpp | 25 | ||||
-rw-r--r-- | src/version.h | 8 |
6 files changed, 47 insertions, 8 deletions
diff --git a/doc/code/pangemma.md b/doc/code/pangemma.md index 223e2ae..6640a80 100644 --- a/doc/code/pangemma.md +++ b/doc/code/pangemma.md @@ -204,7 +204,7 @@ multiple outputs - in that case we may add filenames. And exits with: **** Checkpoint reached: read-geno-file (normal exit) ``` -# List check points +## List check points When you compile PanGEMMA with debug information @@ -216,8 +216,19 @@ and run a computation with the '-debug' switch it should output the check-points ``` **** DEBUG: checkpoint read-geno-file passed with ./example/mouse_hs1940.geno.txt.gz in src/gemma_io.cpp at line 874 in ReadFile_geno +**** DEBUG: checkpoint bimbam-kinship-matrix passed with kinship.txt in src/gemma_io.cpp at line 1598 in BimbamKin ``` +## Filtering steps + +Note that both PLINK and BIMBAM input files have their own kinship computation with some filtering(!). +Similarly, read-geno-file also filters on MAF, for example, and it is well known that the old GEMMA will read the genotype file multiple times for different purposes. With growing geno files this is becoming highly inefficient. + +In my new propagator setup these filtering steps should go in their own functions or propagators. + +To refactor this at read-geno-file we can start to write out the filtered-genotype file at the checkpoint. That will be our baseline 'output'. Next we write an alternative path and make sure the outputs are the same! Sounds easy, no? 
+ + # Other ## Example diff --git a/src/checkpoint.cpp b/src/checkpoint.cpp index a37c1be..9bcf53b 100644 --- a/src/checkpoint.cpp +++ b/src/checkpoint.cpp @@ -27,9 +27,9 @@ using namespace std; void checkpoint_run(string msg, string filename, string srcfilename, int line, string funcname) { - is_debug_mode() && std::cerr << "**** DEBUG: checkpoint " << msg << " passed with " << filename << " in " << srcfilename << " at line " << line << " in " << funcname << endl; + is_debug_mode() && std::cerr << "**** DEBUG: checkpoint '" << msg << "' passed with file " << filename << " in " << srcfilename << " at line " << line << " in func " << funcname << endl; if (msg == checkpoint_name) { - cerr << "**** Checkpoint reached: " << msg << " (normal exit)" << endl; + cerr << "**** Checkpoint reached: " << msg << " with file " << filename << " (normal exit)" << endl; exit(0); } } diff --git a/src/checkpoint.h b/src/checkpoint.h index 2792fac..c2457d9 100644 --- a/src/checkpoint.h +++ b/src/checkpoint.h @@ -30,4 +30,7 @@ void checkpoint_run(string msg, string filename, string srcfilename, int line, s #define checkpoint(msg, fname) \ checkpoint_run(msg, fname, __FILE__, __LINE__, __SHOW_FUNC) +#define checkpoint_nofile(msg) \ + checkpoint_run(msg,"none", __FILE__, __LINE__, __SHOW_FUNC) + #endif diff --git a/src/gemma.cpp b/src/gemma.cpp index a50d8ab..6c2b3f4 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -84,7 +84,7 @@ void gemma_gsl_error_handler (const char * reason, void GEMMA::PrintHeader(void) { cout << - "GEMMA " << version << " (" << date << ") by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl; + "GEMMA forked executable --- part of PanGEMMA " << version << " (" << date << ") by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl; return; } diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 698d3e2..f5a79a2 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -441,6 +441,7 @@ bool ReadFile_pheno(const string &file_pheno, infile.close(); 
infile.clear(); + checkpoint("read-pheno-file",file_pheno); return true; } @@ -507,6 +508,7 @@ bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt, infile.close(); infile.clear(); + checkpoint("read-cvt-file",file_cvt); return true; } @@ -633,6 +635,7 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, infile.close(); infile.clear(); + checkpoint("read-fam-file",file_fam); return true; } @@ -1062,6 +1065,7 @@ bool ReadFile_bed(const string &file_bed, const set<string> &setSnps, infile.close(); infile.clear(); + checkpoint("read-bed-file",file_bed); return true; } @@ -1292,6 +1296,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, infile.close(); infile.clear(); + checkpoint("read-kinship-file",file_kin); return; } @@ -1595,6 +1600,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps, infile.close(); infile.clear(); + checkpoint_nofile("bimbam-kinship-matrix"); return true; } @@ -1736,6 +1742,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, infile.close(); infile.clear(); + checkpoint_nofile("plink-kinship-matrix"); return true; } @@ -1840,6 +1847,7 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, infile.clear(); infile.close(); + checkpoint("read-genotypes",file_geno); return true; } @@ -2082,6 +2090,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, gsl_vector_free(genotype); infile.clear(); infile.close(); + checkpoint("read-bed-file",file_bed); return true; } @@ -2220,6 +2229,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, gsl_vector_free(genotype); infile.clear(); infile.close(); + checkpoint("read-bed2-file",file_bed); return true; } @@ -2288,6 +2298,7 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column, infile.clear(); infile.close(); + checkpoint("read-est-file",file_est); return true; } @@ -2362,6 +2373,7 @@ bool ReadFile_gene(const string 
&file_gene, vector<double> &vec_read, infile.close(); infile.clear(); + checkpoint("read-gene-exp-file",file_gene); return true; } @@ -2716,6 +2728,7 @@ bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat, infile.clear(); infile.close(); + checkpoint("read-category-file",file_cat); return true; } @@ -2746,6 +2759,7 @@ bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat, } t++; } + checkpoint("read-mcat-file",file_mcat); return true; } @@ -2943,6 +2957,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps, infile.close(); infile.clear(); + checkpoint_nofile("read-uncenterer-kinship-file"); return true; } @@ -3168,6 +3183,7 @@ bool PlinkKin(const string &file_bed, const int display_pace, infile.close(); infile.clear(); + checkpoint("read-plink-kinship-file",file_bed); return true; } @@ -3549,6 +3565,7 @@ void ReadFile_beta(const string &file_beta, infile.clear(); infile.close(); + checkpoint("read-beta-file",file_beta); return; } @@ -3712,6 +3729,7 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA, infile.clear(); infile.close(); + checkpoint("read-beta2-file",file_beta); return; } @@ -3892,6 +3910,7 @@ void ReadFile_vector(const string &file_vec, gsl_vector *vec) { infile.clear(); infile.close(); + checkpoint("read-vec-file",file_vec); return; } @@ -3919,6 +3938,7 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) { infile.clear(); infile.close(); + checkpoint("read-matrix-file",file_mat); return; } @@ -3957,6 +3977,7 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1, infile.clear(); infile.close(); + checkpoint("read-matrix2-file",file_mat); return; } @@ -3983,6 +4004,7 @@ void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat, ni = gsl_vector_get(s, s_vec->size); gsl_vector_free(s); + checkpoint("read-study-file",file_study); return; } @@ -4007,6 +4029,7 @@ void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat, ni = 
gsl_vector_get(s, s_vec->size); gsl_vector_free(s); + checkpoint("read-ref-file",file_ref); return; } @@ -4098,6 +4121,7 @@ void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat, gsl_matrix_free(Vq_sub); gsl_vector_free(q_sub); gsl_vector_free(s); + checkpoint("read-mstudy-file",file_mstudy); return; } @@ -4185,6 +4209,7 @@ void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat, gsl_matrix_free(S_sub); gsl_matrix_free(Svar_sub); gsl_vector_free(s); + checkpoint("read-mref-file",file_mref); return; } diff --git a/src/version.h b/src/version.h index 25ccfcd..3382003 100644 --- a/src/version.h +++ b/src/version.h @@ -1,5 +1,5 @@ // version.h generated by GEMMA scripts/gen_version_info.sh -#define GEMMA_VERSION "0.98.6" -#define GEMMA_DATE "2022-08-05" -#define GEMMA_YEAR "2022" -#define GEMMA_PROFILE "/gnu/store/8rvid272yb53bgascf5c468z0jhsyflj-profile" +#define GEMMA_VERSION "0.0.1" +#define GEMMA_DATE "2025-01-04" +#define GEMMA_YEAR "2025" +#define GEMMA_PROFILE "/gnu/store/ln160n2kzn791jwgv36yrxlxygjwl9hh-profile" |