From e90c2d25cdb77d41d6587188db9e61cb988f2c78 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 5 Oct 2017 07:48:10 +0000 Subject: Added a few checks and info --- src/gemma.cpp | 4 ++-- src/io.cpp | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/gemma.cpp b/src/gemma.cpp index e3fb005..2af8f8e 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -299,7 +299,7 @@ void GEMMA::PrintHelp(size_t option) { cout << " ..." << endl; cout << " missing value: NA" << endl; cout << " -p [filename] " - << " specify input BIMBAM phenotype file name" << endl; + << " specify input BIMBAM-style phenotype file name (when used with PLINK .fam phenotypes are ignored)" << endl; cout << " format: phenotype for individual 1" << endl; cout << " phenotype for individual 2" << endl; cout << " ..." << endl; @@ -1044,7 +1044,7 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { str.clear(); str.assign(argv[i]); cPar.k_mode = atoi(str.c_str()); - } else if (strcmp(argv[i], "-n") == 0) { // set pheno column (range) + } else if (strcmp(argv[i], "-n") == 0) { // set pheno column (list/range) (cPar.p_column).clear(); while (argv[i + 1] != NULL && argv[i + 1][0] != '-') { ++i; diff --git a/src/io.cpp b/src/io.cpp index bfbfc15..be40437 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -187,6 +187,7 @@ bool ReadFile_snps_header(const string &file_snps, set &setSnps) { continue; } ch_ptr = strtok((char *)line.c_str(), " , \t"); + enforce_msg(ch_ptr,"Problem reading SNP header"); for (size_t i = 0; i < header.coln; i++) { enforce_msg(ch_ptr,"Problem reading SNP file"); @@ -337,11 +338,11 @@ bool ReadFile_column(const string &file_pheno, vector &indicator_idv, for (int i = 0; i < (p_column - 1); ++i) { ch_ptr = strtok(NULL, " , \t"); } + enforce_msg(ch_ptr,"Problem reading PHENO column"); if (strcmp(ch_ptr, "NA") == 0) { indicator_idv.push_back(0); pheno.push_back(-9); } else { - // Pheno is different from pimass2. p = atof(ch_ptr); indicator_idv.push_back(1); @@ -390,7 +391,7 @@ bool ReadFile_pheno(const string &file_pheno, ch_ptr = strtok((char *)line.c_str(), " , \t"); size_t i = 0; while (i < p_max) { - enforce_msg(ch_ptr,"Wrong number of phenotypes"); + enforce_msg(ch_ptr,"Number of phenotypes out of range"); if (mapP2c.count(i + 1) != 0) { if (strcmp(ch_ptr, "NA") == 0) { ind_pheno_row[mapP2c[i + 1]] = 0; @@ -524,7 +525,7 @@ bool ReadFile_bim(const string &file_bim, vector &snpInfo) { return true; } -// Read .fam file. +// Read .fam file (ignored with -p phenotypes switch) bool ReadFile_fam(const string &file_fam, vector> &indicator_pheno, vector> &pheno, map &mapID2num, const vector &p_column) { @@ -568,6 +569,8 @@ bool ReadFile_fam(const string &file_fam, vector> &indicator_pheno, size_t i = 0; while (i < p_max) { if (mapP2c.count(i + 1) != 0) { + enforce_msg(ch_ptr,"Problem reading FAM file (phenotypes out of range)"); + if (strcmp(ch_ptr, "NA") == 0) { ind_pheno_row[mapP2c[i + 1]] = 0; pheno_row[mapP2c[i + 1]] = -9; -- cgit v1.2.3 From 530488454ae6a35b098b81b45581f88f02d6de0e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 5 Oct 2017 09:18:59 +0000 Subject: Adding debug statements on entering functions and added test for issue 58 --- src/io.cpp | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- src/lm.cpp | 4 ++++ src/lmm.cpp | 5 +++++ src/mvlmm.cpp | 5 +++++ src/prdt.cpp | 2 ++ src/varcov.cpp | 2 ++ src/vc.cpp | 10 ++++++++++ test/dev_test_suite.sh | 7 ++++--- test/test_suite.sh | 35 +++++++++++++++++++++++++++++++---- 9 files changed, 112 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/io.cpp b/src/io.cpp index be40437..6be01fd 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -135,6 +135,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) { // Read SNP file. A single column of SNP names. bool ReadFile_snps(const string file_snps, set &setSnps) { + debug_msg("entered"); setSnps.clear(); igzstream infile(file_snps.c_str(), igzstream::in); @@ -162,6 +163,7 @@ bool ReadFile_snps(const string file_snps, set &setSnps) { // values for each row are parsed. A valid header can be, for example, // RS POS CHR bool ReadFile_snps_header(const string &file_snps, set &setSnps) { + debug_msg("entered"); setSnps.clear(); igzstream infile(file_snps.c_str(), igzstream::in); @@ -219,6 +221,7 @@ bool ReadFile_snps_header(const string &file_snps, set &setSnps) { // Read log file. bool ReadFile_log(const string &file_log, double &pheno_mean) { + debug_msg("ReadFile_log"); ifstream infile(file_log.c_str(), ifstream::in); if (!infile) { cout << "error! fail to open log file: " << file_log << endl; @@ -260,6 +263,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) { bool ReadFile_anno(const string &file_anno, map &mapRS2chr, map &mapRS2bp, map &mapRS2cM) { + debug_msg("ReadFile_anno"); mapRS2chr.clear(); mapRS2bp.clear(); @@ -319,6 +323,7 @@ bool ReadFile_anno(const string &file_anno, map &mapRS2chr, // Read 1 column of phenotype. bool ReadFile_column(const string &file_pheno, vector &indicator_idv, vector &pheno, const int &p_column) { + debug_msg("entered"); indicator_idv.clear(); pheno.clear(); @@ -361,6 +366,7 @@ bool ReadFile_pheno(const string &file_pheno, vector> &indicator_pheno, vector> &pheno, const vector &p_column) { + debug_msg("entered"); indicator_pheno.clear(); pheno.clear(); @@ -418,6 +424,7 @@ bool ReadFile_pheno(const string &file_pheno, bool ReadFile_cvt(const string &file_cvt, vector &indicator_cvt, vector> &cvt, size_t &n_cvt) { + debug_msg("entered"); indicator_cvt.clear(); ifstream infile(file_cvt.c_str(), ifstream::in); @@ -484,6 +491,7 @@ bool ReadFile_cvt(const string &file_cvt, vector &indicator_cvt, // Read .bim file. bool ReadFile_bim(const string &file_bim, vector &snpInfo) { + debug_msg("entered"); snpInfo.clear(); ifstream infile(file_bim.c_str(), ifstream::in); @@ -529,6 +537,7 @@ bool ReadFile_bim(const string &file_bim, vector &snpInfo) { bool ReadFile_fam(const string &file_fam, vector> &indicator_pheno, vector> &pheno, map &mapID2num, const vector &p_column) { + debug_msg("entered"); indicator_pheno.clear(); pheno.clear(); mapID2num.clear(); @@ -612,6 +621,7 @@ bool ReadFile_geno(const string &file_geno, const set &setSnps, map &mapRS2bp, map &mapRS2cM, vector &snpInfo, size_t &ns_test, bool debug) { + debug_msg("entered"); indicator_snp.clear(); snpInfo.clear(); @@ -822,6 +832,7 @@ bool ReadFile_bed(const string &file_bed, const set &setSnps, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test) { + debug_msg("entered"); indicator_snp.clear(); size_t ns_total = snpInfo.size(); @@ -1010,6 +1021,7 @@ bool ReadFile_bed(const string &file_bed, const set &setSnps, // Missing values are replaced by mean. bool Bimbam_ReadOneSNP(const size_t inc, const vector &indicator_idv, igzstream &infile, gsl_vector *geno, double &geno_mean) { + debug_msg("entered"); size_t ni_total = indicator_idv.size(); string line; @@ -1060,6 +1072,7 @@ bool Bimbam_ReadOneSNP(const size_t inc, const vector &indicator_idv, // For PLINK, store SNPs as double too. void Plink_ReadOneSNP(const int pos, const vector &indicator_idv, ifstream &infile, gsl_vector *geno, double &geno_mean) { + debug_msg("entered"); size_t ni_total = indicator_idv.size(), n_bit; if (ni_total % 4 == 0) { n_bit = ni_total / 4; @@ -1126,6 +1139,7 @@ void Plink_ReadOneSNP(const int pos, const vector &indicator_idv, void ReadFile_kin(const string &file_kin, vector &indicator_idv, map &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G) { + debug_msg("entered"); igzstream infile(file_kin.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open kinship file: " << file_kin << endl; @@ -1242,6 +1256,7 @@ void ReadFile_kin(const string &file_kin, vector &indicator_idv, void ReadFile_mk(const string &file_mk, vector &indicator_idv, map &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G) { + debug_msg("entered"); igzstream infile(file_mk.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open file: " << file_mk << endl; @@ -1267,6 +1282,7 @@ void ReadFile_mk(const string &file_mk, vector &indicator_idv, } void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) { + debug_msg("entered"); igzstream infile(file_ku.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open the U file: " << file_ku << endl; @@ -1315,6 +1331,7 @@ void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) { } void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { + debug_msg("entered"); igzstream infile(file_kd.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open the D file: " << file_kd << endl; @@ -1363,6 +1380,7 @@ bool BimbamKin(const string file_geno, const set ksnps, vector &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin, const bool test_nind) { + debug_msg("entered"); igzstream infile(file_geno.c_str(), igzstream::in); enforce_msg(infile, "error reading genotype file"); @@ -1499,6 +1517,7 @@ bool BimbamKin(const string file_geno, const set ksnps, bool PlinkKin(const string &file_bed, vector &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin) { + debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { cout << "error reading bed file:" << file_bed << endl; @@ -1641,6 +1660,7 @@ bool PlinkKin(const string &file_bed, vector &indicator_snp, bool ReadFile_geno(const string file_geno, vector &indicator_idv, vector &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K, bool debug) { + debug_msg("entered"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -1745,6 +1765,7 @@ bool ReadFile_geno(const string &file_geno, vector &indicator_idv, vector> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test, bool debug) { + debug_msg("entered"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -1851,6 +1872,7 @@ bool ReadFile_geno(const string &file_geno, vector &indicator_idv, bool ReadFile_bed(const string &file_bed, vector &indicator_idv, vector &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { + debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { cout << "error reading bed file:" << file_bed << endl; @@ -1982,6 +2004,7 @@ bool ReadFile_bed(const string &file_bed, vector &indicator_idv, vector &indicator_snp, vector> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test) { + debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { cout << "error reading bed file:" << file_bed << endl; @@ -2116,6 +2139,7 @@ bool ReadFile_bed(const string &file_bed, vector &indicator_idv, bool ReadFile_est(const string &file_est, const vector &est_column, map &mapRS2est) { + debug_msg("entered"); mapRS2est.clear(); ifstream infile(file_est.c_str(), ifstream::in); @@ -2176,6 +2200,7 @@ bool ReadFile_est(const string &file_est, const vector &est_column, } bool CountFileLines(const string &file_input, size_t &n_lines) { + debug_msg("entered"); igzstream infile(file_input.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open file: " << file_input << endl; @@ -2192,6 +2217,7 @@ bool CountFileLines(const string &file_input, size_t &n_lines) { // Read gene expression file. bool ReadFile_gene(const string &file_gene, vector &vec_read, vector &snpInfo, size_t &ng_total) { + debug_msg("entered"); vec_read.clear(); ng_total = 0; @@ -2255,6 +2281,7 @@ bool ReadFile_sample(const string &file_sample, vector> &pheno, const vector &p_column, vector &indicator_cvt, vector> &cvt, size_t &n_cvt) { + debug_msg("entered"); indicator_pheno.clear(); pheno.clear(); indicator_cvt.clear(); @@ -2509,6 +2536,7 @@ bool ReadFile_bgen(const string &file_bgen, const set &setSnps, const double &hwe_level, const double &r2_level, size_t &ns_test) { + debug_msg("entered"); indicator_snp.clear(); ifstream infile(file_bgen.c_str(), ios::binary); @@ -2796,6 +2824,7 @@ bool ReadFile_bgen(const string &file_bgen, const set &setSnps, // Read oxford genotype file and calculate kinship matrix. bool bgenKin(const string &file_oxford, vector &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin) { + debug_msg("entered"); string file_bgen = file_oxford; ifstream infile(file_bgen.c_str(), ios::binary); if (!infile) { @@ -2854,7 +2883,7 @@ bool bgenKin(const string &file_oxford, vector &indicator_snp, for (size_t t = 0; t < indicator_snp.size(); ++t) { if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { - ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + ProgressBar("Reading bgen SNPs ", t, indicator_snp.size() - 1); } id.clear(); @@ -3000,6 +3029,7 @@ bool bgenKin(const string &file_oxford, vector &indicator_snp, // Read header to determine which column contains which item. bool ReadHeader_io(const string &line, HEADER &header) { + debug_msg("entered"); string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", "SNPS", "snpid", "SNPID", "rsid", "RSID", "MarkerName"}; set rs_set(rs_ptr, rs_ptr + 11); // create a set of 11 items @@ -3267,6 +3297,7 @@ bool ReadHeader_io(const string &line, HEADER &header) { // it is not included in the analysis. bool ReadFile_cat(const string &file_cat, map &mapRS2cat, size_t &n_vc) { + debug_msg("entered"); mapRS2cat.clear(); igzstream infile(file_cat.c_str(), igzstream::in); @@ -3351,6 +3382,7 @@ bool ReadFile_cat(const string &file_cat, map &mapRS2cat, bool ReadFile_mcat(const string &file_mcat, map &mapRS2cat, size_t &n_vc) { + debug_msg("entered"); mapRS2cat.clear(); igzstream infile(file_mcat.c_str(), igzstream::in); @@ -3389,6 +3421,8 @@ bool BimbamKinUncentered(const string &file_geno, const set ksnps, const map &mapRS2cat, const vector &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns) { + debug_msg("entered"); + debug_msg("BimbamKinUncentered"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -3579,6 +3613,7 @@ bool PlinkKin(const string &file_bed, const int display_pace, const map &mapRS2cat, const vector &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns) { + debug_msg("entered"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { cout << "error reading bed file:" << file_bed << endl; @@ -3804,6 +3839,7 @@ bool MFILEKin(const size_t mfile_mode, const string &file_mfile, const map &mapRS2cat, const vector> &msnpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns) { + debug_msg("entered"); size_t n_vc = vector_ns->size, ni_test = matrix_kin->size1; gsl_matrix_set_zero(matrix_kin); gsl_vector_set_zero(vector_ns); @@ -3879,6 +3915,7 @@ bool MFILEKin(const size_t mfile_mode, const string &file_mfile, // Read var file, store mapRS2wsnp. bool ReadFile_wsnp(const string &file_wsnp, map &mapRS2weight) { + debug_msg("entered"); mapRS2weight.clear(); igzstream infile(file_wsnp.c_str(), igzstream::in); @@ -3904,6 +3941,7 @@ bool ReadFile_wsnp(const string &file_wsnp, map &mapRS2weight) { bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc, map> &mapRS2wvector) { + debug_msg("entered"); mapRS2wvector.clear(); igzstream infile(file_wcat.c_str(), igzstream::in); @@ -3988,6 +4026,7 @@ void ReadFile_beta(const string &file_beta, vector &vec_ni, vector &vec_weight, vector &vec_z2, size_t &ni_total, size_t &ns_total, size_t &ns_test) { + debug_msg("entered"); vec_cat.clear(); vec_ni.clear(); vec_weight.clear(); @@ -4174,6 +4213,7 @@ void ReadFile_beta(const string &file_beta, void ReadFile_beta(const string &file_beta, const map &mapRS2wA, map &mapRS2A1, map &mapRS2z) { + debug_msg("entered"); mapRS2A1.clear(); mapRS2z.clear(); @@ -4335,6 +4375,7 @@ void Calcq(const size_t n_block, const vector &vec_cat, const vector &vec_ni, const vector &vec_weight, const vector &vec_z2, gsl_matrix *Vq, gsl_vector *q, gsl_vector *s) { + debug_msg("entered"); gsl_matrix_set_zero(Vq); gsl_vector_set_zero(q); gsl_vector_set_zero(s); @@ -4488,6 +4529,7 @@ void Calcq(const size_t n_block, const vector &vec_cat, // Read vector file. void ReadFile_vector(const string &file_vec, gsl_vector *vec) { + debug_msg("entered"); igzstream infile(file_vec.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open vector file: " << file_vec << endl; @@ -4510,6 +4552,7 @@ void ReadFile_vector(const string &file_vec, gsl_vector *vec) { } void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) { + debug_msg("entered"); igzstream infile(file_mat.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open matrix file: " << file_mat << endl; @@ -4536,6 +4579,7 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) { void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1, gsl_matrix *mat2) { + debug_msg("entered"); igzstream infile(file_mat.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open matrix file: " << file_mat << endl; @@ -4572,6 +4616,7 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1, // Read study file. void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { + debug_msg("entered"); string Vqfile = file_study + ".Vq.txt"; string sfile = file_study + ".size.txt"; string qfile = file_study + ".q.txt"; @@ -4597,6 +4642,7 @@ void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat, // Read reference file. void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { + debug_msg("entered"); string sfile = file_ref + ".size.txt"; string Sfile = file_ref + ".S.txt"; @@ -4620,6 +4666,7 @@ void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat, // Read mstudy file. void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { + debug_msg("entered"); gsl_matrix_set_zero(Vq_mat); gsl_vector_set_zero(q_vec); gsl_vector_set_zero(s_vec); @@ -4710,6 +4757,7 @@ void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat, // Read reference file. void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { + debug_msg("entered"); gsl_matrix_set_zero(S_mat); gsl_matrix_set_zero(Svar_mat); gsl_vector_set_zero(s_vec); diff --git a/src/lm.cpp b/src/lm.cpp index f8fc43d..83558e4 100644 --- a/src/lm.cpp +++ b/src/lm.cpp @@ -290,6 +290,7 @@ void LmCalcP(const size_t test_mode, const double yPwy, const double xPwy, } void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) { + debug_msg("entering"); ifstream infile(file_gene.c_str(), ifstream::in); if (!infile) { cout << "error reading gene expression file:" << file_gene << endl; @@ -382,6 +383,7 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) { // WJA added void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) { + debug_msg("entering"); string file_bgen = file_oxford + ".bgen"; ifstream infile(file_bgen.c_str(), ios::binary); if (!infile) { @@ -606,6 +608,7 @@ void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) { } void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -720,6 +723,7 @@ void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) { } void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) { + debug_msg("entering"); string file_bed = file_bfile + ".bed"; ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { diff --git a/src/lmm.cpp b/src/lmm.cpp index eb76265..37f2f5b 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -1319,6 +1319,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const set gwasnps) { + debug_msg("entering"); clock_t time_start = clock(); // LOCO support @@ -1482,6 +1483,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y) { + debug_msg("entering"); string file_bed = file_bfile + ".bed"; ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { @@ -1676,6 +1678,7 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y) { + debug_msg("entering"); string file_bgen = file_oxford + ".bgen"; ifstream infile(file_bgen.c_str(), ios::binary); if (!infile) { @@ -2282,6 +2285,7 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env) { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -2430,6 +2434,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env) { + debug_msg("entering"); string file_bed = file_bfile + ".bed"; ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index be9fd78..c5efb6e 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -2953,6 +2953,7 @@ double PCRT(const size_t mode, const size_t d_size, const double p_value, // WJA added. void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY) { + debug_msg("entering"); string file_bgen = file_oxford + ".bgen"; ifstream infile(file_bgen.c_str(), ios::binary); if (!infile) { @@ -3501,6 +3502,7 @@ void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY) { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -3938,6 +3940,7 @@ void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY) { + debug_msg("entering"); string file_bed = file_bfile + ".bed"; ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { @@ -4475,6 +4478,7 @@ void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env) { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -4920,6 +4924,7 @@ void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env) { + debug_msg("entering"); string file_bed = file_bfile + ".bed"; ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { diff --git a/src/prdt.cpp b/src/prdt.cpp index 3e7c004..9dc84bc 100644 --- a/src/prdt.cpp +++ b/src/prdt.cpp @@ -206,6 +206,7 @@ void PRDT::AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { } void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -308,6 +309,7 @@ void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) { } void PRDT::AnalyzePlink(gsl_vector *y_prdt) { + debug_msg("entering"); string file_bed = file_bfile + ".bed"; ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { diff --git a/src/varcov.cpp b/src/varcov.cpp index 0f87ba8..39c3523 100644 --- a/src/varcov.cpp +++ b/src/varcov.cpp @@ -247,6 +247,7 @@ void Calc_Cor(vector> &X_mat, vector &cov_vec) { // genotype file and calculate the covariance matrix for neighboring // SNPs output values at 10000-SNP-interval. void VARCOV::AnalyzeBimbam() { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -345,6 +346,7 @@ void VARCOV::AnalyzeBimbam() { } void VARCOV::AnalyzePlink() { + debug_msg("entering"); string file_bed = file_bfile + ".bed"; ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { diff --git a/src/vc.cpp b/src/vc.cpp index b5f36c0..1465f16 100644 --- a/src/vc.cpp +++ b/src/vc.cpp @@ -407,6 +407,7 @@ int LogRL_dev12(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, // Read header to determine which column contains which item. bool ReadHeader_vc(const string &line, HEADER &header) { + debug_msg("entering"); string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", "SNPS", "snpid", "SNPID", "rsid", "RSID"}; set rs_set(rs_ptr, rs_ptr + 10); @@ -639,6 +640,7 @@ void ReadFile_cor(const string &file_cor, const set &setSnps, vector &vec_cm, vector &vec_bp, map &mapRS2in, map &mapRS2var) { + debug_msg("entering"); vec_rs.clear(); vec_n.clear(); mapRS2in.clear(); @@ -790,6 +792,7 @@ void ReadFile_beta(const bool flag_priorscale, const string &file_beta, map &mapRS2nsamp, gsl_vector *q_vec, gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total, size_t &ns_total) { + debug_msg("entering"); mapRS2nsamp.clear(); igzstream infile(file_beta.c_str(), igzstream::in); @@ -1004,6 +1007,7 @@ void ReadFile_cor(const string &file_cor, const vector &vec_rs, const double &window_ns, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total, size_t &ns_total, size_t &ns_test, size_t &ns_pair) { + debug_msg("entering"); igzstream infile(file_cor.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open cov file: " << file_cor << endl; @@ -2213,6 +2217,7 @@ bool BimbamXwz(const string &file_geno, const int display_pace, vector &indicator_idv, vector &indicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -2306,6 +2311,7 @@ bool PlinkXwz(const string &file_bed, const int display_pace, vector &indicator_idv, vector &indicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { + debug_msg("entering"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { cout << "error reading bed file:" << file_bed << endl; @@ -2432,6 +2438,7 @@ bool MFILEXwz(const size_t mfile_mode, const string &file_mfile, vector> &mindicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, gsl_matrix *XWz) { + debug_msg("entering"); gsl_matrix_set_zero(XWz); igzstream infile(file_mfile.c_str(), igzstream::in); @@ -2466,6 +2473,7 @@ bool MFILEXwz(const size_t mfile_mode, const string &file_mfile, bool BimbamXtXwz(const string &file_geno, const int display_pace, vector &indicator_idv, vector &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { + debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -2556,6 +2564,7 @@ bool BimbamXtXwz(const string &file_geno, const int display_pace, bool PlinkXtXwz(const string &file_bed, const int display_pace, vector &indicator_idv, vector &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { + debug_msg("entering"); ifstream infile(file_bed.c_str(), ios::binary); if (!infile) { cout << "error reading bed file:" << file_bed << endl; @@ -2679,6 +2688,7 @@ bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile, const int display_pace, vector &indicator_idv, vector> &mindicator_snp, const gsl_matrix *XWz, gsl_matrix *XtXWz) { + debug_msg("entering"); gsl_matrix_set_zero(XtXWz); igzstream infile(file_mfile.c_str(), igzstream::in); diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh index 9e49251..2bd432e 100755 --- a/test/dev_test_suite.sh +++ b/test/dev_test_suite.sh @@ -2,7 +2,6 @@ gemma=../bin/gemma - # Related to https://github.com/genetics-statistics/GEMMA/issues/78 testBXDStandardRelatednessMatrixKSingularError() { outn=BXDerr @@ -65,10 +64,12 @@ testCenteredRelatednessMatrixKLOCO1() { testUnivariateLinearMixedModelLOCO1() { outn=mouse_hs1940_CD8_LOCO1_lmm rm -f output/$outn.* - $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \ + $gemma -g ../example/mouse_hs1940.geno.txt.gz \ + -p ../example/mouse_hs1940.pheno.txt \ -n 1 \ -loco 1 \ - -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940_LOCO1.cXX.txt \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940_LOCO1.cXX.txt \ -snps ../example/mouse_hs1940_snps.txt -lmm \ -nind 400 \ -debug \ diff --git a/test/test_suite.sh b/test/test_suite.sh index 44eb14c..fa66b7a 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -36,7 +36,7 @@ testUnivariateLinearMixedModelFullLOCO1() { testCenteredRelatednessMatrixK() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ -p ../example/mouse_hs1940.pheno.txt \ - -gk -o mouse_hs1940 + -gk -o mouse_hs1940 -debug assertEquals 0 $? outfn=output/mouse_hs1940.cXX.txt assertEquals "1940" `wc -l < $outfn` @@ -52,7 +52,8 @@ testUnivariateLinearMixedModel() { -a ../example/mouse_hs1940.anno.txt \ -k ./output/mouse_hs1940.cXX.txt \ -lmm \ - -o mouse_hs1940_CD8_lmm + -o mouse_hs1940_CD8_lmm \ + -debug assertEquals 0 $? grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt assertEquals 0 $? @@ -67,7 +68,8 @@ testMultivariateLinearMixedModel() { -n 1 6 \ -a ../example/mouse_hs1940.anno.txt \ -k ./output/mouse_hs1940.cXX.txt \ - -lmm -o mouse_hs1940_CD8MCH_lmm + -lmm -o mouse_hs1940_CD8MCH_lmm \ + -debug assertEquals 0 $? outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt @@ -81,7 +83,8 @@ testPlinkStandardRelatednessMatrixK() { outfn=output/$testname.sXX.txt rm -f $outfn $gemma -bfile $datadir/HLC \ - -gk 2 -o $testname + -gk 2 -o $testname \ + -debug assertEquals 0 $? assertEquals "427" `wc -l < $outfn` assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` @@ -97,6 +100,30 @@ testPlinkMultivariateLinearMixedModel() { -lmm 1 \ -maf 0.1 \ -c $datadir/HLC_covariates.txt \ + -debug \ + -o $testname + assertEquals 0 $? + outfn=output/$testname.assoc.txt + assertEquals "223243" `wc -l < $outfn` + assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() { + # n=2 is original pheno in fam file + # n=1 is causal1 + # n=3..12 is causal2 + # n=13..22 is causal3 + # -n 1 2 3 15 is independent + testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes + datadir=../example + $gemma -bfile $datadir/HLC \ + -p $datadir/HLC.simu.pheno.txt \ + -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \ + -lmm 1 \ + -maf 0.1 \ + -n 1 2 3 15 \ + -c $datadir/HLC_covariates.txt \ + -debug \ -o $testname assertEquals 0 $? outfn=output/$testname.assoc.txt -- cgit v1.2.3 From d672c81f7963180c4979aecf93b624d12d3f2ed2 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 5 Oct 2017 11:10:57 +0000 Subject: Addresses https://github.com/genetics-statistics/GEMMA/issues/81 --- Makefile | 7 ++ src/lm.cpp | 8 +- src/lmm.cpp | 202 +++++++++++++++++---------------------------- src/param.h | 5 +- test/dev_test_suite.sh | 13 +-- test/lengthy_test_suite.sh | 54 ++++++++++++ test/test_suite.sh | 31 +------ 7 files changed, 156 insertions(+), 164 deletions(-) create mode 100755 test/lengthy_test_suite.sh (limited to 'src') diff --git a/Makefile b/Makefile index 81367d6..dafcb38 100644 --- a/Makefile +++ b/Makefile @@ -144,8 +144,15 @@ slow-check: all cd test && ./test_suite.sh | tee ../test.log grep -q 'success rate: 100%' test.log +lengthy-check: all + rm -vf test/output/* + cd test && ./lengthy_test_suite.sh | tee ../lengthy_test.log + grep -q 'success rate: 100%' lengthy_test.log + check: fast-check slow-check +check-all: check lengthy-check + clean: rm -vf $(SRC_DIR)/*.o rm -vf $(SRC_DIR)/*~ diff --git a/src/lm.cpp b/src/lm.cpp index 83558e4..0c2a2bb 100644 --- a/src/lm.cpp +++ b/src/lm.cpp @@ -362,7 +362,7 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) { time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0 }; sumStat.push_back(SNPs); } cout << endl; @@ -587,7 +587,7 @@ void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) { time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0}; sumStat.push_back(SNPs); } cout << endl; @@ -702,7 +702,7 @@ void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) { time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0}; sumStat.push_back(SNPs); } cout << endl; @@ -844,7 +844,7 @@ void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) { p_lrt, p_score); // store summary data - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0}; sumStat.push_back(SNPs); time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); diff --git a/src/lmm.cpp b/src/lmm.cpp index 37f2f5b..e2f23a2 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -95,6 +95,7 @@ void LMM::CopyToParam(PARAM &cPar) { } void LMM::WriteFiles() { + string file_str; file_str = path_out + "/" + file_out; file_str += ".assoc.txt"; @@ -105,150 +106,99 @@ void LMM::WriteFiles() { return; } - if (!file_gene.empty()) { - outfile << "geneID" - << "\t"; - - if (a_mode == 1) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" + auto common_header = [&] () { + outfile << "beta" << "\t" + << "se" << "\t"; + + outfile << "logl_H1" << "\t"; // we may make this an option + + switch(a_mode) { + case 1: + outfile << "l_remle" << "\t" << "p_wald" << endl; - } else if (a_mode == 2) { - outfile << "l_mle" - << "\t" + break; + case 2: + outfile << "l_mle" << "\t" << "p_lrt" << endl; - } else if (a_mode == 3) { - outfile << "beta" - << "\t" - << "se" - << "\t" + break; + case 3: + outfile << "p_score" << endl; + break; + case 4: + outfile << "l_remle" << "\t" + << "l_mle" << "\t" + << "p_wald" << "\t" + << "p_lrt" << "\t" << "p_score" << endl; - } else if (a_mode == 4) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" - << "l_mle" - << "\t" - << "p_wald" - << "\t" - << "p_lrt" - << "\t" - << "p_score" << endl; - } else { + break; } + }; + + auto sumstats = [&] (SUMSTAT st) { + outfile << scientific << setprecision(6) << st.beta << "\t" + << st.se << "\t"; + + outfile << st.logl_H1 << "\t"; + + switch(a_mode) { + case 1: + outfile << st.lambda_remle << "\t" + << st.p_wald << endl; + break; + case 2: + outfile << st.lambda_mle << "\t" + << st.p_lrt << endl; + break; + case 3: + outfile << st.p_score << endl; + break; + case 4: + outfile << st.lambda_remle << "\t" + << st.lambda_mle << "\t" + << st.p_wald << "\t" + << st.p_lrt << "\t" + << st.p_score << endl; + break; + } + }; + + + if (!file_gene.empty()) { + outfile << "geneID" << "\t"; + + common_header(); for (vector::size_type t = 0; t < sumStat.size(); ++t) { outfile << snpInfo[t].rs_number << "\t"; - - if (a_mode == 1) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].p_wald << endl; - } else if (a_mode == 2) { - outfile << scientific << setprecision(6) << sumStat[t].lambda_mle - << "\t" << sumStat[t].p_lrt << endl; - } else if (a_mode == 3) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].p_score << endl; - } else if (a_mode == 4) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t" - << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; - } else { - } + sumstats(sumStat[t]); } } else { bool process_gwasnps = setGWASnps.size(); - outfile << "chr" - << "\t" - << "rs" - << "\t" - << "ps" - << "\t" - << "n_miss" - << "\t" - << "allele1" - << "\t" - << "allele0" - << "\t" - << "af" - << "\t"; - - if (a_mode == 1) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" - << "p_wald" << endl; - } else if (a_mode == 2) { - outfile << "l_mle" - << "\t" - << "p_lrt" << endl; - } else if (a_mode == 3) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "p_score" << endl; - } else if (a_mode == 4) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" - << "l_mle" - << "\t" - << "p_wald" - << "\t" - << "p_lrt" - << "\t" - << "p_score" << endl; - } else { - } + + outfile << "chr" << "\t" + << "rs" << "\t" + << "ps" << "\t" + << "n_miss" << "\t" + << "allele1" << "\t" + << "allele0" << "\t" + << "af" << "\t"; + + common_header(); size_t t = 0; for (size_t i = 0; i < snpInfo.size(); ++i) { - if (indicator_snp[i] == 0) continue; auto snp = snpInfo[i].rs_number; if (process_gwasnps && setGWASnps.count(snp) == 0) continue; // cout << t << endl; - outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" << fixed << setprecision(3) << snpInfo[i].maf << "\t"; - if (a_mode == 1) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].p_wald << endl; - } else if (a_mode == 2) { - outfile << scientific << setprecision(6) << sumStat[t].lambda_mle - << "\t" << sumStat[t].p_lrt << endl; - } else if (a_mode == 3) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].p_score << endl; - } else if (a_mode == 4) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t" - << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; - } else { - } + sumstats(sumStat[t]); t++; } } @@ -1299,7 +1249,7 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } cout << endl; @@ -1400,7 +1350,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, // Store summary data. SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; + p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } }; @@ -1653,7 +1603,7 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, // Store summary data. SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; + p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } } @@ -1930,7 +1880,7 @@ void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, // Store summary data. SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; + p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } } @@ -2411,7 +2361,7 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } cout << endl; @@ -2589,7 +2539,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } cout << endl; diff --git a/src/param.h b/src/param.h index 08b1e10..ff279bd 100644 --- a/src/param.h +++ b/src/param.h @@ -56,6 +56,9 @@ public: double p_wald; // p value from a Wald test. double p_lrt; // p value from a likelihood ratio test. double p_score; // p value from a score test. + double logl_H1; // log likelihood under the alternative + // hypothesis as a measure of goodness of fit, + // see https://github.com/genetics-statistics/GEMMA/issues/81 }; // Results for mvLMM. @@ -118,7 +121,7 @@ public: bool mode_debug = false; uint issue; // enable tests for issue on github tracker - int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests + uint a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector p_column; // Which phenotype column needs analysis. size_t d_pace; // Display pace diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh index 2bd432e..37f6b28 100755 --- a/test/dev_test_suite.sh +++ b/test/dev_test_suite.sh @@ -31,7 +31,8 @@ testBXDStandardRelatednessMatrixK() { assertEquals "-116.11" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } -testBXDMultivariateLinearMixedModel() { +testBXDLMMLikelihoodRatio() { + outn=BXD_LMM_LR $gemma -g ../example/BXD_geno.txt.gz \ -p ../example/BXD_pheno.txt \ -c ../example/BXD_covariates2.txt \ @@ -39,12 +40,12 @@ testBXDMultivariateLinearMixedModel() { -k ./output/BXD.cXX.txt \ -lmm 2 -maf 0.1 \ -debug \ - -o BXD_mvlmm + -o $outn assertEquals 0 $? - outfn=output/BXD_mvlmm.assoc.txt - assertEquals "65862" `wc -w < $outfn` - assertEquals "3088489421.94" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + outfn=output/$outn.assoc.txt + assertEquals "87816" `wc -w < $outfn` + assertEquals "3088458212.93" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testCenteredRelatednessMatrixKLOCO1() { @@ -79,7 +80,7 @@ testUnivariateLinearMixedModelLOCO1() { assertEquals 0 $? outfn=output/$outn.assoc.txt assertEquals "68" `wc -l < $outfn` - assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "15465346.22" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } shunit2=`which shunit2` diff --git a/test/lengthy_test_suite.sh b/test/lengthy_test_suite.sh new file mode 100755 index 0000000..327b2b2 --- /dev/null +++ b/test/lengthy_test_suite.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Long running tests go here + +gemma=../bin/gemma + +testPlinkStandardRelatednessMatrixK() { + testname=testPlinkStandardRelatednessMatrixK + datadir=../example + outfn=output/$testname.sXX.txt + rm -f $outfn + $gemma -bfile $datadir/HLC \ + -gk 2 -o $testname \ + -debug + assertEquals 0 $? + assertEquals "427" `wc -l < $outfn` + assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() { + # This test passes, but takes over 30 minutes to run! + # n=2 is original pheno in fam file + # n=1 is causal1 + # n=3..12 is causal2 + # n=13..22 is causal3 + # -n 1 2 3 15 is independent + testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes + datadir=../example + $gemma -bfile $datadir/HLC \ + -p $datadir/HLC.simu.pheno.txt \ + -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \ + -lmm 1 \ + -maf 0.1 \ + -n 1 2 3 15 \ + -c $datadir/HLC_covariates.txt \ + -debug \ + -o $testname + assertEquals 0 $? + outfn=output/$testname.assoc.txt + assertEquals "223243" `wc -l < $outfn` + assertEquals "89754977983.69" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +shunit2=`which shunit2` + +if [ -x "$shunit2" ]; then + echo run system shunit2 + . $shunit2 +elif [ -e ../contrib/shunit2-2.0.3/src/shell/shunit2 ]; then + echo run shunit2 provided in gemma repo + . ../contrib/shunit2-2.0.3/src/shell/shunit2 +else + echo "Can not find shunit2 - see INSTALL.md" +fi diff --git a/test/test_suite.sh b/test/test_suite.sh index fa66b7a..350fc27 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -30,7 +30,7 @@ testUnivariateLinearMixedModelFullLOCO1() { assertEquals 0 $? outfn=output/$outn.assoc.txt assertEquals "951" `wc -l < $outfn` - assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "267507851.98" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testCenteredRelatednessMatrixK() { @@ -58,8 +58,8 @@ testUnivariateLinearMixedModel() { grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt assertEquals 0 $? outfn=output/mouse_hs1940_CD8_lmm.assoc.txt - assertEquals "118459" `wc -w < $outfn` - assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "129228" `wc -w < $outfn` + assertEquals "4038540440.86" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testMultivariateLinearMixedModel() { @@ -105,30 +105,7 @@ testPlinkMultivariateLinearMixedModel() { assertEquals 0 $? outfn=output/$testname.assoc.txt assertEquals "223243" `wc -l < $outfn` - assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` -} - -testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() { - # n=2 is original pheno in fam file - # n=1 is causal1 - # n=3..12 is causal2 - # n=13..22 is causal3 - # -n 1 2 3 15 is independent - testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes - datadir=../example - $gemma -bfile $datadir/HLC \ - -p $datadir/HLC.simu.pheno.txt \ - -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \ - -lmm 1 \ - -maf 0.1 \ - -n 1 2 3 15 \ - -c $datadir/HLC_covariates.txt \ - -debug \ - -o $testname - assertEquals 0 $? - outfn=output/$testname.assoc.txt - assertEquals "223243" `wc -l < $outfn` - assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "89757159113.77" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } shunit2=`which shunit2` -- cgit v1.2.3 From 86323ccaf26ad0a3b706a67a0014dd04b9965823 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 5 Oct 2017 11:55:03 +0000 Subject: LMM: skip beta (again) with LRT --- src/lmm.cpp | 14 ++++++++++---- test/dev_test_suite.sh | 2 +- test/lengthy_test_suite.sh | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/lmm.cpp b/src/lmm.cpp index e2f23a2..1193700 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -107,8 +107,10 @@ void LMM::WriteFiles() { } auto common_header = [&] () { - outfile << "beta" << "\t" - << "se" << "\t"; + if (a_mode != 2) + outfile << "beta" << "\t"; + + outfile << "se" << "\t"; outfile << "logl_H1" << "\t"; // we may make this an option @@ -135,8 +137,12 @@ void LMM::WriteFiles() { }; auto sumstats = [&] (SUMSTAT st) { - outfile << scientific << setprecision(6) << st.beta << "\t" - << st.se << "\t"; + outfile << scientific << setprecision(6); + + if (a_mode != 2) + outfile << st.beta << "\t"; + + outfile << st.se << "\t"; outfile << st.logl_H1 << "\t"; diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh index 37f6b28..0fc4423 100755 --- a/test/dev_test_suite.sh +++ b/test/dev_test_suite.sh @@ -44,7 +44,7 @@ testBXDLMMLikelihoodRatio() { assertEquals 0 $? outfn=output/$outn.assoc.txt - assertEquals "87816" `wc -w < $outfn` + assertEquals "80498" `wc -w < $outfn` assertEquals "3088458212.93" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } diff --git a/test/lengthy_test_suite.sh b/test/lengthy_test_suite.sh index 327b2b2..231475e 100755 --- a/test/lengthy_test_suite.sh +++ b/test/lengthy_test_suite.sh @@ -18,6 +18,7 @@ testPlinkStandardRelatednessMatrixK() { } testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() { + echo "Long running test!" # This test passes, but takes over 30 minutes to run! # n=2 is original pheno in fam file # n=1 is causal1 -- cgit v1.2.3