diff options
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | src/lm.cpp | 8 | ||||
-rw-r--r-- | src/lmm.cpp | 202 | ||||
-rw-r--r-- | src/param.h | 5 | ||||
-rwxr-xr-x | test/dev_test_suite.sh | 13 | ||||
-rwxr-xr-x | test/lengthy_test_suite.sh | 54 | ||||
-rwxr-xr-x | test/test_suite.sh | 31 |
7 files changed, 156 insertions, 164 deletions
@@ -144,8 +144,15 @@ slow-check: all cd test && ./test_suite.sh | tee ../test.log grep -q 'success rate: 100%' test.log +lengthy-check: all + rm -vf test/output/* + cd test && ./lengthy_test_suite.sh | tee ../lengthy_test.log + grep -q 'success rate: 100%' lengthy_test.log + check: fast-check slow-check +check-all: check lengthy-check + clean: rm -vf $(SRC_DIR)/*.o rm -vf $(SRC_DIR)/*~ @@ -362,7 +362,7 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) { time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0 }; sumStat.push_back(SNPs); } cout << endl; @@ -587,7 +587,7 @@ void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) { time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0}; sumStat.push_back(SNPs); } cout << endl; @@ -702,7 +702,7 @@ void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) { time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0}; sumStat.push_back(SNPs); } cout << endl; @@ -844,7 +844,7 @@ void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) { p_lrt, p_score); // store summary data - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0}; sumStat.push_back(SNPs); time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); diff --git a/src/lmm.cpp b/src/lmm.cpp index 37f2f5b..e2f23a2 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -95,6 +95,7 @@ void LMM::CopyToParam(PARAM &cPar) { } void LMM::WriteFiles() { + string file_str; file_str = path_out + "/" + file_out; file_str += ".assoc.txt"; @@ -105,150 +106,99 @@ void LMM::WriteFiles() { return; } - if (!file_gene.empty()) { - outfile << "geneID" - << "\t"; - - if (a_mode == 1) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" + auto common_header = [&] () { + outfile << "beta" << "\t" + << "se" << "\t"; + + outfile << "logl_H1" << "\t"; // we may make this an option + + switch(a_mode) { + case 1: + outfile << "l_remle" << "\t" << "p_wald" << endl; - } else if (a_mode == 2) { - outfile << "l_mle" - << "\t" + break; + case 2: + outfile << "l_mle" << "\t" << "p_lrt" << endl; - } else if (a_mode == 3) { - outfile << "beta" - << "\t" - << "se" - << "\t" + break; + case 3: + outfile << "p_score" << endl; + break; + case 4: + outfile << "l_remle" << "\t" + << "l_mle" << "\t" + << "p_wald" << "\t" + << "p_lrt" << "\t" << "p_score" << endl; - } else if (a_mode == 4) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" - << "l_mle" - << "\t" - << "p_wald" - << "\t" - << "p_lrt" - << "\t" - << "p_score" << endl; - } else { + break; } + }; + + auto sumstats = [&] (SUMSTAT st) { + outfile << scientific << setprecision(6) << st.beta << "\t" + << st.se << "\t"; + + outfile << st.logl_H1 << "\t"; + + switch(a_mode) { + case 1: + outfile << st.lambda_remle << "\t" + << st.p_wald << endl; + break; + case 2: + outfile << st.lambda_mle << "\t" + << st.p_lrt << endl; + break; + case 3: + outfile << st.p_score << endl; + break; + case 4: + outfile << st.lambda_remle << "\t" + << st.lambda_mle << "\t" + << st.p_wald << "\t" + << st.p_lrt << "\t" + << st.p_score << endl; + break; + } + }; + + + if (!file_gene.empty()) { + outfile << "geneID" << "\t"; + + common_header(); for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) { outfile << snpInfo[t].rs_number << "\t"; - - if (a_mode == 1) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].p_wald << endl; - } else if (a_mode == 2) { - outfile << scientific << setprecision(6) << sumStat[t].lambda_mle - << "\t" << sumStat[t].p_lrt << endl; - } else if (a_mode == 3) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].p_score << endl; - } else if (a_mode == 4) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t" - << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; - } else { - } + sumstats(sumStat[t]); } } else { bool process_gwasnps = setGWASnps.size(); - outfile << "chr" - << "\t" - << "rs" - << "\t" - << "ps" - << "\t" - << "n_miss" - << "\t" - << "allele1" - << "\t" - << "allele0" - << "\t" - << "af" - << "\t"; - - if (a_mode == 1) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" - << "p_wald" << endl; - } else if (a_mode == 2) { - outfile << "l_mle" - << "\t" - << "p_lrt" << endl; - } else if (a_mode == 3) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "p_score" << endl; - } else if (a_mode == 4) { - outfile << "beta" - << "\t" - << "se" - << "\t" - << "l_remle" - << "\t" - << "l_mle" - << "\t" - << "p_wald" - << "\t" - << "p_lrt" - << "\t" - << "p_score" << endl; - } else { - } + + outfile << "chr" << "\t" + << "rs" << "\t" + << "ps" << "\t" + << "n_miss" << "\t" + << "allele1" << "\t" + << "allele0" << "\t" + << "af" << "\t"; + + common_header(); size_t t = 0; for (size_t i = 0; i < snpInfo.size(); ++i) { - if (indicator_snp[i] == 0) continue; auto snp = snpInfo[i].rs_number; if (process_gwasnps && setGWASnps.count(snp) == 0) continue; // cout << t << endl; - outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" << fixed << setprecision(3) << snpInfo[i].maf << "\t"; - if (a_mode == 1) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].p_wald << endl; - } else if (a_mode == 2) { - outfile << scientific << setprecision(6) << sumStat[t].lambda_mle - << "\t" << sumStat[t].p_lrt << endl; - } else if (a_mode == 3) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].p_score << endl; - } else if (a_mode == 4) { - outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" - << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" - << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t" - << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; - } else { - } + sumstats(sumStat[t]); t++; } } @@ -1299,7 +1249,7 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } cout << endl; @@ -1400,7 +1350,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, // Store summary data. SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; + p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } }; @@ -1653,7 +1603,7 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, // Store summary data. SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; + p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } } @@ -1930,7 +1880,7 @@ void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, // Store summary data. SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; + p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } } @@ -2411,7 +2361,7 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } cout << endl; @@ -2589,7 +2539,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1}; sumStat.push_back(SNPs); } cout << endl; diff --git a/src/param.h b/src/param.h index 08b1e10..ff279bd 100644 --- a/src/param.h +++ b/src/param.h @@ -56,6 +56,9 @@ public: double p_wald; // p value from a Wald test. double p_lrt; // p value from a likelihood ratio test. double p_score; // p value from a score test. + double logl_H1; // log likelihood under the alternative + // hypothesis as a measure of goodness of fit, + // see https://github.com/genetics-statistics/GEMMA/issues/81 }; // Results for mvLMM. @@ -118,7 +121,7 @@ public: bool mode_debug = false; uint issue; // enable tests for issue on github tracker - int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests + uint a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector<size_t> p_column; // Which phenotype column needs analysis. size_t d_pace; // Display pace diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh index 2bd432e..37f6b28 100755 --- a/test/dev_test_suite.sh +++ b/test/dev_test_suite.sh @@ -31,7 +31,8 @@ testBXDStandardRelatednessMatrixK() { assertEquals "-116.11" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } -testBXDMultivariateLinearMixedModel() { +testBXDLMMLikelihoodRatio() { + outn=BXD_LMM_LR $gemma -g ../example/BXD_geno.txt.gz \ -p ../example/BXD_pheno.txt \ -c ../example/BXD_covariates2.txt \ @@ -39,12 +40,12 @@ testBXDMultivariateLinearMixedModel() { -k ./output/BXD.cXX.txt \ -lmm 2 -maf 0.1 \ -debug \ - -o BXD_mvlmm + -o $outn assertEquals 0 $? - outfn=output/BXD_mvlmm.assoc.txt - assertEquals "65862" `wc -w < $outfn` - assertEquals "3088489421.94" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + outfn=output/$outn.assoc.txt + assertEquals "87816" `wc -w < $outfn` + assertEquals "3088458212.93" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testCenteredRelatednessMatrixKLOCO1() { @@ -79,7 +80,7 @@ testUnivariateLinearMixedModelLOCO1() { assertEquals 0 $? outfn=output/$outn.assoc.txt assertEquals "68" `wc -l < $outfn` - assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "15465346.22" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } shunit2=`which shunit2` diff --git a/test/lengthy_test_suite.sh b/test/lengthy_test_suite.sh new file mode 100755 index 0000000..327b2b2 --- /dev/null +++ b/test/lengthy_test_suite.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Long running tests go here + +gemma=../bin/gemma + +testPlinkStandardRelatednessMatrixK() { + testname=testPlinkStandardRelatednessMatrixK + datadir=../example + outfn=output/$testname.sXX.txt + rm -f $outfn + $gemma -bfile $datadir/HLC \ + -gk 2 -o $testname \ + -debug + assertEquals 0 $? + assertEquals "427" `wc -l < $outfn` + assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() { + # This test passes, but takes over 30 minutes to run! + # n=2 is original pheno in fam file + # n=1 is causal1 + # n=3..12 is causal2 + # n=13..22 is causal3 + # -n 1 2 3 15 is independent + testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes + datadir=../example + $gemma -bfile $datadir/HLC \ + -p $datadir/HLC.simu.pheno.txt \ + -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \ + -lmm 1 \ + -maf 0.1 \ + -n 1 2 3 15 \ + -c $datadir/HLC_covariates.txt \ + -debug \ + -o $testname + assertEquals 0 $? + outfn=output/$testname.assoc.txt + assertEquals "223243" `wc -l < $outfn` + assertEquals "89754977983.69" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +shunit2=`which shunit2` + +if [ -x "$shunit2" ]; then + echo run system shunit2 + . $shunit2 +elif [ -e ../contrib/shunit2-2.0.3/src/shell/shunit2 ]; then + echo run shunit2 provided in gemma repo + . ../contrib/shunit2-2.0.3/src/shell/shunit2 +else + echo "Can not find shunit2 - see INSTALL.md" +fi diff --git a/test/test_suite.sh b/test/test_suite.sh index fa66b7a..350fc27 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -30,7 +30,7 @@ testUnivariateLinearMixedModelFullLOCO1() { assertEquals 0 $? outfn=output/$outn.assoc.txt assertEquals "951" `wc -l < $outfn` - assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "267507851.98" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testCenteredRelatednessMatrixK() { @@ -58,8 +58,8 @@ testUnivariateLinearMixedModel() { grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt assertEquals 0 $? outfn=output/mouse_hs1940_CD8_lmm.assoc.txt - assertEquals "118459" `wc -w < $outfn` - assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "129228" `wc -w < $outfn` + assertEquals "4038540440.86" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testMultivariateLinearMixedModel() { @@ -105,30 +105,7 @@ testPlinkMultivariateLinearMixedModel() { assertEquals 0 $? outfn=output/$testname.assoc.txt assertEquals "223243" `wc -l < $outfn` - assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` -} - -testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() { - # n=2 is original pheno in fam file - # n=1 is causal1 - # n=3..12 is causal2 - # n=13..22 is causal3 - # -n 1 2 3 15 is independent - testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes - datadir=../example - $gemma -bfile $datadir/HLC \ - -p $datadir/HLC.simu.pheno.txt \ - -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \ - -lmm 1 \ - -maf 0.1 \ - -n 1 2 3 15 \ - -c $datadir/HLC_covariates.txt \ - -debug \ - -o $testname - assertEquals 0 $? - outfn=output/$testname.assoc.txt - assertEquals "223243" `wc -l < $outfn` - assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "89757159113.77" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } shunit2=`which shunit2` |