about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Makefile7
-rw-r--r--src/lm.cpp8
-rw-r--r--src/lmm.cpp202
-rw-r--r--src/param.h5
-rwxr-xr-xtest/dev_test_suite.sh13
-rwxr-xr-xtest/lengthy_test_suite.sh54
-rwxr-xr-xtest/test_suite.sh31
7 files changed, 156 insertions, 164 deletions
diff --git a/Makefile b/Makefile
index 81367d6..dafcb38 100644
--- a/Makefile
+++ b/Makefile
@@ -144,8 +144,15 @@ slow-check: all
 	cd test && ./test_suite.sh | tee ../test.log
 	grep -q 'success rate: 100%' test.log
 
+lengthy-check: all
+	rm -vf test/output/*
+	cd test && ./lengthy_test_suite.sh | tee ../lengthy_test.log
+	grep -q 'success rate: 100%' lengthy_test.log
+
 check: fast-check slow-check
 
+check-all: check lengthy-check
+
 clean:
 	rm -vf $(SRC_DIR)/*.o
 	rm -vf $(SRC_DIR)/*~
diff --git a/src/lm.cpp b/src/lm.cpp
index 83558e4..0c2a2bb 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -362,7 +362,7 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
     // Store summary data.
-    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0 };
     sumStat.push_back(SNPs);
   }
   cout << endl;
@@ -587,7 +587,7 @@ void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) {
     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
     // Store summary data.
-    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
     sumStat.push_back(SNPs);
   }
   cout << endl;
@@ -702,7 +702,7 @@ void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
     // Store summary data.
-    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
     sumStat.push_back(SNPs);
   }
   cout << endl;
@@ -844,7 +844,7 @@ void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) {
             p_lrt, p_score);
 
     // store summary data
-    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
     sumStat.push_back(SNPs);
 
     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 37f2f5b..e2f23a2 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -95,6 +95,7 @@ void LMM::CopyToParam(PARAM &cPar) {
 }
 
 void LMM::WriteFiles() {
+
   string file_str;
   file_str = path_out + "/" + file_out;
   file_str += ".assoc.txt";
@@ -105,150 +106,99 @@ void LMM::WriteFiles() {
     return;
   }
 
-  if (!file_gene.empty()) {
-    outfile << "geneID"
-            << "\t";
-
-    if (a_mode == 1) {
-      outfile << "beta"
-              << "\t"
-              << "se"
-              << "\t"
-              << "l_remle"
-              << "\t"
+  auto common_header = [&] () {
+    outfile << "beta" << "\t"
+            << "se" << "\t";
+
+    outfile << "logl_H1" << "\t";  // we may make this an option
+
+    switch(a_mode) {
+    case 1:
+      outfile << "l_remle" << "\t"
               << "p_wald" << endl;
-    } else if (a_mode == 2) {
-      outfile << "l_mle"
-              << "\t"
+      break;
+    case 2:
+      outfile << "l_mle" << "\t"
               << "p_lrt" << endl;
-    } else if (a_mode == 3) {
-      outfile << "beta"
-              << "\t"
-              << "se"
-              << "\t"
+      break;
+    case 3:
+      outfile << "p_score" << endl;
+      break;
+    case 4:
+      outfile << "l_remle" << "\t"
+              << "l_mle" << "\t"
+              << "p_wald" << "\t"
+              << "p_lrt" << "\t"
               << "p_score" << endl;
-    } else if (a_mode == 4) {
-      outfile << "beta"
-              << "\t"
-              << "se"
-              << "\t"
-              << "l_remle"
-              << "\t"
-              << "l_mle"
-              << "\t"
-              << "p_wald"
-              << "\t"
-              << "p_lrt"
-              << "\t"
-              << "p_score" << endl;
-    } else {
+      break;
     }
+  };
+
+  auto sumstats = [&] (SUMSTAT st) {
+    outfile << scientific << setprecision(6) << st.beta << "\t"
+    << st.se << "\t";
+
+    outfile << st.logl_H1 << "\t";
+
+    switch(a_mode) {
+    case 1:
+      outfile << st.lambda_remle << "\t"
+              << st.p_wald << endl;
+      break;
+    case 2:
+      outfile << st.lambda_mle << "\t"
+              << st.p_lrt << endl;
+      break;
+    case 3:
+      outfile << st.p_score << endl;
+      break;
+    case 4:
+      outfile << st.lambda_remle << "\t"
+              << st.lambda_mle << "\t"
+              << st.p_wald << "\t"
+              << st.p_lrt << "\t"
+              << st.p_score << endl;
+      break;
+    }
+  };
+
+
+  if (!file_gene.empty()) {
+    outfile << "geneID" << "\t";
+
+    common_header();
 
     for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) {
       outfile << snpInfo[t].rs_number << "\t";
-
-      if (a_mode == 1) {
-        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
-                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
-                << sumStat[t].p_wald << endl;
-      } else if (a_mode == 2) {
-        outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
-                << "\t" << sumStat[t].p_lrt << endl;
-      } else if (a_mode == 3) {
-        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
-                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
-      } else if (a_mode == 4) {
-        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
-                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
-                << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
-                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
-      } else {
-      }
+      sumstats(sumStat[t]);
     }
   } else {
     bool process_gwasnps = setGWASnps.size();
-    outfile << "chr"
-            << "\t"
-            << "rs"
-            << "\t"
-            << "ps"
-            << "\t"
-            << "n_miss"
-            << "\t"
-            << "allele1"
-            << "\t"
-            << "allele0"
-            << "\t"
-            << "af"
-            << "\t";
-
-    if (a_mode == 1) {
-      outfile << "beta"
-              << "\t"
-              << "se"
-              << "\t"
-              << "l_remle"
-              << "\t"
-              << "p_wald" << endl;
-    } else if (a_mode == 2) {
-      outfile << "l_mle"
-              << "\t"
-              << "p_lrt" << endl;
-    } else if (a_mode == 3) {
-      outfile << "beta"
-              << "\t"
-              << "se"
-              << "\t"
-              << "p_score" << endl;
-    } else if (a_mode == 4) {
-      outfile << "beta"
-              << "\t"
-              << "se"
-              << "\t"
-              << "l_remle"
-              << "\t"
-              << "l_mle"
-              << "\t"
-              << "p_wald"
-              << "\t"
-              << "p_lrt"
-              << "\t"
-              << "p_score" << endl;
-    } else {
-    }
+
+    outfile << "chr" << "\t"
+            << "rs" << "\t"
+            << "ps" << "\t"
+            << "n_miss" << "\t"
+            << "allele1" << "\t"
+            << "allele0" << "\t"
+            << "af" << "\t";
+
+    common_header();
 
     size_t t = 0;
     for (size_t i = 0; i < snpInfo.size(); ++i) {
-
       if (indicator_snp[i] == 0)
         continue;
       auto snp = snpInfo[i].rs_number;
       if (process_gwasnps && setGWASnps.count(snp) == 0)
         continue;
       // cout << t << endl;
-
       outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
               << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
               << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t"
               << fixed << setprecision(3) << snpInfo[i].maf << "\t";
 
-      if (a_mode == 1) {
-        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
-                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
-                << sumStat[t].p_wald << endl;
-      } else if (a_mode == 2) {
-        outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
-                << "\t" << sumStat[t].p_lrt << endl;
-      } else if (a_mode == 3) {
-        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
-                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
-      } else if (a_mode == 4) {
-        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
-                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
-                << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
-                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
-      } else {
-      }
+      sumstats(sumStat[t]);
       t++;
     }
   }
@@ -1299,7 +1249,7 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
     // Store summary data.
-    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
     sumStat.push_back(SNPs);
   }
   cout << endl;
@@ -1400,7 +1350,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
 
       // Store summary data.
       SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                      p_wald, p_lrt, p_score};
+                      p_wald, p_lrt, p_score, logl_H1};
       sumStat.push_back(SNPs);
     }
   };
@@ -1653,7 +1603,7 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
 
         // Store summary data.
         SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score};
+                        p_wald, p_lrt, p_score, logl_H1};
         sumStat.push_back(SNPs);
       }
     }
@@ -1930,7 +1880,7 @@ void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
 
         // Store summary data.
         SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score};
+                        p_wald, p_lrt, p_score, logl_H1};
         sumStat.push_back(SNPs);
       }
     }
@@ -2411,7 +2361,7 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
     // Store summary data.
-    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
     sumStat.push_back(SNPs);
   }
   cout << endl;
@@ -2589,7 +2539,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
     time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
     // Store summary data.
-    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
     sumStat.push_back(SNPs);
   }
   cout << endl;
diff --git a/src/param.h b/src/param.h
index 08b1e10..ff279bd 100644
--- a/src/param.h
+++ b/src/param.h
@@ -56,6 +56,9 @@ public:
   double p_wald;       // p value from a Wald test.
   double p_lrt;        // p value from a likelihood ratio test.
   double p_score;      // p value from a score test.
+  double logl_H1;      // log likelihood under the alternative
+                       // hypothesis as a measure of goodness of fit,
+                       // see https://github.com/genetics-statistics/GEMMA/issues/81
 };
 
 // Results for mvLMM.
@@ -118,7 +121,7 @@ public:
   bool mode_debug = false;
   uint issue; // enable tests for issue on github tracker
 
-  int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
+  uint a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
   int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
   vector<size_t> p_column; // Which phenotype column needs analysis.
   size_t d_pace;           // Display pace
diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh
index 2bd432e..37f6b28 100755
--- a/test/dev_test_suite.sh
+++ b/test/dev_test_suite.sh
@@ -31,7 +31,8 @@ testBXDStandardRelatednessMatrixK() {
     assertEquals "-116.11" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
-testBXDMultivariateLinearMixedModel() {
+testBXDLMMLikelihoodRatio() {
+    outn=BXD_LMM_LR
     $gemma -g ../example/BXD_geno.txt.gz \
            -p ../example/BXD_pheno.txt \
            -c ../example/BXD_covariates2.txt \
@@ -39,12 +40,12 @@ testBXDMultivariateLinearMixedModel() {
            -k ./output/BXD.cXX.txt \
            -lmm 2 -maf 0.1 \
            -debug \
-           -o BXD_mvlmm
+           -o $outn
     assertEquals 0 $?
 
-    outfn=output/BXD_mvlmm.assoc.txt
-    assertEquals "65862" `wc -w < $outfn`
-    assertEquals "3088489421.94" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+    outfn=output/$outn.assoc.txt
+    assertEquals "87816" `wc -w < $outfn`
+    assertEquals "3088458212.93" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 testCenteredRelatednessMatrixKLOCO1() {
@@ -79,7 +80,7 @@ testUnivariateLinearMixedModelLOCO1() {
     assertEquals 0 $?
     outfn=output/$outn.assoc.txt
     assertEquals "68" `wc -l < $outfn`
-    assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+    assertEquals "15465346.22" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 shunit2=`which shunit2`
diff --git a/test/lengthy_test_suite.sh b/test/lengthy_test_suite.sh
new file mode 100755
index 0000000..327b2b2
--- /dev/null
+++ b/test/lengthy_test_suite.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Long running tests go here
+
+gemma=../bin/gemma
+
+testPlinkStandardRelatednessMatrixK() {
+    testname=testPlinkStandardRelatednessMatrixK
+    datadir=../example
+    outfn=output/$testname.sXX.txt
+    rm -f $outfn
+    $gemma -bfile $datadir/HLC \
+           -gk 2 -o $testname \
+           -debug
+    assertEquals 0 $?
+    assertEquals "427" `wc -l < $outfn`
+    assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() {
+    # This test passes, but takes over 30 minutes to run!
+    # n=2 is original pheno in fam file
+    # n=1 is causal1
+    # n=3..12 is causal2
+    # n=13..22 is causal3
+    # -n 1 2 3 15 is independent
+    testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes
+    datadir=../example
+    $gemma -bfile $datadir/HLC \
+           -p $datadir/HLC.simu.pheno.txt \
+           -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \
+           -lmm 1 \
+           -maf 0.1 \
+           -n 1 2 3 15 \
+           -c $datadir/HLC_covariates.txt \
+           -debug \
+           -o $testname
+    assertEquals 0 $?
+    outfn=output/$testname.assoc.txt
+    assertEquals "223243" `wc -l < $outfn`
+    assertEquals "89754977983.69" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+shunit2=`which shunit2`
+
+if [ -x "$shunit2" ]; then
+    echo run system shunit2
+    . $shunit2
+elif [ -e ../contrib/shunit2-2.0.3/src/shell/shunit2 ]; then
+    echo run shunit2 provided in gemma repo
+    . ../contrib/shunit2-2.0.3/src/shell/shunit2
+else
+    echo "Can not find shunit2 - see INSTALL.md"
+fi
diff --git a/test/test_suite.sh b/test/test_suite.sh
index fa66b7a..350fc27 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -30,7 +30,7 @@ testUnivariateLinearMixedModelFullLOCO1() {
     assertEquals 0 $?
     outfn=output/$outn.assoc.txt
     assertEquals "951" `wc -l < $outfn`
-    assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+    assertEquals "267507851.98" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 testCenteredRelatednessMatrixK() {
@@ -58,8 +58,8 @@ testUnivariateLinearMixedModel() {
     grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
     assertEquals 0 $?
     outfn=output/mouse_hs1940_CD8_lmm.assoc.txt
-    assertEquals "118459" `wc -w < $outfn`
-    assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+    assertEquals "129228" `wc -w < $outfn`
+    assertEquals "4038540440.86" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 testMultivariateLinearMixedModel() {
@@ -105,30 +105,7 @@ testPlinkMultivariateLinearMixedModel() {
     assertEquals 0 $?
     outfn=output/$testname.assoc.txt
     assertEquals "223243" `wc -l < $outfn`
-    assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
-}
-
-testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() {
-    # n=2 is original pheno in fam file
-    # n=1 is causal1
-    # n=3..12 is causal2
-    # n=13..22 is causal3
-    # -n 1 2 3 15 is independent
-    testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes
-    datadir=../example
-    $gemma -bfile $datadir/HLC \
-           -p $datadir/HLC.simu.pheno.txt \
-           -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \
-           -lmm 1 \
-           -maf 0.1 \
-           -n 1 2 3 15 \
-           -c $datadir/HLC_covariates.txt \
-           -debug \
-           -o $testname
-    assertEquals 0 $?
-    outfn=output/$testname.assoc.txt
-    assertEquals "223243" `wc -l < $outfn`
-    assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+    assertEquals "89757159113.77" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 shunit2=`which shunit2`