aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile7
-rw-r--r--src/lm.cpp8
-rw-r--r--src/lmm.cpp202
-rw-r--r--src/param.h5
-rwxr-xr-xtest/dev_test_suite.sh13
-rwxr-xr-xtest/lengthy_test_suite.sh54
-rwxr-xr-xtest/test_suite.sh31
7 files changed, 156 insertions, 164 deletions
diff --git a/Makefile b/Makefile
index 81367d6..dafcb38 100644
--- a/Makefile
+++ b/Makefile
@@ -144,8 +144,15 @@ slow-check: all
cd test && ./test_suite.sh | tee ../test.log
grep -q 'success rate: 100%' test.log
+lengthy-check: all
+ rm -vf test/output/*
+ cd test && ./lengthy_test_suite.sh | tee ../lengthy_test.log
+ grep -q 'success rate: 100%' lengthy_test.log
+
check: fast-check slow-check
+check-all: check lengthy-check
+
clean:
rm -vf $(SRC_DIR)/*.o
rm -vf $(SRC_DIR)/*~
diff --git a/src/lm.cpp b/src/lm.cpp
index 83558e4..0c2a2bb 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -362,7 +362,7 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Store summary data.
- SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0 };
sumStat.push_back(SNPs);
}
cout << endl;
@@ -587,7 +587,7 @@ void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) {
time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Store summary data.
- SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
sumStat.push_back(SNPs);
}
cout << endl;
@@ -702,7 +702,7 @@ void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Store summary data.
- SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
sumStat.push_back(SNPs);
}
cout << endl;
@@ -844,7 +844,7 @@ void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) {
p_lrt, p_score);
// store summary data
- SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
sumStat.push_back(SNPs);
time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 37f2f5b..e2f23a2 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -95,6 +95,7 @@ void LMM::CopyToParam(PARAM &cPar) {
}
void LMM::WriteFiles() {
+
string file_str;
file_str = path_out + "/" + file_out;
file_str += ".assoc.txt";
@@ -105,150 +106,99 @@ void LMM::WriteFiles() {
return;
}
- if (!file_gene.empty()) {
- outfile << "geneID"
- << "\t";
-
- if (a_mode == 1) {
- outfile << "beta"
- << "\t"
- << "se"
- << "\t"
- << "l_remle"
- << "\t"
+ auto common_header = [&] () {
+ outfile << "beta" << "\t"
+ << "se" << "\t";
+
+ outfile << "logl_H1" << "\t"; // we may make this an option
+
+ switch(a_mode) {
+ case 1:
+ outfile << "l_remle" << "\t"
<< "p_wald" << endl;
- } else if (a_mode == 2) {
- outfile << "l_mle"
- << "\t"
+ break;
+ case 2:
+ outfile << "l_mle" << "\t"
<< "p_lrt" << endl;
- } else if (a_mode == 3) {
- outfile << "beta"
- << "\t"
- << "se"
- << "\t"
+ break;
+ case 3:
+ outfile << "p_score" << endl;
+ break;
+ case 4:
+ outfile << "l_remle" << "\t"
+ << "l_mle" << "\t"
+ << "p_wald" << "\t"
+ << "p_lrt" << "\t"
<< "p_score" << endl;
- } else if (a_mode == 4) {
- outfile << "beta"
- << "\t"
- << "se"
- << "\t"
- << "l_remle"
- << "\t"
- << "l_mle"
- << "\t"
- << "p_wald"
- << "\t"
- << "p_lrt"
- << "\t"
- << "p_score" << endl;
- } else {
+ break;
}
+ };
+
+ auto sumstats = [&] (SUMSTAT st) {
+ outfile << scientific << setprecision(6) << st.beta << "\t"
+ << st.se << "\t";
+
+ outfile << st.logl_H1 << "\t";
+
+ switch(a_mode) {
+ case 1:
+ outfile << st.lambda_remle << "\t"
+ << st.p_wald << endl;
+ break;
+ case 2:
+ outfile << st.lambda_mle << "\t"
+ << st.p_lrt << endl;
+ break;
+ case 3:
+ outfile << st.p_score << endl;
+ break;
+ case 4:
+ outfile << st.lambda_remle << "\t"
+ << st.lambda_mle << "\t"
+ << st.p_wald << "\t"
+ << st.p_lrt << "\t"
+ << st.p_score << endl;
+ break;
+ }
+ };
+
+
+ if (!file_gene.empty()) {
+ outfile << "geneID" << "\t";
+
+ common_header();
for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) {
outfile << snpInfo[t].rs_number << "\t";
-
- if (a_mode == 1) {
- outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
- << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
- << sumStat[t].p_wald << endl;
- } else if (a_mode == 2) {
- outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
- << "\t" << sumStat[t].p_lrt << endl;
- } else if (a_mode == 3) {
- outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
- << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
- } else if (a_mode == 4) {
- outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
- << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
- << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
- << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
- } else {
- }
+ sumstats(sumStat[t]);
}
} else {
bool process_gwasnps = setGWASnps.size();
- outfile << "chr"
- << "\t"
- << "rs"
- << "\t"
- << "ps"
- << "\t"
- << "n_miss"
- << "\t"
- << "allele1"
- << "\t"
- << "allele0"
- << "\t"
- << "af"
- << "\t";
-
- if (a_mode == 1) {
- outfile << "beta"
- << "\t"
- << "se"
- << "\t"
- << "l_remle"
- << "\t"
- << "p_wald" << endl;
- } else if (a_mode == 2) {
- outfile << "l_mle"
- << "\t"
- << "p_lrt" << endl;
- } else if (a_mode == 3) {
- outfile << "beta"
- << "\t"
- << "se"
- << "\t"
- << "p_score" << endl;
- } else if (a_mode == 4) {
- outfile << "beta"
- << "\t"
- << "se"
- << "\t"
- << "l_remle"
- << "\t"
- << "l_mle"
- << "\t"
- << "p_wald"
- << "\t"
- << "p_lrt"
- << "\t"
- << "p_score" << endl;
- } else {
- }
+
+ outfile << "chr" << "\t"
+ << "rs" << "\t"
+ << "ps" << "\t"
+ << "n_miss" << "\t"
+ << "allele1" << "\t"
+ << "allele0" << "\t"
+ << "af" << "\t";
+
+ common_header();
size_t t = 0;
for (size_t i = 0; i < snpInfo.size(); ++i) {
-
if (indicator_snp[i] == 0)
continue;
auto snp = snpInfo[i].rs_number;
if (process_gwasnps && setGWASnps.count(snp) == 0)
continue;
// cout << t << endl;
-
outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
<< snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
<< snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t"
<< fixed << setprecision(3) << snpInfo[i].maf << "\t";
- if (a_mode == 1) {
- outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
- << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
- << sumStat[t].p_wald << endl;
- } else if (a_mode == 2) {
- outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
- << "\t" << sumStat[t].p_lrt << endl;
- } else if (a_mode == 3) {
- outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
- << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
- } else if (a_mode == 4) {
- outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
- << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
- << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
- << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
- } else {
- }
+ sumstats(sumStat[t]);
t++;
}
}
@@ -1299,7 +1249,7 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Store summary data.
- SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
sumStat.push_back(SNPs);
}
cout << endl;
@@ -1400,7 +1350,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
// Store summary data.
SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
+ p_wald, p_lrt, p_score, logl_H1};
sumStat.push_back(SNPs);
}
};
@@ -1653,7 +1603,7 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
// Store summary data.
SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
+ p_wald, p_lrt, p_score, logl_H1};
sumStat.push_back(SNPs);
}
}
@@ -1930,7 +1880,7 @@ void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
// Store summary data.
SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
+ p_wald, p_lrt, p_score, logl_H1};
sumStat.push_back(SNPs);
}
}
@@ -2411,7 +2361,7 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Store summary data.
- SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
sumStat.push_back(SNPs);
}
cout << endl;
@@ -2589,7 +2539,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Store summary data.
- SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
sumStat.push_back(SNPs);
}
cout << endl;
diff --git a/src/param.h b/src/param.h
index 08b1e10..ff279bd 100644
--- a/src/param.h
+++ b/src/param.h
@@ -56,6 +56,9 @@ public:
double p_wald; // p value from a Wald test.
double p_lrt; // p value from a likelihood ratio test.
double p_score; // p value from a score test.
+ double logl_H1; // log likelihood under the alternative
+ // hypothesis as a measure of goodness of fit,
+ // see https://github.com/genetics-statistics/GEMMA/issues/81
};
// Results for mvLMM.
@@ -118,7 +121,7 @@ public:
bool mode_debug = false;
uint issue; // enable tests for issue on github tracker
- int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
+ uint a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
vector<size_t> p_column; // Which phenotype column needs analysis.
size_t d_pace; // Display pace
diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh
index 2bd432e..37f6b28 100755
--- a/test/dev_test_suite.sh
+++ b/test/dev_test_suite.sh
@@ -31,7 +31,8 @@ testBXDStandardRelatednessMatrixK() {
assertEquals "-116.11" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
-testBXDMultivariateLinearMixedModel() {
+testBXDLMMLikelihoodRatio() {
+ outn=BXD_LMM_LR
$gemma -g ../example/BXD_geno.txt.gz \
-p ../example/BXD_pheno.txt \
-c ../example/BXD_covariates2.txt \
@@ -39,12 +40,12 @@ testBXDMultivariateLinearMixedModel() {
-k ./output/BXD.cXX.txt \
-lmm 2 -maf 0.1 \
-debug \
- -o BXD_mvlmm
+ -o $outn
assertEquals 0 $?
- outfn=output/BXD_mvlmm.assoc.txt
- assertEquals "65862" `wc -w < $outfn`
- assertEquals "3088489421.94" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ outfn=output/$outn.assoc.txt
+ assertEquals "87816" `wc -w < $outfn`
+ assertEquals "3088458212.93" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testCenteredRelatednessMatrixKLOCO1() {
@@ -79,7 +80,7 @@ testUnivariateLinearMixedModelLOCO1() {
assertEquals 0 $?
outfn=output/$outn.assoc.txt
assertEquals "68" `wc -l < $outfn`
- assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals "15465346.22" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
shunit2=`which shunit2`
diff --git a/test/lengthy_test_suite.sh b/test/lengthy_test_suite.sh
new file mode 100755
index 0000000..327b2b2
--- /dev/null
+++ b/test/lengthy_test_suite.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Long running tests go here
+
+gemma=../bin/gemma
+
+testPlinkStandardRelatednessMatrixK() {
+ testname=testPlinkStandardRelatednessMatrixK
+ datadir=../example
+ outfn=output/$testname.sXX.txt
+ rm -f $outfn
+ $gemma -bfile $datadir/HLC \
+ -gk 2 -o $testname \
+ -debug
+ assertEquals 0 $?
+ assertEquals "427" `wc -l < $outfn`
+ assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() {
+ # This test passes, but takes over 30 minutes to run!
+ # n=2 is original pheno in fam file
+ # n=1 is causal1
+ # n=3..12 is causal2
+ # n=13..22 is causal3
+ # -n 1 2 3 15 is independent
+ testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes
+ datadir=../example
+ $gemma -bfile $datadir/HLC \
+ -p $datadir/HLC.simu.pheno.txt \
+ -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \
+ -lmm 1 \
+ -maf 0.1 \
+ -n 1 2 3 15 \
+ -c $datadir/HLC_covariates.txt \
+ -debug \
+ -o $testname
+ assertEquals 0 $?
+ outfn=output/$testname.assoc.txt
+ assertEquals "223243" `wc -l < $outfn`
+ assertEquals "89754977983.69" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+shunit2=`which shunit2`
+
+if [ -x "$shunit2" ]; then
+ echo run system shunit2
+ . $shunit2
+elif [ -e ../contrib/shunit2-2.0.3/src/shell/shunit2 ]; then
+ echo run shunit2 provided in gemma repo
+ . ../contrib/shunit2-2.0.3/src/shell/shunit2
+else
+ echo "Can not find shunit2 - see INSTALL.md"
+fi
diff --git a/test/test_suite.sh b/test/test_suite.sh
index fa66b7a..350fc27 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -30,7 +30,7 @@ testUnivariateLinearMixedModelFullLOCO1() {
assertEquals 0 $?
outfn=output/$outn.assoc.txt
assertEquals "951" `wc -l < $outfn`
- assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals "267507851.98" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testCenteredRelatednessMatrixK() {
@@ -58,8 +58,8 @@ testUnivariateLinearMixedModel() {
grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
assertEquals 0 $?
outfn=output/mouse_hs1940_CD8_lmm.assoc.txt
- assertEquals "118459" `wc -w < $outfn`
- assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals "129228" `wc -w < $outfn`
+ assertEquals "4038540440.86" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testMultivariateLinearMixedModel() {
@@ -105,30 +105,7 @@ testPlinkMultivariateLinearMixedModel() {
assertEquals 0 $?
outfn=output/$testname.assoc.txt
assertEquals "223243" `wc -l < $outfn`
- assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
-}
-
-testPlinkMultivariateLinearMixedModelMultiplePhenotypes_Issue58() {
- # n=2 is original pheno in fam file
- # n=1 is causal1
- # n=3..12 is causal2
- # n=13..22 is causal3
- # -n 1 2 3 15 is independent
- testname=testPlinkMultivariateLinearMixedModelMultiplePhenotypes
- datadir=../example
- $gemma -bfile $datadir/HLC \
- -p $datadir/HLC.simu.pheno.txt \
- -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \
- -lmm 1 \
- -maf 0.1 \
- -n 1 2 3 15 \
- -c $datadir/HLC_covariates.txt \
- -debug \
- -o $testname
- assertEquals 0 $?
- outfn=output/$testname.assoc.txt
- assertEquals "223243" `wc -l < $outfn`
- assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals "89757159113.77" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
shunit2=`which shunit2`