aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author Pjotr Prins 2017-08-03 10:26:52 +0000
committer Pjotr Prins 2017-08-03 10:26:52 +0000
commit 449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch)
tree 63a4031267b10f587b695adb487aca5213889b20 /test
parent d8db988550d4cd0303f0b82a75499c2c94d97d45 (diff)
download pangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1. What are the use cases? 1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K 2. User passes in -snps: all these SNPs are considered for GWA and K 3. User passes in -snps and -ksnps: All these SNPs are used for GWA, Ksnps are used for K 4. User passes in -loco: SNPs are split by chromosome (GWA incl., K excl.) 5. User passes in -snps, -gwasnps and -ksnps: could mean that GWA is also subset explicitly (nyi) In all cases indicator_snp is honored and we get the most flexible way for studying SNP combinations that can be passed in in different ways. Overall added: - various comments in source code - tests in test framework incl. fast-check - NDEBUG compilation support in the Makefile - -debug switch for GEMMA debug output - debug.h which includes enforce functions which work like assert. Unlike assert, enforce also works in release compilation - -nind switch limits the number of individuals used (trim_individuals for testing) - enforcing tests of input files - e.g. are number of individuals correct - checks for memory allocation - we should add more of those - more checks for gsl results - we should add more of those - replaced strtoken with regex as a first case. They should all be replaced. strtoken is not thread safe, for one. - introduced C++ iterators - introduced C++ closure in BimBam LMM for cached processing - more localized initialization of variables - makes for demonstrably more correct code - -ksnps adds snps into setKSnps - -gwasnps adds snps into setGWASnps - both sets are computed by -loco - attempted to make the code easier to read
Diffstat (limited to 'test')
-rw-r--r-- test/dev_test_suite.sh 46
-rwxr-xr-x test/test_suite.sh 62
2 files changed, 95 insertions, 13 deletions
diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh
new file mode 100644
index 0000000..522cf3d
--- /dev/null
+++ b/test/dev_test_suite.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+gemma=../bin/gemma # binary under test; relative path, so presumably the suite is run from inside test/ (TODO confirm)
+
+testCenteredRelatednessMatrixKLOCO1() { # shunit2 test: -gk run with -loco 1, -snps and -nind 400; checks the log and the resulting cXX.txt file
+ outn=mouse_hs1940_LOCO1
+ $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \
+ -a ../example/mouse_hs1940.anno.txt -snps ../example/mouse_hs1940_snps.txt -nind 400 -loco 1 -gk -debug -o $outn
+ assertEquals 0 $? # exit status of the gemma run
+ grep "total computation time" < output/$outn.log.txt
+ assertEquals 0 $? # must directly follow the grep: an assignment in between resets $? to 0 and hides a failed grep
+ outfn=output/$outn.cXX.txt
+ assertEquals "400" `wc -l < $outfn` # expected line count of the matrix file (matches -nind 400)
+ assertEquals "0.312" `head -c 5 $outfn` # first five bytes of the file
+ assertEquals "71.03" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` # checksum: each whitespace-separated field is cut with substr to 6 chars, formatted with %.2f and summed
+}
+
+testUnivariateLinearMixedModelLOCO1() { # shunit2 test: -lmm run with -loco 1 and -nind 400; -k points at the cXX.txt file that testCenteredRelatednessMatrixKLOCO1 checks, so presumably that test must run first
+ outn=mouse_hs1940_CD8_LOCO1_lmm
+ $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \
+ -n 1 \
+ -loco 1 \
+ -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940_LOCO1.cXX.txt \
+ -snps ../example/mouse_hs1940_snps.txt -lmm \
+ -nind 400 \
+ -debug \
+ -o $outn
+ assertEquals 0 $? # exit status of the gemma run
+ grep "total computation time" < output/$outn.log.txt
+ assertEquals 0 $? # exit status of the grep: the log must contain the timing line
+ outfn=output/$outn.assoc.txt
+ assertEquals "68" `wc -l < $outfn` # expected line count of the assoc output
+ assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` # checksum: each whitespace-separated field is cut with substr to 6 chars, formatted with %.2f and summed
+}
+
+shunit2=$(command -v shunit2) # command -v is the POSIX way to locate a command; empty when shunit2 is not on PATH
+
+if [ -x "$shunit2" ]; then
+ echo run system shunit2
+ . "$shunit2"
+elif [ -e shunit2-2.0.3/src/shell/shunit2 ]; then
+ echo run shunit2 provided in gemma repo
+ . shunit2-2.0.3/src/shell/shunit2
+else
+ echo "Can not find shunit2 - see INSTALL.md" >&2; exit 1 # no test ran, so report failure instead of exiting 0
+fi
diff --git a/test/test_suite.sh b/test/test_suite.sh
index 467056e..625298e 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -2,11 +2,43 @@
gemma=../bin/gemma
+testCenteredRelatednessMatrixKFullLOCO1() { # shunit2 test: -gk run with -loco 1 and no -snps/-nind restriction; checks the log and the resulting cXX.txt file
+ outn=mouse_hs1940_full_LOCO1
+ $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+ -p ../example/mouse_hs1940.pheno.txt \
+ -a ../example/mouse_hs1940.anno.txt \
+ -loco 1 -gk -debug -o $outn
+ assertEquals 0 $? # exit status of the gemma run
+ grep "total computation time" < output/$outn.log.txt
+ assertEquals 0 $? # must directly follow the grep: an assignment in between resets $? to 0 and hides a failed grep
+ outfn=output/$outn.cXX.txt
+ assertEquals "1940" `wc -l < $outfn` # expected line count of the matrix file
+ assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` # checksum: each whitespace-separated field is cut with substr to 6 chars, formatted with %.2f and summed
+}
+
+testUnivariateLinearMixedModelFullLOCO1() { # shunit2 test: -lmm run with -loco 1; -k points at the cXX.txt file that testCenteredRelatednessMatrixKFullLOCO1 checks, so presumably that test must run first
+ outn=mouse_hs1940_CD8_full_LOCO1_lmm
+ $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+ -p ../example/mouse_hs1940.pheno.txt \
+ -n 1 \
+ -loco 1 \
+ -a ../example/mouse_hs1940.anno.txt \
+ -k ./output/mouse_hs1940_full_LOCO1.cXX.txt \
+ -lmm \
+ -debug \
+ -o $outn
+ assertEquals 0 $? # exit status of the gemma run
+ grep "total computation time" < output/$outn.log.txt
+ assertEquals 0 $? # exit status of the grep: the log must contain the timing line
+ outfn=output/$outn.assoc.txt
+ assertEquals "951" `wc -l < $outfn` # expected line count of the assoc output
+ assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` # checksum: each whitespace-separated field is cut with substr to 6 chars, formatted with %.2f and summed
+}
+
testCenteredRelatednessMatrixK() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
- -p ../example/mouse_hs1940.pheno.txt \
- -a ../example/mouse_hs1940.anno.txt \
- -gk -o mouse_hs1940
+ -p ../example/mouse_hs1940.pheno.txt \
+ -gk -o mouse_hs1940
assertEquals 0 $?
grep "total computation time" < output/mouse_hs1940.log.txt
assertEquals 0 $?
@@ -14,36 +46,40 @@ testCenteredRelatednessMatrixK() {
assertEquals "1940" `wc -l < $outfn`
assertEquals "3763600" `wc -w < $outfn`
assertEquals "0.335" `head -c 5 $outfn`
- assertEquals "24.9799" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+ assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testUnivariateLinearMixedModel() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
- -p ../example/mouse_hs1940.pheno.txt -n 1 \
- -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \
- -lmm -o mouse_hs1940_CD8_lmm
+ -p ../example/mouse_hs1940.pheno.txt \
+ -n 1 \
+ -a ../example/mouse_hs1940.anno.txt \
+ -k ./output/mouse_hs1940.cXX.txt \
+ -lmm \
+ -o mouse_hs1940_CD8_lmm
assertEquals 0 $?
grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
assertEquals 0 $?
outfn=output/mouse_hs1940_CD8_lmm.assoc.txt
assertEquals "118459" `wc -w < $outfn`
- assertEquals "92047" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+ assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testMultivariateLinearMixedModel() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
- -p ../example/mouse_hs1940.pheno.txt -n 1 6 \
- -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \
- -lmm -o mouse_hs1940_CD8MCH_lmm
+ -p ../example/mouse_hs1940.pheno.txt \
+ -n 1 6 \
+ -a ../example/mouse_hs1940.anno.txt \
+ -k ./output/mouse_hs1940.cXX.txt \
+ -lmm -o mouse_hs1940_CD8MCH_lmm
assertEquals 0 $?
grep "total computation time" < output/mouse_hs1940_CD8MCH_lmm.log.txt
assertEquals 0 $?
outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt
assertEquals "139867" `wc -w < $outfn`
- assertEquals "92079" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+ assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
-
shunit2=`which shunit2`
if [ -x "$shunit2" ]; then