aboutsummaryrefslogtreecommitdiff
path: root/test/test_suite.sh
diff options
context:
space:
mode:
authorPjotr Prins2017-08-03 10:26:52 +0000
committerPjotr Prins2017-08-03 10:26:52 +0000
commit449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch)
tree63a4031267b10f587b695adb487aca5213889b20 /test/test_suite.sh
parentd8db988550d4cd0303f0b82a75499c2c94d97d45 (diff)
downloadpangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1. What are the use cases? 1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K 2. User passes in -snps: all these SNPs are considered for GWA and K 3. User passes in -snps and -ksnps: all these SNPs are used for GWA, Ksnps are used for K 4. User passes in -loco: SNPs are split by chromosome (GWA incl., K excl.) 5. User passes in -snps, -gwasnps and -ksnps: this could mean that GWA is also subset explicitly (not yet implemented). In all cases indicator_snp is honored and we get the most flexible way for studying SNP combinations that can be passed in in different ways. Overall added: - various comments in source code - tests in test framework incl. fast-check - NDEBUG compilation support in the Makefile - -debug switch for GEMMA debug output - debug.h which includes enforce functions which work like assert. Unlike assert, enforce also works in release compilation - -nind switch limits the number of individuals used (trim_individuals for testing) - enforcing tests of input files - e.g. is the number of individuals correct - checks for memory allocation - we should add more of those - more checks for gsl results - we should add more of those - replaced strtoken with regex as a first case. They should all be replaced. strtoken is not thread safe, for one. - introduced C++ iterators - introduced C++ closure in BimBam LMM for cached processing - more localized initialization of variables - makes for demonstrably more correct code - -ksnps adds snps into setKSnps - -gwasnps adds snps into setGWASnps - both sets are computed by -loco - attempted to make the code easier to read
Diffstat (limited to 'test/test_suite.sh')
-rwxr-xr-xtest/test_suite.sh62
1 files changed, 49 insertions, 13 deletions
diff --git a/test/test_suite.sh b/test/test_suite.sh
index 467056e..625298e 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -2,11 +2,43 @@
gemma=../bin/gemma
+testCenteredRelatednessMatrixKFullLOCO1() { # runs gemma -gk with -loco 1, then checks exit status, log and cXX output
+ outn=mouse_hs1940_full_LOCO1
+ $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+ -p ../example/mouse_hs1940.pheno.txt \
+ -a ../example/mouse_hs1940.anno.txt \
+ -loco 1 -gk -debug -o $outn
+ assertEquals 0 $? # exit status of the gemma run
+ grep "total computation time" < output/$outn.log.txt
+ assertEquals 0 $? # must directly follow grep: an intervening assignment would reset $? to 0
+ outfn=output/$outn.cXX.txt
+ assertEquals "1940" `wc -l < $outfn`
+ assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testUnivariateLinearMixedModelFullLOCO1() { # runs gemma -lmm with -loco 1 on phenotype column 1 (-n 1), then checks status, log and assoc output
+ outn=mouse_hs1940_CD8_full_LOCO1_lmm
+ $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+ -p ../example/mouse_hs1940.pheno.txt \
+ -n 1 \
+ -loco 1 \
+ -a ../example/mouse_hs1940.anno.txt \
+ -k ./output/mouse_hs1940_full_LOCO1.cXX.txt \
+ -lmm \
+ -debug \
+ -o $outn
+ assertEquals 0 $? # exit status of the gemma run; the -k file above is presumably the one written by testCenteredRelatednessMatrixKFullLOCO1 - confirm test ordering
+ grep "total computation time" < output/$outn.log.txt
+ assertEquals 0 $? # grep status: the log must contain the "total computation time" line
+ outfn=output/$outn.assoc.txt
+ assertEquals "951" `wc -l < $outfn` # expected number of lines in the association file
+ assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` # checksum: per whitespace-separated field, add the substr() result formatted with %.2f; print the total with %.2f
+}
+
testCenteredRelatednessMatrixK() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
- -p ../example/mouse_hs1940.pheno.txt \
- -a ../example/mouse_hs1940.anno.txt \
- -gk -o mouse_hs1940
+ -p ../example/mouse_hs1940.pheno.txt \
+ -gk -o mouse_hs1940
assertEquals 0 $?
grep "total computation time" < output/mouse_hs1940.log.txt
assertEquals 0 $?
@@ -14,36 +46,40 @@ testCenteredRelatednessMatrixK() {
assertEquals "1940" `wc -l < $outfn`
assertEquals "3763600" `wc -w < $outfn`
assertEquals "0.335" `head -c 5 $outfn`
- assertEquals "24.9799" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+ assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testUnivariateLinearMixedModel() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
- -p ../example/mouse_hs1940.pheno.txt -n 1 \
- -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \
- -lmm -o mouse_hs1940_CD8_lmm
+ -p ../example/mouse_hs1940.pheno.txt \
+ -n 1 \
+ -a ../example/mouse_hs1940.anno.txt \
+ -k ./output/mouse_hs1940.cXX.txt \
+ -lmm \
+ -o mouse_hs1940_CD8_lmm
assertEquals 0 $?
grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
assertEquals 0 $?
outfn=output/mouse_hs1940_CD8_lmm.assoc.txt
assertEquals "118459" `wc -w < $outfn`
- assertEquals "92047" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+ assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testMultivariateLinearMixedModel() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
- -p ../example/mouse_hs1940.pheno.txt -n 1 6 \
- -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \
- -lmm -o mouse_hs1940_CD8MCH_lmm
+ -p ../example/mouse_hs1940.pheno.txt \
+ -n 1 6 \
+ -a ../example/mouse_hs1940.anno.txt \
+ -k ./output/mouse_hs1940.cXX.txt \
+ -lmm -o mouse_hs1940_CD8MCH_lmm
assertEquals 0 $?
grep "total computation time" < output/mouse_hs1940_CD8MCH_lmm.log.txt
assertEquals 0 $?
outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt
assertEquals "139867" `wc -w < $outfn`
- assertEquals "92079" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+ assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
-
shunit2=`which shunit2`
if [ -x "$shunit2" ]; then