about summary refs log tree commit diff
path: root/test
diff options
context:
space:
mode:
author Pjotr Prins 2017-08-03 10:26:52 +0000
committer Pjotr Prins 2017-08-03 10:26:52 +0000
commit 449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch)
tree 63a4031267b10f587b695adb487aca5213889b20 /test
parent d8db988550d4cd0303f0b82a75499c2c94d97d45 (diff)
download pangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1.

What are the use cases?

1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K
2. User passes in -snps: all these SNPs are considered for GWA and K
3. User passes in -snps and -ksnps: All these SNPs are used for GWA,
   Ksnps are used for K
4. User passes in -loco: SNPs are split by chromosome (GWA incl., K
   excl.)
5. User passes in -snps, -gwasnps and -ksnps: this could mean that GWA
   is also subset explicitly (not yet implemented)

In all cases indicator_snp is honored and we get the most flexible way for
studying SNP combinations that can be passed in in different ways.

Overall added:

  - various comments in source code
  - tests in test framework incl. fast-check
  - NDEBUG compilation support in the Makefile
  - -debug switch for GEMMA debug output
  - debug.h which includes enforce functions which work like
    assert. Unlike assert, enforce also works in release compilation
  - -nind switch limits the number of individuals used
    (trim_individuals for testing)
  - enforcing tests of input files - e.g. are number of individuals correct
  - checks for memory allocation - we should add more of those
  - more checks for gsl results - we should add more of those
  - replaced strtoken with regex as a first case. They should all be
    replaced. strtoken is not thread safe, for one.
  - introduced C++ iterators
  - introduced C++ closure in BimBam LMM for cached processing
  - more localized initialization of variables - makes for demonstrably
    more correct code
  - -ksnps adds snps into setKSnps
  - -gwasnps adds snps into setGWASnps
  - both sets are computed by -loco
  - attempted to make the code easier to read
Diffstat (limited to 'test')
-rw-r--r-- test/dev_test_suite.sh 46
-rwxr-xr-x test/test_suite.sh 62
2 files changed, 95 insertions, 13 deletions
diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh
new file mode 100644
index 0000000..522cf3d
--- /dev/null
+++ b/test/dev_test_suite.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+gemma=../bin/gemma
+
+testCenteredRelatednessMatrixKLOCO1() {
+    outn=mouse_hs1940_LOCO1
+    $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \
+           -a ../example/mouse_hs1940.anno.txt -snps ../example/mouse_hs1940_snps.txt -nind 400 -loco 1 -gk -debug -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    outfn=output/$outn.cXX.txt
+    assertEquals 0 $?
+    assertEquals "400" `wc -l < $outfn`
+    assertEquals "0.312" `head -c 5 $outfn`
+    assertEquals "71.03" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testUnivariateLinearMixedModelLOCO1() {
+    outn=mouse_hs1940_CD8_LOCO1_lmm
+    $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \
+	   -n 1 \
+	   -loco 1 \
+           -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940_LOCO1.cXX.txt \
+	   -snps ../example/mouse_hs1940_snps.txt -lmm \
+	   -nind 400 \
+	   -debug \
+           -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    assertEquals 0 $?
+    outfn=output/$outn.assoc.txt
+    assertEquals "68" `wc -l < $outfn`
+    assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+shunit2=`which shunit2`
+
+if [ -x "$shunit2" ]; then
+    echo run system shunit2
+    . $shunit2
+elif [ -e shunit2-2.0.3/src/shell/shunit2 ]; then
+    echo run shunit2 provided in gemma repo
+    . shunit2-2.0.3/src/shell/shunit2
+else
+    echo "Can not find shunit2 - see INSTALL.md"
+fi
diff --git a/test/test_suite.sh b/test/test_suite.sh
index 467056e..625298e 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -2,11 +2,43 @@
 
 gemma=../bin/gemma
 
+testCenteredRelatednessMatrixKFullLOCO1() {
+    outn=mouse_hs1940_full_LOCO1
+    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+           -p ../example/mouse_hs1940.pheno.txt \
+           -a ../example/mouse_hs1940.anno.txt \
+           -loco 1 -gk -debug -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    outfn=output/$outn.cXX.txt
+    assertEquals 0 $?
+    assertEquals "1940" `wc -l < $outfn`
+    assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testUnivariateLinearMixedModelFullLOCO1() {
+    outn=mouse_hs1940_CD8_full_LOCO1_lmm
+    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+           -p ../example/mouse_hs1940.pheno.txt \
+	   -n 1 \
+	   -loco 1 \
+           -a ../example/mouse_hs1940.anno.txt \
+           -k ./output/mouse_hs1940_full_LOCO1.cXX.txt \
+	   -lmm \
+	   -debug \
+           -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    assertEquals 0 $?
+    outfn=output/$outn.assoc.txt
+    assertEquals "951" `wc -l < $outfn`
+    assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
 testCenteredRelatednessMatrixK() {
     $gemma -g ../example/mouse_hs1940.geno.txt.gz \
-      -p ../example/mouse_hs1940.pheno.txt \
-      -a ../example/mouse_hs1940.anno.txt \
-      -gk -o mouse_hs1940
+           -p ../example/mouse_hs1940.pheno.txt \
+           -gk -o mouse_hs1940
     assertEquals 0 $?
     grep "total computation time" < output/mouse_hs1940.log.txt
     assertEquals 0 $?
@@ -14,36 +46,40 @@ testCenteredRelatednessMatrixK() {
     assertEquals "1940" `wc -l < $outfn`
     assertEquals "3763600" `wc -w < $outfn`
     assertEquals "0.335" `head -c 5 $outfn`
-    assertEquals "24.9799" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+    assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 testUnivariateLinearMixedModel() {
     $gemma -g ../example/mouse_hs1940.geno.txt.gz \
-      -p ../example/mouse_hs1940.pheno.txt -n 1 \
-      -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \
-      -lmm -o mouse_hs1940_CD8_lmm
+           -p ../example/mouse_hs1940.pheno.txt \
+           -n 1 \
+           -a ../example/mouse_hs1940.anno.txt \
+           -k ./output/mouse_hs1940.cXX.txt \
+           -lmm \
+           -o mouse_hs1940_CD8_lmm
     assertEquals 0 $?
     grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
     assertEquals 0 $?
     outfn=output/mouse_hs1940_CD8_lmm.assoc.txt
     assertEquals "118459" `wc -w < $outfn`
-    assertEquals "92047" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+    assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 testMultivariateLinearMixedModel() {
     $gemma -g ../example/mouse_hs1940.geno.txt.gz \
-      -p ../example/mouse_hs1940.pheno.txt -n 1 6 \
-      -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \
-      -lmm -o mouse_hs1940_CD8MCH_lmm
+           -p ../example/mouse_hs1940.pheno.txt \
+           -n 1 6 \
+           -a ../example/mouse_hs1940.anno.txt \
+           -k ./output/mouse_hs1940.cXX.txt \
+           -lmm -o mouse_hs1940_CD8MCH_lmm
     assertEquals 0 $?
     grep "total computation time" < output/mouse_hs1940_CD8MCH_lmm.log.txt
     assertEquals 0 $?
 
     outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt
     assertEquals "139867" `wc -w < $outfn`
-    assertEquals "92079" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn`
+    assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
-
 shunit2=`which shunit2`
 
 if [ -x "$shunit2" ]; then