diff options
author | Pjotr Prins | 2017-08-03 10:26:52 +0000 |
---|---|---|
committer | Pjotr Prins | 2017-08-03 10:26:52 +0000 |
commit | 449d882a3b33ef81ef4f0127c3932b01fa796dbb (patch) | |
tree | 63a4031267b10f587b695adb487aca5213889b20 /test | |
parent | d8db988550d4cd0303f0b82a75499c2c94d97d45 (diff) | |
download | pangemma-449d882a3b33ef81ef4f0127c3932b01fa796dbb.tar.gz |
LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco
1 switch for LOCO of chromosome 1.
What are the use cases?
1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K
2. User passes in -snps: all these SNPs are considered for GWA and K
3. User passes in -snps and -ksnps: All these SNPs are used for GWA,
Ksnps are used for K
4. User passes in -loco: SNPs are split by chromosome (GWA incl., K
excl.)
5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA
is subset explicitly (not yet implemented)
In all cases indicator_snp is honored and we get the most flexible way for
studying SNP combinations that can be passed in in different ways.
Overall added:
- various comments in source code
- tests in test framework incl. fast-check
- NDEBUG compilation support in the Makefile
- -debug switch for GEMMA debug output
- debug.h which includes enforce functions which work like
assert. Unlike assert, enforce also works in release compilation
- -nind switch to limit the number of individuals used
(trim_individuals for testing)
- enforcing tests of input files - e.g. are number of individuals correct
- checks for memory allocation - we should add more of those
- more checks for gsl results - we should add more of those
- replaced strtoken with regex as a first case. They should all be
replaced. strtoken is not thread safe, for one.
- introduced C++ iterators
- introduced C++ closure in BimBam LMM for cached processing
- more localized initialization of variables - makes for demonstrably
more correct code
- -ksnps adds snps into setKSnps
- -gwasnps adds snps into setGWASnps
- both sets are computed by -loco
- attempted to make the code easier to read
Diffstat (limited to 'test')
-rw-r--r-- | test/dev_test_suite.sh | 46 | ||||
-rwxr-xr-x | test/test_suite.sh | 62 |
2 files changed, 95 insertions, 13 deletions
diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh new file mode 100644 index 0000000..522cf3d --- /dev/null +++ b/test/dev_test_suite.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +gemma=../bin/gemma + +testCenteredRelatednessMatrixKLOCO1() { + outn=mouse_hs1940_LOCO1 + $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \ + -a ../example/mouse_hs1940.anno.txt -snps ../example/mouse_hs1940_snps.txt -nind 400 -loco 1 -gk -debug -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + outfn=output/$outn.cXX.txt + assertEquals 0 $? + assertEquals "400" `wc -l < $outfn` + assertEquals "0.312" `head -c 5 $outfn` + assertEquals "71.03" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testUnivariateLinearMixedModelLOCO1() { + outn=mouse_hs1940_CD8_LOCO1_lmm + $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -loco 1 \ + -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940_LOCO1.cXX.txt \ + -snps ../example/mouse_hs1940_snps.txt -lmm \ + -nind 400 \ + -debug \ + -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + assertEquals 0 $? + outfn=output/$outn.assoc.txt + assertEquals "68" `wc -l < $outfn` + assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +shunit2=`which shunit2` + +if [ -x "$shunit2" ]; then + echo run system shunit2 + . $shunit2 +elif [ -e shunit2-2.0.3/src/shell/shunit2 ]; then + echo run shunit2 provided in gemma repo + . 
shunit2-2.0.3/src/shell/shunit2 +else + echo "Can not find shunit2 - see INSTALL.md" +fi diff --git a/test/test_suite.sh b/test/test_suite.sh index 467056e..625298e 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -2,11 +2,43 @@ gemma=../bin/gemma +testCenteredRelatednessMatrixKFullLOCO1() { + outn=mouse_hs1940_full_LOCO1 + $gemma -g ../example/mouse_hs1940.geno.txt.gz \ + -p ../example/mouse_hs1940.pheno.txt \ + -a ../example/mouse_hs1940.anno.txt \ + -loco 1 -gk -debug -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + outfn=output/$outn.cXX.txt + assertEquals 0 $? + assertEquals "1940" `wc -l < $outfn` + assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testUnivariateLinearMixedModelFullLOCO1() { + outn=mouse_hs1940_CD8_full_LOCO1_lmm + $gemma -g ../example/mouse_hs1940.geno.txt.gz \ + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -loco 1 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940_full_LOCO1.cXX.txt \ + -lmm \ + -debug \ + -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + assertEquals 0 $? + outfn=output/$outn.assoc.txt + assertEquals "951" `wc -l < $outfn` + assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + testCenteredRelatednessMatrixK() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt \ - -a ../example/mouse_hs1940.anno.txt \ - -gk -o mouse_hs1940 + -p ../example/mouse_hs1940.pheno.txt \ + -gk -o mouse_hs1940 assertEquals 0 $? grep "total computation time" < output/mouse_hs1940.log.txt assertEquals 0 $? 
@@ -14,36 +46,40 @@ testCenteredRelatednessMatrixK() { assertEquals "1940" `wc -l < $outfn` assertEquals "3763600" `wc -w < $outfn` assertEquals "0.335" `head -c 5 $outfn` - assertEquals "24.9799" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testUnivariateLinearMixedModel() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt -n 1 \ - -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \ - -lmm -o mouse_hs1940_CD8_lmm + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940.cXX.txt \ + -lmm \ + -o mouse_hs1940_CD8_lmm assertEquals 0 $? grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt assertEquals 0 $? outfn=output/mouse_hs1940_CD8_lmm.assoc.txt assertEquals "118459" `wc -w < $outfn` - assertEquals "92047" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testMultivariateLinearMixedModel() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt -n 1 6 \ - -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \ - -lmm -o mouse_hs1940_CD8MCH_lmm + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 6 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940.cXX.txt \ + -lmm -o mouse_hs1940_CD8MCH_lmm assertEquals 0 $? grep "total computation time" < output/mouse_hs1940_CD8MCH_lmm.log.txt assertEquals 0 $? 
outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt assertEquals "139867" `wc -w < $outfn` - assertEquals "92079" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } - shunit2=`which shunit2` if [ -x "$shunit2" ]; then |