From 449d882a3b33ef81ef4f0127c3932b01fa796dbb Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 3 Aug 2017 10:26:52 +0000 Subject: LOCO is implemented in GEMMA for the BIMBAM format. Pass in the -loco 1 switch for LOCO of chromosome 1. What are the use cases? 1. User runs vanilla GEMMA: all SNPs are considered input for GWA and K 2. User passes in -snps: all these SNPs are considered for GWA and K 3. User passes in -snps and -ksnps: All these SNPs are used for GWA, Ksnps are used for K 4. User passes in -loco: SNPs are split by chromosome (GWA incl., K excl.) 5. User passes in -snps, -gwasnps and -ksnps could mean that also GWA is subset explicitly (nyi) In all cases indicator_snp is honored and we get the most flexible way for studying SNP combinations that can be passed in in different ways. Overall added: - various comments in source code - tests in test framework incl. fast-check - NDEBUG compilation support in the Makefile - -debug switch for GEMMA debug output - debug.h which includes enforce functions which work like assert. Unlike assert, enforce also works in release compilation - -nind switch limits the number of individuals used (trim_individuals for testing) - enforcing tests of input files - e.g. are number of individuals correct - checks for memory allocation - we should add more of those - more checks for gsl results - we should add more of those - replaced strtoken with regex as a first case. They should all be replaced. strtoken is not thread safe, for one. 
- introduced C++ iterators - introduced C++ closure in BimBam LMM for cached processing - more localized initialization of variables - makes for demonstrably more correct code - -ksnps adds snps into setKSnps - -gwasnps adds snps into setGWASnps - both sets are computed by -loco - attempted to make the code easier to read --- test/dev_test_suite.sh | 46 +++++++++++++++++++++++++++++++++++++ test/test_suite.sh | 62 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 95 insertions(+), 13 deletions(-) create mode 100644 test/dev_test_suite.sh (limited to 'test') diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh new file mode 100644 index 0000000..522cf3d --- /dev/null +++ b/test/dev_test_suite.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +gemma=../bin/gemma + +testCenteredRelatednessMatrixKLOCO1() { + outn=mouse_hs1940_LOCO1 + $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \ + -a ../example/mouse_hs1940.anno.txt -snps ../example/mouse_hs1940_snps.txt -nind 400 -loco 1 -gk -debug -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + outfn=output/$outn.cXX.txt + assertEquals 0 $? + assertEquals "400" `wc -l < $outfn` + assertEquals "0.312" `head -c 5 $outfn` + assertEquals "71.03" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testUnivariateLinearMixedModelLOCO1() { + outn=mouse_hs1940_CD8_LOCO1_lmm + $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -loco 1 \ + -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940_LOCO1.cXX.txt \ + -snps ../example/mouse_hs1940_snps.txt -lmm \ + -nind 400 \ + -debug \ + -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + assertEquals 0 $? 
+ outfn=output/$outn.assoc.txt + assertEquals "68" `wc -l < $outfn` + assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +shunit2=`which shunit2` + +if [ -x "$shunit2" ]; then + echo run system shunit2 + . $shunit2 +elif [ -e shunit2-2.0.3/src/shell/shunit2 ]; then + echo run shunit2 provided in gemma repo + . shunit2-2.0.3/src/shell/shunit2 +else + echo "Can not find shunit2 - see INSTALL.md" +fi diff --git a/test/test_suite.sh b/test/test_suite.sh index 467056e..625298e 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -2,11 +2,43 @@ gemma=../bin/gemma +testCenteredRelatednessMatrixKFullLOCO1() { + outn=mouse_hs1940_full_LOCO1 + $gemma -g ../example/mouse_hs1940.geno.txt.gz \ + -p ../example/mouse_hs1940.pheno.txt \ + -a ../example/mouse_hs1940.anno.txt \ + -loco 1 -gk -debug -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + outfn=output/$outn.cXX.txt + assertEquals 0 $? + assertEquals "1940" `wc -l < $outfn` + assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testUnivariateLinearMixedModelFullLOCO1() { + outn=mouse_hs1940_CD8_full_LOCO1_lmm + $gemma -g ../example/mouse_hs1940.geno.txt.gz \ + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -loco 1 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940_full_LOCO1.cXX.txt \ + -lmm \ + -debug \ + -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + assertEquals 0 $? 
+ outfn=output/$outn.assoc.txt + assertEquals "951" `wc -l < $outfn` + assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + testCenteredRelatednessMatrixK() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt \ - -a ../example/mouse_hs1940.anno.txt \ - -gk -o mouse_hs1940 + -p ../example/mouse_hs1940.pheno.txt \ + -gk -o mouse_hs1940 assertEquals 0 $? grep "total computation time" < output/mouse_hs1940.log.txt assertEquals 0 $? @@ -14,36 +46,40 @@ testCenteredRelatednessMatrixK() { assertEquals "1940" `wc -l < $outfn` assertEquals "3763600" `wc -w < $outfn` assertEquals "0.335" `head -c 5 $outfn` - assertEquals "24.9799" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testUnivariateLinearMixedModel() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt -n 1 \ - -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \ - -lmm -o mouse_hs1940_CD8_lmm + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940.cXX.txt \ + -lmm \ + -o mouse_hs1940_CD8_lmm assertEquals 0 $? grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt assertEquals 0 $? 
outfn=output/mouse_hs1940_CD8_lmm.assoc.txt assertEquals "118459" `wc -w < $outfn` - assertEquals "92047" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testMultivariateLinearMixedModel() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt -n 1 6 \ - -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \ - -lmm -o mouse_hs1940_CD8MCH_lmm + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 6 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940.cXX.txt \ + -lmm -o mouse_hs1940_CD8MCH_lmm assertEquals 0 $? grep "total computation time" < output/mouse_hs1940_CD8MCH_lmm.log.txt assertEquals 0 $? outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt assertEquals "139867" `wc -w < $outfn` - assertEquals "92079" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } - shunit2=`which shunit2` if [ -x "$shunit2" ]; then -- cgit v1.2.3