diff options
-rw-r--r-- | INSTALL.md | 32 | ||||
-rw-r--r-- | Makefile | 17 | ||||
-rw-r--r-- | README.md | 10 | ||||
-rw-r--r-- | example/mouse_hs1940_snps.txt | 1000 | ||||
-rw-r--r-- | example/mouse_hs1940_snps_anno.txt | 1000 | ||||
-rw-r--r-- | src/debug.h | 46 | ||||
-rw-r--r-- | src/gemma.cpp | 49 | ||||
-rw-r--r-- | src/io.cpp | 117 | ||||
-rw-r--r-- | src/io.h | 29 | ||||
-rw-r--r-- | src/lmm.cpp | 203 | ||||
-rw-r--r-- | src/lmm.h | 6 | ||||
-rw-r--r-- | src/mvlmm.cpp | 6 | ||||
-rw-r--r-- | src/param.cpp | 146 | ||||
-rw-r--r-- | src/param.h | 30 | ||||
-rw-r--r-- | src/vc.h | 2 | ||||
-rw-r--r-- | test/dev_test_suite.sh | 46 | ||||
-rwxr-xr-x | test/test_suite.sh | 62 |
17 files changed, 2563 insertions, 238 deletions
@@ -19,6 +19,8 @@ dependencies: * [Eigen library](http://eigen.tuxfamily.org/dox/) * zlib +See below for installation on Guix. + ## Install GEMMA ### Bioconda @@ -38,31 +40,41 @@ using the following command guix package -i gemma +To install the build tools with GNU Guix + + guix package -i make gcc linux-libre-headers gsl eigen openblas lapack glibc ld-wrapper + ### Install from source Install listed dependencies and run - make + make -j 4 + +(the -j switch builds on 4 cores). if you get an Eigen error you may need to override the include -path. E.g. on GNU Guix with shared libs this may work +path. E.g. on GNU Guix with shared libs and DEBUG the following may +work - make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 FORCE_DYNAMIC=1 WITH_OPENBLAS=1 + make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 FORCE_DYNAMIC=1 WITH_OPENBLAS=1 DEBUG=1 to run GEMMA tests - make check + time make check -## Run tests +You can run gemma in the debugger with, for example -GEMMA uses the shunit2 test framework (version 2.0) and can be found -[here](https://github.com/genenetwork/shunit2) + gdb --args \ + ./bin/gemma -g example/mouse_hs1940.geno.txt.gz \ + -p example/mouse_hs1940.pheno.txt -a example/mouse_hs1940.anno.txt \ + -snps example/snps.txt -nind 400 -loco 1 -gk -debug -o myoutput -In the source tree: +Other options, such as compiling with warnings, are listed in the +Makefile. - git clone https://github.com/genenetwork/shunit2 contrib/shunit2 +## Run tests -and run +GEMMA includes the shunit2 test framework (version 2.0). make check @@ -30,7 +30,13 @@ SRC_DIR = ./src CPP = g++ -CPPFLAGS = -O3 -std=gnu++11 -isystem/$(EIGEN_INCLUDE_PATH) +ifdef DEBUG + # development mode + CPPFLAGS = -g -O3 -std=gnu++11 -isystem/$(EIGEN_INCLUDE_PATH) +else + # release + CPPFLAGS = -DNDEBUG -O3 -std=gnu++11 -isystem/$(EIGEN_INCLUDE_PATH) +endif ifdef SHOW_COMPILER_WARNINGS CPPFLAGS += -Wall @@ -106,12 +112,19 @@ $(OBJS) : $(HDR) $(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o .SUFFIXES : .cpp .c .o $(SUFFIXES) -check: all +fast-check: all + cd test && ./dev_test_suite.sh | tee ../dev_test.log + grep -q 'success rate: 100%' dev_test.log + +slow-check: all cd test && ./test_suite.sh | tee ../test.log grep -q 'success rate: 100%' test.log +check: fast-check slow-check + clean: rm -rf ${SRC_DIR}/*.o ${SRC_DIR}/*~ *~ $(OUTPUT) + rm test/output/* DIST_COMMON = COPYING.txt README.txt Makefile DIST_SUBDIRS = src doc example bin @@ -191,16 +191,6 @@ last tested using Eigen version 3.3.3.) More information on source code, dependencies and installation can be found in [INSTALL.md](INSTALL.md). -## Contributing - -GEMMA uses the [LLVM style](http://llvm.org/docs/CodingStandards.html) -for C++ code. You should set your editor to replace tabs with spaces -and 2 spaces of indentation. Use - - clang-format -style=LLVM -i *.cpp *.h - -to validate the coding style. - ## Credits The *GEMMA* software was developed by: diff --git a/example/mouse_hs1940_snps.txt b/example/mouse_hs1940_snps.txt new file mode 100644 index 0000000..9f20aa0 --- /dev/null +++ b/example/mouse_hs1940_snps.txt @@ -0,0 +1,1000 @@ +rs3668922 +rs13480515 +rs13483034 +rs4184231 +rs3687346 +gnf10.000.619 +rs6258088 +rs3657154 +rs13459168 +rs3699245 +rs3692731 +rs4184702 +rs3716165 +rs3692165 +rs3690160 +rs3696476 +rs13478445 +CEL-7_36442524 +mCV25307043 +rs8265673 +rs6395984 +rs13476864 +rs13466711 +rs13478700 +rs13481609 +rs3718784 +rs4197715 +rs3680765 +rs6355837 +rs6245242 +rs3703111 +rs13482477 +UT_4_58.126445 +rs13483569 +rs3656008 +rs3659698 +rs3658914 +rs3721822 +gnf06.123.229 +rs3684585 +rs13483591 +rs13479927 +rs4158165 +rs3685284 +rs13476676 +rs13478825 +rs6321462 +rs13478553 +CEL-9_99044286 +rs13478287 +rs3680026 +rs13476066 +rs3679483 +rs13479172 +rs3698182 +rs13481568 +rs13478909 +rs3154953 +rs13481793 +rs13481589 +rs3694887 +rs13481316 +rs13479907 +rs13476892 +rs3700120 +rs13477451 +rs3701734 +rs13482363 +rs6272774 +rs6292345 +rs3710221 +rs13481431 +rs3653795 +rs3702122 +mCV23214561 +rs6204487 +rs13477462 +rs6211405 +rs3089148 +rs13478676 +rs6296891 +rs3705582 +gnf03.160.792 +CEL-15_28995144 +mCV24786469 +rs13482036 +rs13477228 +rs13475804 +rs3706761 +rs3663988 +rs3676096 +gnf11.093.105 +rs13479257 +rs13481511 +rs6292169 +rs13476966 +rs13477985 +rs13479540 +petM-05233-3 +mCV23486224 +rs13480036 +rs6328745 +rs3699688 +rs3695383 +gnf17.082.284 +rs6184991 +rs3714811 +rs13476765 +rs13475886 +rs13459166 +rs3701693 +mCV24198179 +rs3670738 +rs13479520 +rs4191367 +rs13477553 +mCV23994528 +rs13483087 +rs3673599 +rs3674856 +rs3711721 +rs6241342 +rs6223687 +rs3703161 +rs3684732 +rs13482419 +rs6354478 +rs13483066 +rs13477048 +rs13480180 +rs6226936 +rs13482579 +rs13481764 +rs3674369 +rs13480758 +rs6340166 +rs3723670 +gnf17.042.585 +rs6313528 +rs13477335 +CEL-7_116160192 +rs4172338 +rs6157345 +rs13476963 +rs3722255 +rs3696823 +rs3689263 +rs3090818 +rs3663068 +rs3713432 +rs13481961 +rs13479091 +rs13481583 +gnf08.055.205 +rs13478679 +rs6409039 +rs13476714 +rs3654545 +rs3703836 +rs13481588 +rs6185483 +rs3663873 +rs3023339 +rs6190892 +rs3706317 +CEL-3_59059215 +rs3664897 +rs13478971 +rs3691210 +rs6315432 +rs13479676 +rs3721068 +rs13482474 +rs4223211 +rs3023058 +rs13482928 +rs13483639 +rs4139897 +rs3726927 +rs3713702 +rs13459134 +rs13481751 +rs13481640 +rs13481804 +rs3714819 +CEL-5_45716874 +UT_5_103.462256 +rs6153168 +rs13479887 +rs4140148 +rs6159786 +rs13480315 +rs13480653 +rs3694708 +rs13478048 +rs3692387 +rs3703247 +rs13477401 +rs4228112 +rs13475757 +rs3721056 +rs4222732 +UT_6_136.421356 +rs13477902 +rs13461280 +rs3676884 +rs3721457 +rs3722149 +rs3657482 +rs13476475 +rs3695693 +rs3700085 +rs13476804 +rs3706023 +rs13479825 +rs6317800 +gnf17.083.842 +rs13483606 +rs6380191 +rs13478677 +rs6301349 +rs3710581 +rs4174474 +gnf05.084.686 +rs6314716 +rs4137234 +UT_8_130.396331 +rs13475883 +mCV23508321 +rs13481021 +rs4222215 +CEL-15_15740145 +rs3674193 +rs6328956 +gnf05.124.749 +UT_11_75.985289 +rs6276149 +rs6299927 +mCV24107897 +rs13475722 +rs3674066 +rs3695308 +rs13483207 +rs3686912 +rs13478738 +rs8257588 +rs13480275 +rs3705458 +rs13476378 +rs13475963 +rs13478983 +rs13481480 +rs6209043 +rs13477130 +rs6409040 +rs13483178 +rs4224501 +rs13480194 +rs3714056 +rs3705396 +rs13477083 +rs4139262 +mCV23822994 +rs13475774 +rs13482109 +CEL-7_41997914 +CEL-10_74277238 +rs3022934 +CEL-6_84629314 +rs4228452 +rs13480988 +UT_8_72.075442 +rs6201966 +rs8244157 +rs3663375 +rs6353774 +rs6185261 +rs3713696 +gnf02.026.314 +rs6343757 +rs13477378 +rs3715857 +rs13478798 +CEL-5_56325748 +rs13480659 +rs13478839 +rs6187389 +rs4136879 +rs6281838 +rs13476304 +gnf14.103.048 +UT_3_105.614392 +rs13480601 +rs13483549 +rs6223790 +rs6298808 +rs6263067 +rs3669513 +rs6260116 +rs3664869 +rs13482084 +gnf10.082.230 +rs13483020 +rs3660486 +rs3721276 +CEL-5_117374791 +rs3708633 +gnf01.059.986 +rs13483358 +mCV25009162 +rs3694629 +rs6186560 +rs3710202 +rs13479768 +rs13481308 +rs3672124 +rs13482714 +rs3683699 +rs4226572 +rs13479694 +rs6224522 +rs13481041 +rs3674364 +rs13477030 +rs13480845 +rs8246989 +rs6380710 +UT_9_82.824657 +rs6238322 +rs6353097 +gnf01.126.387 +rs4174329 +rs3695075 +rs6262666 +rs13476186 +mCV24130963 +rs4139548 +rs4226870 +rs13476750 +mCV22528760 +rs3702484 +rs8252588 +rs13477692 +UT_2_129.88318 +rs13482668 +rs13459184 +rs3700313 +rs6371982 +rs3700592 +rs4211364 +rs6229068 +rs3667772 +rs13480991 +rs13482482 +rs3689073 +rs13482731 +rs3698196 +mCV23893269 +rs13482306 +gnf10.004.839 +rs6241299 +rs13483217 +mCV24217147 +rs3723643 +rs13483476 +rs6328018 +rs13477019 +rs3688968 +CEL-14_90299389 +rs3676394 +rs4222922 +rs13476081 +rs13481650 +rs13479188 +rs3707749 +rs3655334 +rs3676100 +rs13477594 +rs3680100 +rs3726406 +rs3663355 +rs13483149 +rs3668028 +rs13477790 +rs13478413 +rs13479197 +petM-02162-1 +mCV24423497 +rs3705394 +CEL-15_9687257 +rs3679120 +rs6241653 +rs13477092 +mCV24338392 +CEL-7_64160024 +rs3709567 +rs13476067 +rs3670749 +rs6361711 +UT_13_4.391534 +rs6245643 +rs6212085 +rs3706792 +rs3703728 +rs4185336 +rs13477717 +rs3718386 +mhcCD8a2 +rs3676101 +rs13480536 +rs8253516 +rs3689218 +rs13476950 +rs3698001 +rs13481791 +rs3691938 +rs3685822 +rs3658897 +rs3687275 +UT_15_87.526763 +mCV23209429 +rs6230167 +rs8236770 +rs3694989 +rs13480358 +rs13476077 +rs3023025 +rs13477904 +CEL-7_43463457 +rs13477072 +rs13478750 +UT_17_25.804649 +rs6209951 +rs6162256 +rs6305232 +rs13480400 +rs3681670 +rs3675388 +rs4219590 +rs3671174 +rs6239372 +rs4197422 +rs13475872 +rs6261913 +CEL-15_74419628 +rs6413270 +rs3683507 +rs3673976 +rs6403494 +rs13477493 +rs6408534 +mCV23717969 +CEL-6_8231384 +CEL-4_74066970 +rs3677115 +rs3696518 +rs13478995 +rs13477188 +rs6250834 +rs3682333 +rs3714102 +rs6220667 +rs3711084 +rs3023233 +rs3726250 +rs3705677 +rs6303064 +rs13479287 +CEL-1_140926026 +rs4165029 +rs6353593 +rs13475789 +rs3684964 +rs3021887 +rs4227074 +rs3688343 +CEL-19_34542259 +CEL-4_118427459 +rs13480907 +rs3682128 +rs3654569 +rs3708061 +rs13480114 +rs13482620 +rs3702451 +rs8254782 +rs4229363 +rs3702220 +rs3662056 +rs3661718 +gnf10.122.050 +CEL-17_91044280 +rs13477457 +rs13482013 +rs13480721 +rs13483472 +rs3677807 +rs3688854 +gnf13.008.720 +rs13478015 +rs6281562 +rs13482331 +rs3700038 +rs13459084 +rs3672425 +rs3662930 +CEL-12_87311152 +rs3662436 +rs3708442 +rs3673457 +rs6225875 +rs3709561 +mCV22965443 +gnf05.136.579 +rs3694949 +mCV23446189 +rs13480443 +rs3685643 +rs6379062 +rs13476062 +UT_15_101.086192 +rs13459146 +rs13477775 +rs6255802 +gnf10.130.258 +rs3658044 +rs3721803 +rs4227587 +rs3676483 +mCV22625013 +rs13482065 +rs3698600 +rs13481410 +rs3727022 +gnf08.031.589 +rs6263485 +rs6188665 +rs13483016 +rs3722112 +rs6355445 +rs6217010 +rs3675669 +rs3703712 +rs13476989 +rs6217275 +rs3680632 +rs3667748 +rs13480251 +UT_6_5.754097 +rs6196597 +rs3678364 +rs6342608 +rs3695107 +rs6370750 +rs13476128 +rs3654251 +rs3672987 +gnf13.057.762 +rs13481985 +rs3655885 +rs13481830 +rs3665127 +rs3709285 +rs3695382 +gnf01.148.750 +rs13482195 +rs13483212 +rs13478952 +rs13476291 +rs13475727 +rs3695266 +rs3656875 +rs3697020 +rs4172185 +mCV22595554 +rs13481855 +UT_17_75.437568 +rs3660588 +rs3687764 +rs6366840 +rs3023981 +mCV24436061 +rs13481070 +rs3655057 +gnf03.067.696 +rs6185468 +rs13479880 +rs4197693 +mCV23045722 +gnf07.129.013 +rs6170261 +gnf10.009.098 +rs13479952 +rs3684103 +rs3688141 +rs3688207 +rs3696331 +rs3149842 +rs13477114 +rs4137908 +rs13478404 +rs3670168 +rs6318455 +gnf03.072.147 +rs13483558 +rs13479329 +rs13481462 +rs13481061 +gnf08.026.892 +rs13483222 +rs13479624 +rs3712524 +rs3667255 +rs6393943 +rs13477494 +rs3700012 +rs6390142 +rs3724616 +rs4227609 +mCV23575714 +rs6364065 +rs8275764 +rs13480454 +rs13476466 +rs4165252 +rs3677121 +rs13482870 +rs6163209 +UT_3_34.779033 +rs6160457 +rs13476793 +rs13479140 +rs4223701 +rs13478053 +rs13478178 +mCV23851630 +rs4219905 +gnf12.013.284 +rs13478398 +gnf14.013.298 +rs6374597 +rs3023265 +rs13475785 +mCV24528954 +rs6187777 +rs6208887 +gnf08.098.331 +rs4232105 +rs3655098 +gnf03.079.138 +rs6272967 +CEL-8_7689226 +rs3671527 +rs6335368 +rs4170074 +rs3686804 +rs3716195 +rs13481456 +rs13477807 +CEL-1_101048458 +rs3666667 +rs13475964 +rs3704471 +rs13459095 +rs13483631 +rs6339313 +rs13479086 +rs13480023 +rs13477952 +rs3718641 +mCV22331476 +rs13481702 +UT_4_57.645957 +rs3659348 +rs13481893 +rs6278585 +rs13483044 +rs3703582 +rs13476705 +rs6230748 +gnf18.012.857 +rs3708749 +rs4206054 +rs13481285 +rs13477397 +rs13482112 +rs3663650 +rs4231720 +mCV24380249 +rs3723136 +rs6238153 +rs13482509 +gnf18.014.547 +rs6381624 +rs13483213 +rs3679439 +rs6306557 +CEL-9_17007263 +rs6358447 +rs13477303 +UT_11_29.443258 +rs13479979 +rs3719870 +rs13481585 +rs3719672 +rs13476880 +rs3727109 +rs3682341 +gnf19.005.316 +rs4166461 +UT_2_70.039995 +rs4223511 +rs3088491 +rs13483438 +CEL-3_137067761 +rs3675505 +CEL-4_6055375 +rs3656005 +CEL-8_94486767 +rs4152838 +rs6244900 +rs3660226 +rs4231834 +CEL-9_98918429 +UT_3_89.90208 +rs6350869 +rs13480856 +rs13478502 +gnf09.028.495 +mCV24205612 +CEL-4_78089985 +rs4165510 +rs3680197 +rs4223969 +rs13479476 +rs13482622 +CEL-7_122606289 +gnf02.161.674 +rs8255228 +rs13478733 +UT_14_10.122547 +rs4165334 +rs3709732 +rs3682937 +rs13477065 +CEL-13_22517579 +rs13475064 +rs4226048 +gnf11.098.224 +rs13481380 +CEL-5_15079682 +rs4219072 +rs13481012 +rs3702472 +rs13478051 +rs13480522 +gnf19.052.359 +rs6227698 +rs3724411 +rs3708313 +rs13480065 +rs6265656 +CEL-2_168918742 +rs3718133 +rs13481827 +rs3703241 +rs3162895 +rs13476805 +rs4173581 +rs13480451 +rs13479823 +gnf10.055.290 +rs3726588 +mCV22794972 +rs4221957 +rs3695907 +rs6357455 +rs3719497 +rs6247999 +rs13479419 +rs13478796 +rs3658433 +rs13480436 +rs3717846 +gnf12.060.469 +rs3692487 +UT_4_32.135337 +rs13478522 +rs3683834 +rs3667738 +rs13459110 +CEL-3_156007479 +rs3655717 +rs13476607 +rs13479239 +mCV23970227 +rs6284081 +rs13483579 +rs13479763 +rs13481194 +gnf14.018.589 +rs13481304 +rs13478640 +rs13478205 +rs6213741 +rs3689336 +mCV25230888 +rs3675740 +rs13478133 +gnf09.020.405 +rs13482654 +rs13482681 +rs4136200 +gnf07.061.149 +rs3696278 +rs3698831 +rs6307147 +rs3691784 +UT_2_144.017259 +rs13478215 +rs6343029 +rs13478370 +CEL-12_87170441 +rs6362861 +rs3716646 +CEL-11_121219118 +rs6315002 +rs3722681 +rs13476368 +rs3688959 +rs6320527 +rs13480629 +rs13479085 +rs3697419 +rs3696307 +rs6176848 +rs13482712 +rs6351657 +rs3706825 +rs13477845 +rs3665582 +rs13477268 +rs4179233 +rs13476530 +CEL-3_94149786 +rs13475888 +UT_17_86.377392 +rs13476581 +gnf09.016.676 +rs6328845 +rs3683092 +rs6301542 +rs3678662 +rs13479965 +rs13478449 +rs4165065 +rs3142215 +rs3152403 +rs3662946 +rs13481576 +rs3699909 +rs3695682 +rs6191112 +rs6326787 +gnf10.008.653 +rs6281696 +rs13482998 +rs3701429 +rs4165893 +rs6223813 +rs3692039 +rs6224196 +rs13477307 +rs6248743 +rs13479628 +rs3710365 +rs13482200 +rs13477979 +UT_7_56.715945 +rs13459144 +rs6296253 +rs6225523 +mCV23785265 +rs13483306 +rs13481226 +rs8274734 +rs13479795 +rs13477352 +rs6318992 +rs4161857 +rs6238771 +rs4229612 +rs3690631 +rs13478964 +rs3677539 +rs13482353 +rs13479934 +CEL-17_76502178 +rs6239023 +rs3022845 +gnf05.084.314 +rs13476095 +rs3677272 +rs13480567 +rs13478818 +rs6256981 +rs6217029 +rs3690783 +rs13482930 +rs6257357 +rs3668888 +rs13480323 +rs13483471 +rs3710549 +rs3692549 +rs6394046 +rs13480294 +rs13478852 +rs4222426 +rs13478999 +rs3710273 +rs3654283 +rs6306968 +rs6310133 +rs6195266 +rs3718631 +rs3679962 +CEL-2_63143553 +rs13480164 +rs13483209 +rs3698364 +rs3689948 +rs4160175 +mCV24089992 +rs3696018 +rs13480279 +mCV22750087 +rs4184636 +rs13478690 +rs13476231 +rs3661171 +gnf07.132.051 +CEL-1_103029662 +gnf08.108.032 +rs13476639 +rs13480232 +rs13476302 +CEL-13_25470193 +rs6234428 +rs6346661 +rs13479311 +rs3671277 +rs6167568 +rs3661657 +rs13477529 +rs13476064 +rs3724658 +gnf07.032.889 +rs3713198 +rs13478010 +CEL-11_40351206 +rs6403541 +rs6325730 +rs13479983 +rs13480162 +rs3156208 +rs13481010 +rs13479570 +rs3712722 +rs13477113 +rs3726227 +rs13481552 +rs3710055 +rs4151935 +rs13478434 +rs13459062 +rs3655898 +rs13480173 +mCV23509699 +rs13481547 +rs3714431 +gnf02.119.284 +rs3680871 +rs4173052 +rs6299531 +rs13476463 +rs13478913 +rs6317860 +rs13478963 +rs3679473 +rs13476615 +rs3661039 +rs13480015 +rs6160140 +rs13483184 +rs13476330 +gnf14.019.954 +mCV23128319 +UT_3_89.066701 +rs13479879 diff --git a/example/mouse_hs1940_snps_anno.txt b/example/mouse_hs1940_snps_anno.txt new file mode 100644 index 0000000..cfd4122 --- /dev/null +++ b/example/mouse_hs1940_snps_anno.txt @@ -0,0 +1,1000 @@ +rs3668922 111771071 13 65.0648 +rs13480515 17261714 10 4.72355 +rs13483034 53249416 17 30.175 +rs4184231 48293994 16 33.7747 +rs3687346 12815936 14 2.45302 +gnf10.000.619 NA 10 0.549798 +rs6258088 82616380 4 50.7375 +rs3657154 25351639 4 13.0781 +rs13459168 79615054 3 35.9488 +rs3699245 24926186 11 13.4081 +rs3692731 105981796 1 54.3948 +rs4184702 49429820 16 33.9562 +rs3716165 125938666 1 65.8444 +rs3692165 29620174 14 12.1522 +rs3690160 37015063 11 20.2587 +rs3696476 117118328 9 79.8486 +rs13478445 106983207 5 66.1267 +CEL-7_36442524 NA 7 26.3649 +mCV25307043 NA 5 84.4605 +rs8265673 102356580 11 70.912 +rs6395984 91070578 14 37.7522 +rs13476864 157465910 2 83.6744 +rs13466711 172135244 1 93.0543 +rs13478700 33297522 6 16.0349 +rs13481609 99263253 12 55.323 +rs3718784 25287148 11 13.5421 +rs4197715 67263247 16 44.0103 +rs3680765 49397123 7 26.522 +rs6355837 129713375 4 79.2943 +rs6245242 21478738 19 14.5837 +rs3703111 16785487 14 3.44733 +rs13482477 22519205 15 7.90254 +UT_4_58.126445 NA 4 42.0498 +rs13483569 23771627 19 18.6968 +rs3656008 43629190 17 20.1529 +rs3659698 11870131 11 5.15734 +rs3658914 130828039 3 64.9985 +rs3721822 131308159 6 75.661 +gnf06.123.229 NA 6 69.8894 +rs3684585 14560524 11 6.49166 +rs13483591 30814887 19 25.1448 +rs13479927 93955646 8 46.5649 +rs4158165 7646431 16 1.69797 +rs3685284 94188047 15 55.2232 +rs13476676 104099553 2 60.4366 +rs13478825 74688786 6 35.761 +rs6321462 43650849 4 27.5981 +rs13478553 138345042 5 91.1618 +CEL-9_99044286 NA 9 58.8985 +rs13478287 62118734 5 34.264 +rs3680026 113321137 7 60.9659 +rs13476066 114957525 1 56.8395 +rs3679483 180126256 2 107.324 +rs13479172 29340347 7 12.6872 +rs3698182 83587952 8 41.2234 +rs13481568 85150110 12 43.9983 +rs13478909 95383349 6 49.7836 +rs3154953 30187827 2 23.0095 +rs13481793 44884449 13 18.1255 +rs13481589 92176340 12 50.2606 +rs3694887 103735279 5 64.1859 +rs13481316 15555143 12 7.08188 +rs13479907 89381352 8 45.0234 +rs13476892 165396284 2 90.3273 +rs3700120 104500504 9 64.1947 +rs13477451 140714780 3 71.0701 +rs3701734 41327729 11 21.5451 +rs13482363 108252278 14 47.8535 +rs6272774 54180752 6 28.7966 +rs6292345 75301469 9 48.4366 +rs3710221 110389085 2 62.5863 +rs13481431 46801424 12 22.6927 +rs3653795 166630387 2 91.2977 +rs3702122 106416324 14 47.5675 +mCV23214561 NA 19 1.14225 +rs6204487 26131893 4 13.3102 +rs13477462 143558142 3 74.9115 +rs6211405 111717282 9 72.4166 +rs3089148 92941850 8 46.3161 +rs13478676 27655708 6 11.5581 +rs6296891 79983947 8 38.1803 +rs3705582 17603351 11 8.09978 +gnf03.160.792 NA 3 87.4855 +CEL-15_28995144 NA 15 10.5792 +mCV24786469 NA 1 112.529 +rs13482036 118384631 13 69.1715 +rs13477228 82436166 3 38.3819 +rs13475804 34929867 1 14.1453 +rs3706761 91989672 14 37.8867 +rs3663988 139319168 7 86.2784 +rs3676096 10671078 5 2.91098 +gnf11.093.105 NA 11 51.8811 +rs13479257 53778809 7 29.122 +rs13481511 68024081 12 31.0881 +rs6292169 56447533 8 28.4885 +rs13476966 8755001 3 0.445861 +rs13477985 131771792 4 80.6336 +rs13479540 133996942 7 81.1202 +petM-05233-3 NA 5 13.1446 +mCV23486224 NA 4 66.4078 +rs13480036 126607116 8 77.8966 +rs6328745 64264672 18 42.9404 +rs3699688 58957762 4 42.4062 +rs3695383 33796072 14 14.3559 +gnf17.082.284 NA 17 51.9272 +rs6184991 101697093 8 51.1608 +rs3714811 124207921 4 74.4119 +rs13476765 128474600 2 68.7832 +rs13475886 61171619 1 29.2939 +rs13459166 130541379 2 70.0231 +rs3701693 11135402 14 1.96294 +mCV24198179 NA 3 87.1922 +rs3670738 27325835 3 9.26999 +rs13479520 NA 7 73.2198 +rs4191367 59133145 16 38.4306 +rs13477553 9463709 4 3.01119 +mCV23994528 NA 1 109.078 +rs13483087 69269163 17 42.0473 +rs3673599 62806600 12 28.5686 +rs3674856 111217854 3 54.1693 +rs3711721 107226023 7 57.2408 +rs6241342 119182633 7 64.4351 +rs6223687 81780432 9 51.8775 +rs3703161 128357744 8 79.7733 +rs3684732 83007814 17 53.9756 +rs13482419 4301393 15 0.1 +rs6354478 6241050 5 2.26507 +rs13483066 63320298 17 35.254 +rs13477048 33086267 3 15.767 +rs13480180 48420735 9 30.3166 +rs6226936 158845979 1 80.054 +rs13482579 52834896 15 21.0118 +rs13481764 36557616 13 11.5364 +rs3674369 23210432 15 8.30254 +rs13480758 108473286 10 62.7941 +rs6340166 72797040 5 41.6748 +rs3723670 52514798 9 32.2421 +gnf17.042.585 NA 17 19.521 +rs6313528 19045014 11 8.81978 +rs13477335 116920487 3 56.0978 +CEL-7_116160192 NA 7 73.3692 +rs4172338 34723432 16 26.7945 +rs6157345 95619583 1 52.9014 +rs13476963 7679816 3 0.192857 +rs3722255 63336847 3 29.8132 +rs3696823 19105543 15 7.47134 +rs3689263 74311123 2 46.3781 +rs3090818 13826959 14 2.55302 +rs3663068 18736595 14 5.03702 +rs3713432 78097862 7 41.4979 +rs13481961 97270237 13 51.6175 +rs13479091 147487801 6 89.333 +rs13481583 90350885 12 48.6582 +gnf08.055.205 NA 8 28.4778 +rs13478679 28352522 6 11.6127 +rs6409039 94162742 12 50.9036 +rs13476714 115139005 2 64.0296 +rs3654545 72091954 17 44.1973 +rs3703836 100585663 15 63.0016 +rs13481588 91968729 12 50.1599 +rs6185483 55235582 19 50.2192 +rs3663873 110177041 3 53.8391 +rs3023339 24804820 12 8.97317 +rs6190892 10474677 15 4.23068 +rs3706317 61091492 3 29.2317 +CEL-3_59059215 NA 3 28.6449 +rs3664897 67499597 13 32.9733 +rs13478971 111299794 6 59.1094 +rs3691210 157995224 2 83.8744 +rs6315432 46233922 5 27.8612 +rs13479676 30360115 8 16.2089 +rs3721068 113915010 9 75.0971 +rs13482474 22155623 15 7.61826 +rs4223211 69796844 2 42.153 +rs3023058 135829456 5 88.8705 +rs13482928 25964330 17 10.451 +rs13483639 44191430 19 38.0252 +rs4139897 47856079 18 26.9405 +rs3726927 141075115 1 72.6875 +rs3713702 64271271 11 36.9088 +rs13459134 115271947 11 88.401 +rs13481751 33720509 13 10.223 +rs13481640 108205932 12 64.8493 +rs13481804 47603346 13 20.193 +rs3714819 55350965 17 31.1628 +CEL-5_45716874 NA 5 28.0401 +UT_5_103.462256 NA 5 64.5011 +rs6153168 8299968 8 1.10444 +rs13479887 88295611 8 44.4278 +rs4140148 150476349 4 95.9108 +rs6159786 29684270 14 12.2408 +rs13480315 84973533 9 52.9101 +rs13480653 74981224 10 43.0295 +rs3694708 160008042 2 85.8928 +rs13478048 150309421 4 95.8765 +rs3692387 126903210 5 80.2873 +rs3703247 45999569 7 24.6749 +rs13477401 129579191 3 62.7618 +rs4228112 13009201 10 2.58728 +rs13475757 21286957 1 5.9138 +rs3721056 71032448 9 44.4738 +rs4222732 156808590 1 79.3212 +UT_6_136.421356 NA 6 77.9665 +rs13477902 108638296 4 66.3258 +rs13461280 111266995 12 66.858 +rs3676884 94921847 4 58.1594 +rs3721457 117687044 7 64.2093 +rs3722149 34756708 7 17.1813 +rs3657482 114065685 7 61.3477 +rs13476475 46764454 2 29.2929 +rs3695693 9619040 9 0.086045 +rs3700085 119673759 9 82.4914 +rs13476804 139496332 2 76.2408 +rs3706023 73054685 17 45.1552 +rs13479825 83183210 8 40.8723 +rs6317800 96894089 2 57.0795 +gnf17.083.842 NA 17 53.508 +rs13483606 36056602 19 30.2593 +rs6380191 39627107 3 20.1768 +rs13478677 27951181 6 11.5812 +rs6301349 96146686 14 39.3196 +rs3710581 36397464 19 31.8693 +rs4174474 38174823 16 29.3054 +gnf05.084.686 NA 5 53.0968 +rs6314716 40617203 14 15.8064 +rs4137234 67539780 13 32.9929 +UT_8_130.396331 NA 8 80.4067 +rs13475883 59379132 1 28.543 +mCV23508321 NA 11 13.8615 +rs13481021 51107253 11 27.7449 +rs4222215 23996126 1 9.03999 +CEL-15_15740145 NA 15 6.89363 +rs3674193 178717320 2 104.867 +rs6328956 63898025 4 43.9073 +gnf05.124.749 NA 5 84.3148 +UT_11_75.985289 NA 11 45.4302 +rs6276149 98443248 7 54.5189 +rs6299927 92435165 14 37.9519 +mCV24107897 NA 1 4.04864 +rs13475722 10754229 1 2.6061 +rs3674066 149681877 3 80.9788 +rs3695308 33703006 3 16.0978 +rs13483207 11226999 18 4.2883 +rs3686912 64326546 11 36.9343 +rs13478738 46837420 6 24.0994 +rs8257588 47863308 19 40.8718 +rs13480275 73613909 9 45.4436 +rs3705458 99300806 5 61.4322 +rs13476378 22396323 2 14.8814 +rs13475963 NA 1 48.7321 +rs13478983 114263350 6 61.2351 +rs13481480 59142685 12 27.809 +rs6209043 89939632 4 54.6618 +rs13477130 57833682 3 27.879 +rs6409040 49076611 15 18.363 +rs13483178 94236078 17 63.0775 +rs4224501 56744581 4 39.532 +rs13480194 52312797 9 32.1413 +rs3714056 68879859 13 33.6366 +rs3705396 85421292 3 39.7315 +rs13477083 43525954 3 21.2979 +rs4139262 8919487 19 1.97111 +mCV23822994 NA 17 8.73244 +rs13475774 26044918 1 9.60917 +rs13482109 27631766 14 11.2008 +CEL-7_41997914 NA 7 29.2827 +CEL-10_74277238 NA 10 42.8842 +rs3022934 133563227 2 73.1717 +CEL-6_84629314 NA 6 40.551 +rs4228452 111294252 10 64.1812 +rs13480988 41682478 11 21.6189 +UT_8_72.075442 NA 8 32.7213 +rs6201966 19844417 7 2.78979 +rs8244157 54618045 14 18.5842 +rs3663375 93316837 10 54.9628 +rs6353774 59723178 1 29.093 +rs6185261 12011734 11 5.28424 +rs3713696 23486604 9 7.56987 +gnf02.026.314 NA 2 22.6103 +rs6343757 6027976 6 1.47261 +rs13477378 122515116 3 59.6775 +rs3715857 19251978 15 7.47391 +rs13478798 64385626 6 32.4757 +CEL-5_56325748 NA 5 33.8212 +rs13480659 77467415 10 45.1763 +rs13478839 77811523 6 39.1209 +rs6187389 86170027 15 43.1673 +rs4136879 41440054 2 26.8189 +rs6281838 129945301 1 67.5078 +rs13476304 191230917 1 114.102 +gnf14.103.048 NA 14 47.6719 +UT_3_105.614392 NA 3 49.9643 +rs13480601 44306419 10 21.5679 +rs13483549 18156623 19 11.4771 +rs6223790 25520164 5 14.5473 +rs6298808 49216689 10 24.4764 +rs6263067 127168237 1 66.4928 +rs3669513 15559838 1 4.49129 +rs6260116 65803230 7 31.4936 +rs3664869 96886065 8 49.3778 +rs13482084 12241471 14 1.97196 +gnf10.082.230 NA 10 46.6722 +rs13483020 50512518 17 28.41 +rs3660486 32027092 17 14.5449 +rs3721276 14365170 3 1.72857 +CEL-5_117374791 NA 5 75.939 +rs3708633 118743250 13 69.5115 +gnf01.059.986 NA 1 29.9036 +rs13483358 50687407 18 28.7903 +mCV25009162 NA 5 100.287 +rs3694629 7565278 17 3.30346 +rs6186560 36105285 9 21.6087 +rs3710202 33692194 6 16.198 +rs13479768 54356344 8 28.4475 +rs13481308 13326947 12 5.82788 +rs3672124 110615073 12 66.5493 +rs13482714 92843376 15 53.8644 +rs3683699 30504073 18 16.2736 +rs4226572 44304449 7 23.8749 +rs13479694 34211730 8 19.8181 +rs6224522 90610312 3 41.1674 +rs13481041 56701595 11 30.6467 +rs3674364 84313424 13 44.1566 +rs13477030 27601284 3 9.34774 +rs13480845 5635235 11 1.91167 +rs8246989 7614643 3 0.121366 +rs6380710 26214430 1 10.0092 +UT_9_82.824657 NA 9 51.354 +rs6238322 28056254 19 22.4092 +rs6353097 14805061 5 5.75677 +gnf01.126.387 NA 1 66.0094 +rs4174329 38079234 16 29.2566 +rs3695075 101550815 12 56.7889 +rs6262666 41652135 1 20.5225 +rs13476186 153891983 1 76.4535 +mCV24130963 NA 19 0 +rs4139548 20371773 2 14.1165 +rs4226870 118119561 7 64.3133 +rs13476750 123655476 2 66.8472 +mCV22528760 NA 2 64.9448 +rs3702484 9688352 17 5.26721 +rs8252588 3713237 7 0.0234926 +rs13477692 48886872 4 31.7633 +UT_2_129.88318 NA 2 69.9554 +rs13482668 80923574 15 40.2533 +rs13459184 95122411 3 44.0512 +rs3700313 17681589 11 8.16015 +rs6371982 34135438 3 16.9723 +rs3700592 92343344 13 48.1792 +rs4211364 80830072 16 52.1212 +rs6229068 106516721 7 57.1148 +rs3667772 60168778 3 29.0757 +rs13480991 42821191 11 22.2087 +rs13482482 24593717 15 8.51311 +rs3689073 7385255 3 0.0958321 +rs13482731 97760644 15 59.8722 +rs3698196 86127915 3 39.7781 +mCV23893269 NA 9 0 +rs13482306 91549652 14 37.8223 +gnf10.004.839 NA 10 0.949684 +rs6241299 85595434 4 51.744 +rs13483217 14112799 18 5.68026 +mCV24217147 NA 10 71.1244 +rs3723643 122318583 2 66.64 +rs13483476 84649628 18 62.872 +rs6328018 26133356 12 10.1958 +rs13477019 23824920 3 7.72032 +rs3688968 119721407 4 71.3737 +CEL-14_90299389 NA 14 40.2483 +rs3676394 85374875 18 63.0525 +rs4222922 193272691 1 117.136 +rs13476081 119860126 1 58.2392 +rs13481650 111188138 12 66.8118 +rs13479188 35339838 7 17.7493 +rs3707749 63100183 17 35.1897 +rs3655334 118448960 7 64.3432 +rs3676100 13307520 14 2.50164 +rs13477594 20288095 4 9.69074 +rs3680100 21919402 18 10.2863 +rs3726406 112713037 4 67.3946 +rs3663355 48113334 4 31.5662 +rs13483149 86990367 17 59.774 +rs3668028 86433604 14 36.6159 +rs13477790 77633242 4 47.989 +rs13478413 99002935 5 61.4213 +rs13479197 37796997 7 21.0346 +petM-02162-1 NA 19 12.7818 +mCV24423497 NA 13 45.1619 +rs3705394 13565037 11 6.1493 +CEL-15_9687257 NA 15 2.71222 +rs3679120 22921279 10 8.01228 +rs6241653 133096673 1 68.2746 +rs13477092 47207215 3 21.6659 +mCV24338392 NA 17 8.98278 +CEL-7_64160024 NA 7 41.3808 +rs3709567 126209771 8 75.9823 +rs13476067 115261364 1 56.9937 +rs3670749 52959274 12 24.1983 +rs6361711 42638721 5 25.4031 +UT_13_4.391534 NA 13 0.0406796 +rs6245643 92582795 4 56.7146 +rs6212085 15901251 5 6.29309 +rs3706792 78990729 14 34.3249 +rs3703728 51409446 7 28.0787 +rs4185336 50013921 16 34.4523 +rs13477717 55490244 4 35.9432 +rs3718386 120762044 2 66.4325 +mhcCD8a2 NA 6 34.5609 +rs3676101 26196372 5 15.2652 +rs13480536 21933905 10 7.18871 +rs8253516 68882191 8 32.1055 +rs3689218 19023811 7 1.89155 +rs13476950 3785419 3 0.00564417 +rs3698001 100446376 12 56.4327 +rs13481791 44207516 13 18.0616 +rs3691938 68266513 5 40.0212 +rs3685822 64093541 9 38.7064 +rs3658897 32553900 12 13.9304 +rs3687275 43070188 19 36.8259 +UT_15_87.526763 NA 15 42.9087 +mCV23209429 NA 2 23.8655 +rs6230167 52546441 1 25.9716 +rs8236770 83672212 8 41.2256 +rs3694989 66158607 9 39.979 +rs13480358 96686737 9 57.9811 +rs13476077 118975178 1 57.8821 +rs3023025 143182416 4 90.5694 +rs13477904 109080466 4 66.4292 +CEL-7_43463457 NA 7 29.4967 +rs13477072 40459556 3 20.5114 +rs13478750 50520106 6 26.5327 +UT_17_25.804649 NA 17 10.4846 +rs6209951 94400523 11 57.079 +rs6162256 80059839 4 48.9979 +rs6305232 153227479 1 76.153 +rs13480400 106647931 9 66.2224 +rs3681670 52031205 14 18.5124 +rs3675388 152670038 2 81.6619 +rs4219590 92046041 16 59.4329 +rs3671174 28101343 18 15.114 +rs6239372 34698851 7 17.1564 +rs4197422 50582943 14 18.4722 +rs13475872 53011522 1 26.0101 +rs6261913 138649668 6 80.9736 +CEL-15_74419628 NA 15 35.7485 +rs6413270 37941444 9 22.0292 +rs3683507 79200892 3 35.9255 +rs3673976 43329014 19 36.8352 +rs6403494 54826720 17 30.9164 +rs13477493 151419353 3 81.4861 +rs6408534 35432115 5 20.2374 +mCV23717969 NA 4 8.05003 +CEL-6_8231384 NA 6 2.66868 +CEL-4_74066970 NA 4 47.4533 +rs3677115 40583019 19 34.61 +rs3696518 43956027 6 23.0561 +rs13478995 117470880 6 63.9779 +rs13477188 72112256 3 33.1754 +rs6250834 87578767 13 45.2162 +rs3682333 31949698 5 19.2374 +rs3714102 101893149 5 61.9542 +rs6220667 174204401 1 95.3999 +rs3711084 102307079 13 54.7342 +rs3023233 28596047 10 14.5969 +rs3726250 53008551 4 34.4287 +rs3705677 40419685 5 23.85 +rs6303064 19600292 18 8.2778 +rs13479287 62079258 7 30.5235 +CEL-1_140926026 NA 1 72.6592 +rs4165029 16262762 16 10.7089 +rs6353593 68424814 2 41.0552 +rs13475789 30005368 1 10.6518 +rs3684964 64661178 18 42.9849 +rs3021887 118115231 2 65.6914 +rs4227074 23448883 8 10.0443 +rs3688343 34272795 12 15.285 +CEL-19_34542259 NA 19 30.0494 +CEL-4_118427459 NA 4 72.0372 +rs13480907 22220570 11 11.8791 +rs3682128 37262913 14 15.2686 +rs3654569 52550668 9 32.2468 +rs3708061 81304223 4 49.1775 +rs13480114 26961500 9 10.8423 +rs13482620 66244636 15 32.0154 +rs3702451 102120297 13 54.6506 +rs8254782 115490033 6 61.9184 +rs4229363 53150467 12 24.2016 +rs3702220 80737850 13 40.6781 +rs3662056 105105757 4 64.7232 +rs3661718 144810830 2 77.9934 +gnf10.122.050 NA 10 75.6302 +CEL-17_91044280 NA 17 62.9279 +rs13477457 142436681 3 73.841 +rs13482013 111021030 13 64.6391 +rs13480721 97521558 10 57.1833 +rs13483472 83748624 18 62.6477 +rs3677807 106694559 8 55.5849 +rs3688854 20281706 2 14.0579 +gnf13.008.720 NA 13 1.56575 +rs13478015 139155261 4 86.1166 +rs6281562 126292376 1 65.9182 +rs13482331 98520204 14 40.473 +rs3700038 40378101 13 15.8963 +rs13459084 31196214 5 19.1588 +rs3672425 68346128 14 27.0922 +rs3662930 115522043 11 88.4566 +CEL-12_87311152 NA 12 50.508 +rs3662436 68825035 13 33.6148 +rs3708442 192343890 1 116.059 +rs3673457 81859840 9 51.9022 +rs6225875 117143301 14 52.1314 +rs3709561 44222161 15 16.1091 +mCV22965443 NA 17 16.8477 +gnf05.136.579 NA 5 96.0368 +rs3694949 38455561 9 22.1334 +mCV23446189 NA 3 62.2481 +rs13480443 117512156 9 80.2729 +rs3685643 162250061 1 81.8151 +rs6379062 54154770 9 32.9995 +rs13476062 114013439 1 56.3604 +UT_15_101.086192 NA 15 60.6777 +rs13459146 34517940 15 13.1797 +rs13477775 73942635 4 46.1614 +rs6255802 50772424 12 23.5146 +gnf10.130.258 NA 10 82.4073 +rs3658044 19291760 1 5.52206 +rs3721803 7003954 10 0.0282926 +rs4227587 32754921 9 16.4785 +rs3676483 44938143 18 25.1621 +mCV22625013 NA 11 87.2647 +rs13482065 14353510 14 2.75302 +rs3698600 15055680 14 2.83797 +rs13481410 40842857 12 20.3982 +rs3727022 104351877 2 60.6366 +gnf08.031.589 NA 8 18.1181 +rs6263485 90009339 2 53.1725 +rs6188665 65422955 16 41.0204 +rs13483016 49557882 17 27.621 +rs3722112 119168908 7 64.4331 +rs6355445 69962379 9 43.8183 +rs6217010 96031708 3 44.6523 +rs3675669 189806053 1 111.506 +rs3703712 72766774 9 45.0321 +rs13476989 16133288 3 2.73571 +rs6217275 36426704 7 18.4446 +rs3680632 8376061 12 2.38564 +rs3667748 16644045 17 7.23965 +rs13480251 66990365 9 40.4916 +UT_6_5.754097 NA 6 1.47255 +rs6196597 104974507 10 61.2507 +rs3678364 11077682 10 1.77062 +rs6342608 78159752 15 38.865 +rs3695107 70414591 5 40.4936 +rs6370750 49485509 5 29.1957 +rs13476128 136824086 1 70.9481 +rs3654251 23990827 5 10.7105 +rs3672987 33110220 17 16.0344 +gnf13.057.762 NA 13 28.499 +rs13481985 98646123 13 52.1827 +rs3655885 45438349 1 23.3298 +rs13481830 58441821 13 27.839 +rs3665127 39413046 1 18.0974 +rs3709285 159034957 1 80.122 +rs3695382 36324656 12 17.4139 +gnf01.148.750 NA 1 74.0501 +rs13482195 56370859 14 18.6328 +rs13483212 12110690 18 4.51687 +rs13478952 106305503 6 56.6647 +rs13476291 187611490 1 107.862 +rs13475727 12388784 1 3.32307 +rs3695266 158005173 2 83.8758 +rs3656875 68196791 8 31.9452 +rs3697020 125538952 2 67.8524 +rs4172185 34466988 16 26.725 +mCV22595554 NA 9 39.7043 +rs13481855 65296578 13 32.5145 +UT_17_75.437568 NA 17 47.0353 +rs3660588 30328765 3 13.2523 +rs3687764 95749389 4 58.6842 +rs6366840 29150899 19 23.2882 +rs3023981 70099153 4 45.8222 +mCV24436061 NA 8 23.0116 +rs13481070 65095597 11 37.9938 +rs3655057 13383077 12 5.85244 +gnf03.067.696 NA 3 33.0652 +rs6185468 70243524 14 27.7446 +rs13479880 86747129 8 42.9433 +rs4197693 67131002 16 43.9926 +mCV23045722 NA 19 39.618 +gnf07.129.013 NA 7 79.2206 +rs6170261 59044168 16 38.3828 +gnf10.009.098 NA 10 1.86089 +rs13479952 100909560 8 51.1506 +rs3684103 94242458 4 57.7461 +rs3688141 75192214 17 47.297 +rs3688207 45359288 13 18.269 +rs3696331 117410810 4 68.8658 +rs3149842 118647783 2 65.7741 +rs13477114 53289168 3 25.342 +rs4137908 125644708 1 65.783 +rs13478404 96921361 5 60.7814 +rs3670168 128292534 3 61.6118 +rs6318455 71935887 4 45.9727 +gnf03.072.147 NA 3 34.0507 +rs13483558 20593264 19 12.8776 +rs13479329 72269923 7 38.9093 +rs13481462 54640358 12 25.3172 +rs13481061 62992617 11 35.4556 +gnf08.026.892 NA 8 15.561 +rs13483222 15295934 18 6.3883 +rs13479624 16859147 8 5.24451 +rs3712524 169850145 1 89.6632 +rs3667255 64002748 8 30.5917 +rs6393943 114407245 6 61.3394 +rs13477494 151564321 3 81.4878 +rs3700012 101759668 12 56.823 +rs6390142 78868207 13 40.2396 +rs3724616 3000665 17 0 +rs4227609 44391960 9 28.3837 +mCV23575714 NA 1 115.926 +rs6364065 23474991 7 2.88943 +rs8275764 65231542 4 44.0691 +rs13480454 120697838 9 82.8681 +rs13476466 43977995 2 27.7787 +rs4165252 20737433 16 13.0361 +rs3677121 88276304 11 53.5264 +rs13482870 10724549 17 5.54242 +rs6163209 101958342 11 68.0653 +UT_3_34.779033 NA 3 16.1586 +rs6160457 64275598 6 32.463 +rs13476793 136427281 2 75.2043 +rs13479140 16960631 7 1.53121 +rs4223701 181516393 2 108.339 +rs13478053 151285670 4 96.9651 +rs13478178 33984479 5 19.9019 +mCV23851630 NA 11 6.02756 +rs4219905 92999666 16 60.0987 +gnf12.013.284 NA 12 7.33577 +rs13478398 93725627 5 60.0184 +gnf14.013.298 NA 14 5.98588 +rs6374597 110506551 9 70.1087 +rs3023265 57760615 11 32.0606 +rs13475785 28820258 1 10.369 +mCV24528954 NA 3 29.874 +rs6187777 141124127 2 76.8726 +rs6208887 86407128 17 59.5461 +gnf08.098.331 NA 8 49.439 +rs4232105 23165771 19 17.9673 +rs3655098 68648726 9 42.2188 +gnf03.079.138 NA 3 37.9533 +rs6272967 39325001 8 22.1753 +CEL-8_7689226 NA 8 0.934165 +rs3671527 99145299 4 60.3594 +rs6335368 9779999 11 4.00031 +rs4170074 32194780 16 23.6911 +rs3686804 79333348 3 35.933 +rs3716195 42056780 5 25.0781 +rs13481456 52868858 12 24.1968 +rs13477807 83002718 4 50.844 +CEL-1_101048458 NA 1 54.0781 +rs3666667 30760759 15 11.2639 +rs13475964 86305558 1 48.7524 +rs3704471 126179960 2 67.88 +rs13459095 149946882 5 106.163 +rs13483631 42250015 19 36.2254 +rs6339313 65706902 5 36.1283 +rs13479086 145295816 6 88.1522 +rs13480023 122762890 8 72.8859 +rs13477952 122951885 4 74.1086 +rs3718641 53706897 7 29.0918 +mCV22331476 NA 18 30.7475 +rs13481702 14756099 13 2.29972 +UT_4_57.645957 NA 4 41.0374 +rs3659348 104390684 7 56.2984 +rs13481893 77562077 13 40.0012 +rs6278585 76817924 17 48.6672 +rs13483044 55948460 17 31.4437 +rs3703582 75871062 13 39.8798 +rs13476705 113074931 2 63.047 +rs6230748 116670570 11 89.2832 +gnf18.012.857 NA 18 7.00811 +rs3708749 80820030 5 51.6511 +rs4206054 75907368 16 50.0217 +rs13481285 7014383 12 1.03636 +rs13477397 128636596 3 61.7118 +rs13482112 28314296 14 11.4074 +rs3663650 41882900 8 22.7548 +rs4231720 86810010 17 59.7523 +mCV24380249 NA 1 51.0885 +rs3723136 92666016 6 44.8436 +rs6238153 95354108 11 57.2209 +rs13482509 32042169 15 12.0618 +gnf18.014.547 NA 18 7.83115 +rs6381624 75950905 18 55.4755 +rs13483213 12550519 18 4.95258 +rs3679439 97128561 8 49.4815 +rs6306557 148394294 4 92.7194 +CEL-9_17007263 NA 9 3.44286 +rs6358447 97626757 1 53.3083 +rs13477303 103988744 3 50.1972 +UT_11_29.443258 NA 11 14.9225 +rs13479979 108947107 8 58.3727 +rs3719870 66727372 5 38.8588 +rs13481585 91259993 12 49.0705 +rs3719672 67527644 14 26.0991 +rs13476880 161889604 2 87.1829 +rs3727109 52293740 12 24.0607 +rs3682341 68078502 17 40.7239 +gnf19.005.316 NA 19 1.23778 +rs4166461 28404556 16 20.1233 +UT_2_70.039995 NA 2 42.1196 +rs4223511 131993535 2 71.8356 +rs3088491 12430080 15 4.73955 +rs13483438 74218827 18 54.9736 +CEL-3_137067761 NA 3 67.9901 +rs3675505 95138968 1 52.8545 +CEL-4_6055375 NA 4 0.85 +rs3656005 45334674 19 39.0591 +CEL-8_94486767 NA 8 47.6569 +rs4152838 5029450 16 0.060615 +rs6244900 82656865 17 53.654 +rs3660226 66950667 12 30.0297 +rs4231834 44021991 18 24.869 +CEL-9_98918429 NA 9 58.8341 +UT_3_89.90208 NA 3 40.7882 +rs6350869 60557821 18 38.2388 +rs13480856 7952877 11 2.875 +rs13478502 124000381 5 78.3279 +gnf09.028.495 NA 9 17.9921 +mCV24205612 NA 10 61.6863 +CEL-4_78089985 NA 4 48.9627 +rs4165510 27359166 16 19.5495 +rs3680197 44208892 2 28.02 +rs4223969 55049341 3 26.23 +rs13479476 116888764 7 63.592 +rs13482622 66676342 15 32.0824 +CEL-7_122606289 NA 7 83.7589 +gnf02.161.674 NA 2 84.0244 +rs8255228 125914815 3 60.8674 +rs13478733 45626021 6 23.6075 +UT_14_10.122547 NA 14 2.80698 +rs4165334 23467605 16 15.1795 +rs3709732 117966892 3 57.3639 +rs3682937 4680332 11 0.368248 +rs13477065 38106857 3 19.7519 +CEL-13_22517579 NA 13 5.97895 +rs13475064 88909952 3 41.0153 +rs4226048 83769061 6 40.4709 +gnf11.098.224 NA 11 55.8043 +rs13481380 32097332 12 13.8492 +CEL-5_15079682 NA 5 7.01831 +rs4219072 91372335 16 58.9792 +rs13481012 47164882 11 25.2198 +rs3702472 68091016 8 31.8841 +rs13478051 150948093 4 96.0396 +rs13480522 18897378 10 5.73292 +gnf19.052.359 NA 19 47.1283 +rs6227698 75773696 13 39.8739 +rs3724411 30763164 12 13.2573 +rs3708313 100432682 7 55.1222 +rs13480065 9290508 9 0.0798989 +rs6265656 156863894 2 83.2744 +CEL-2_168918742 NA 2 92.6145 +rs3718133 48701997 18 27.1292 +rs13481827 57777869 13 27.279 +rs3703241 57632187 17 32.2351 +rs3162895 64201441 1 31.943 +rs13476805 139724060 2 76.3511 +rs4173581 36240751 16 28.1142 +rs13480451 119956886 9 82.5477 +rs13479823 71315172 8 32.558 +gnf10.055.290 NA 10 28.0444 +rs3726588 82107859 13 42.8688 +mCV22794972 NA 9 50.157 +rs4221957 129825026 3 63.6595 +rs3695907 10708690 11 4.22495 +rs6357455 138894858 3 69.0818 +rs3719497 25502498 17 10.2689 +rs6247999 10092290 9 0.092191 +rs13479419 96873989 7 53.5015 +rs13478796 63559836 6 32.2792 +rs3658433 84048258 11 51.3069 +rs13480436 114920618 9 77.1771 +rs3717846 64516102 7 30.9685 +gnf12.060.469 NA 12 28.5514 +rs3692487 5455372 2 1.63333 +UT_4_32.135337 NA 4 15.6675 +rs13478522 128472892 5 82.0077 +rs3683834 58573929 17 32.6778 +rs3667738 38492075 8 22.0235 +rs13459110 71483232 9 44.5914 +CEL-3_156007479 NA 3 85.6353 +rs3655717 65465164 9 39.5517 +rs13476607 82857454 2 52.0807 +rs13479239 50250025 7 27.4887 +mCV23970227 NA 1 117.959 +rs6284081 118123572 10 71.4247 +rs13483579 27168119 19 21.6241 +rs13479763 53255231 8 28.4346 +rs13481194 101599023 11 66.812 +gnf14.018.589 NA 14 9.63075 +rs13481304 12560594 12 5.48029 +rs13478640 15793571 6 5.46185 +rs13478205 41042724 5 24.6781 +rs6213741 51592773 12 23.7299 +rs3689336 96412477 9 57.8953 +mCV25230888 NA 8 8.22771 +rs3675740 16259704 17 6.9478 +rs13478133 21614246 5 9.48779 +gnf09.020.405 NA 9 9.92649 +rs13482654 76694306 15 37.9741 +rs13482681 84037487 15 41.9407 +rs4136200 76792153 6 36.8737 +gnf07.061.149 NA 7 32.2117 +rs3696278 96385635 13 51.2258 +rs3698831 22282024 15 7.73826 +rs6307147 149955081 3 81.0356 +rs3691784 103605005 7 55.4013 +UT_2_144.017259 NA 2 77.8769 +rs13478215 43916770 5 25.9162 +rs6343029 102342395 8 51.1738 +rs13478370 85461198 5 52.1106 +CEL-12_87170441 NA 12 50.4505 +rs6362861 29524193 17 12.9715 +rs3716646 42700910 12 21.5678 +CEL-11_121219118 NA 11 93.4868 +rs6315002 103049359 4 62.6978 +rs3722681 109772459 3 53.7748 +rs13476368 19754856 2 13.199 +rs3688959 103109374 13 55.6665 +rs6320527 18153796 13 3.1297 +rs13480629 67616314 10 39.2775 +rs13479085 144961358 6 88.0165 +rs3697419 51271183 15 19.473 +rs3696307 53743149 10 26.4057 +rs6176848 62074692 3 29.7978 +rs13482712 92246015 15 53.6547 +rs6351657 72658698 3 33.21 +rs3706825 25536653 10 10.7893 +rs13477845 93583955 4 57.4851 +rs3665582 24949551 6 11.044 +rs13477268 93334805 3 43.2302 +rs4179233 43610765 16 32.1165 +rs13476530 60018236 2 35.735 +CEL-3_94149786 NA 3 43.4168 +rs13475888 61832866 1 29.7573 +UT_17_86.377392 NA 17 57.3467 +rs13476581 75183883 2 46.8876 +gnf09.016.676 NA 9 6.9609 +rs6328845 57804215 18 35.0594 +rs3683092 153948287 2 82.1402 +rs6301542 24209205 2 15.9398 +rs3678662 132481915 1 67.8244 +rs13479965 105786703 8 55.4047 +rs13478449 108317299 5 67.1726 +rs4165065 17412079 16 11.2657 +rs3142215 89010568 6 43.287 +rs3152403 120578808 6 66.2133 +rs3662946 74972027 15 35.9599 +rs13481576 87436066 12 46.0183 +rs3699909 55797488 2 32.6046 +rs3695682 101720448 2 59.8233 +rs6191112 103953719 5 64.2996 +rs6326787 48710811 11 25.6949 +gnf10.008.653 NA 10 1.81036 +rs6281696 125587031 10 78.7936 +rs13482998 44087916 17 20.9796 +rs3701429 30273156 6 11.8836 +rs4165893 27642868 16 19.7544 +rs6223813 20793953 19 12.9872 +rs3692039 17860829 5 7.8506 +rs6224196 95167096 7 52.8414 +rs13477307 104568741 3 50.4719 +rs6248743 53445370 9 32.4421 +rs13479628 18316447 8 6.78878 +rs3710365 148943308 5 105.327 +rs13482200 58311845 14 20.9659 +rs13477979 130327192 4 79.6086 +UT_7_56.715945 NA 7 30.9113 +rs13459144 75917673 14 33.1319 +rs6296253 64089536 10 32.0596 +rs6225523 13412177 10 2.80997 +mCV23785265 NA 11 17.2534 +rs13483306 37690719 18 19.6422 +rs13481226 110180261 11 79.752 +rs8274734 97578265 16 63.2491 +rs13479795 62814479 8 30.4128 +rs13477352 114824207 3 55.2 +rs6318992 129264054 2 69.643 +rs4161857 11503560 16 7.10762 +rs6238771 45246665 6 23.5263 +rs4229612 113258631 12 67.873 +rs3690631 21752252 14 6.60107 +rs13478964 109290415 6 57.5866 +rs3677539 148598417 6 89.7667 +rs13482353 105541900 14 47.4948 +rs13479934 96039380 8 48.5991 +CEL-17_76502178 NA 17 49.1803 +rs6239023 94055997 6 46.0336 +rs3022845 159757167 1 80.4004 +gnf05.084.314 NA 5 52.8925 +rs13476095 125585192 1 65.7706 +rs3677272 40581902 1 19.7657 +rs13480567 29165512 10 14.963 +rs13478818 73103167 6 35.6966 +rs6256981 47991663 15 17.613 +rs6217029 30238744 9 14.3296 +rs3690783 139005099 6 81.2162 +rs13482930 26814003 17 11.0915 +rs6257357 85547561 8 42.4327 +rs3668888 9721365 18 3.52581 +rs13480323 87421906 9 54.244 +rs13483471 83708324 18 62.6377 +rs3710549 61191513 14 22.6801 +rs3692549 79764683 1 45.6659 +rs6394046 62503744 8 30.356 +rs13480294 78778319 9 50.2908 +rs13478852 80286336 6 39.5394 +rs4222426 72394390 1 37.9167 +rs13478999 118802009 6 65.0539 +rs3710273 72608518 16 46.4118 +rs3654283 36521893 14 14.7327 +rs6306968 14321330 19 5.10557 +rs6310133 53537973 16 36.5582 +rs6195266 69739630 6 33.2752 +rs3718631 85338422 14 36.4695 +rs3679962 128092617 3 61.3572 +CEL-2_63143553 NA 2 38.6366 +rs13480164 43734825 9 27.2757 +rs13483209 11474843 18 4.30049 +rs3698364 80916231 6 39.6459 +rs3689948 58617854 14 21.1439 +rs4160175 9358371 16 4.28429 +mCV24089992 NA 4 44.0233 +rs3696018 57106985 7 29.5787 +rs13480279 74535146 9 46.404 +mCV22750087 NA 15 38.095 +rs4184636 49377266 16 33.9214 +rs13478690 30922499 6 11.9082 +rs13476231 170308965 1 89.7295 +rs3661171 108857576 2 62.3013 +gnf07.132.051 NA 7 83.9304 +CEL-1_103029662 NA 1 54.1438 +gnf08.108.032 NA 8 55.7296 +rs13476639 92826811 2 54.5724 +rs13480232 62166177 9 37.663 +rs13476302 190821384 1 113.581 +CEL-13_25470193 NA 13 8.29689 +rs6234428 75800556 6 36.0448 +rs6346661 114956180 13 65.9882 +rs13479311 67691836 7 33.4006 +rs3671277 59444538 4 42.5784 +rs6167568 6643072 9 0.0399495 +rs3661657 84732368 11 51.3319 +rs13477529 146081755 3 77.4519 +rs13476064 114591926 1 56.654 +rs3724658 72745162 9 45.0225 +gnf07.032.889 NA 7 23.318 +rs3713198 14474864 1 4.11122 +rs13478010 138126234 4 85.7815 +CEL-11_40351206 NA 11 21.4824 +rs6403541 90670588 8 45.2098 +rs6325730 91044525 3 42.0909 +rs13479983 109935693 8 59.4531 +rs13480162 43331857 9 27.0228 +rs3156208 67173026 4 44.7891 +rs13481010 46817658 11 24.9759 +rs13479570 144628561 7 89.5022 +rs3712722 97810460 5 60.9961 +rs13477113 53269733 3 25.3194 +rs3726227 68844133 2 41.1018 +rs13481552 79385348 12 40.7924 +rs3710055 87957884 15 46.5522 +rs4151935 63773115 16 40.099 +rs13478434 104143745 5 64.3726 +rs13459062 31172022 2 23.8501 +rs3655898 33679585 9 17.9254 +rs13480173 46528572 9 28.9087 +mCV23509699 NA 6 75.5597 +rs13481547 76043647 12 37.4975 +rs3714431 57709572 4 40.8898 +gnf02.119.284 NA 2 66.4519 +rs3680871 59243995 3 28.8458 +rs4173052 35752888 16 28.0389 +rs6299531 122925223 9 84.6598 +rs13476463 43444560 2 27.291 +rs13478913 96245986 6 50.0936 +rs6317860 52146393 16 35.9934 +rs13478963 109153049 6 57.5694 +rs3679473 99813179 7 54.9201 +rs13476615 85785881 2 52.838 +rs3661039 90358030 6 43.6703 +rs13480015 120635544 8 71.0562 +rs6160140 66281288 7 31.6887 +rs13483184 3796542 18 0.1 +rs13476330 5846367 2 2.45833 +gnf14.019.954 NA 14 10.9232 +mCV23128319 NA 10 0.428594 +UT_3_89.066701 NA 3 40.0675 +rs13479879 86508968 8 42.8263 diff --git a/src/debug.h b/src/debug.h new file mode 100644 index 0000000..84637e1 --- /dev/null +++ b/src/debug.h @@ -0,0 +1,46 @@ +#ifndef __DEBUG_H__ +#define __DEBUG_H__ + +// enforce works like assert but also when NDEBUG is set (i.e., it +// always works). enforce_msg prints message instead of expr + +#if defined NDEBUG +#define debug_msg(msg) +#else +#define debug_msg(msg) cout << "**** DEBUG: " << msg << endl; +#endif + +/* This prints an "Assertion failed" message and aborts. */ +extern "C" void __assert_fail(const char *__assertion, const char *__file, + unsigned int __line, + const char *__function) __THROW + __attribute__((__noreturn__)); + +#define __ASSERT_FUNCTION __PRETTY_FUNCTION__ + +#define enforce(expr) \ + ((expr) \ + ? __ASSERT_VOID_CAST(0) \ + : __assert_fail(__STRING(expr), __FILE__, __LINE__, __ASSERT_FUNCTION)) + +#define enforce_msg(expr, msg) \ + ((expr) ? __ASSERT_VOID_CAST(0) \ + : __assert_fail(msg, __FILE__, __LINE__, __ASSERT_FUNCTION)) + +#define enforce_str(expr, msg) \ + ((expr) \ + ? __ASSERT_VOID_CAST(0) \ + : __assert_fail((msg).c_str(), __FILE__, __LINE__, __ASSERT_FUNCTION)) + +// Helpers to create a unique varname per MACRO +#define COMBINE1(X, Y) X##Y +#define COMBINE(X, Y) COMBINE1(X, Y) + +#define enforce_gsl(expr) \ + auto COMBINE(res, __LINE__) = (expr); \ + (COMBINE(res, __LINE__) == 0 \ + ? __ASSERT_VOID_CAST(0) \ + : __assert_fail(gsl_strerror(COMBINE(res, __LINE__)), __FILE__, \ + __LINE__, __ASSERT_FUNCTION)) + +#endif diff --git a/src/gemma.cpp b/src/gemma.cpp index c72475b..0fb8c54 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -151,6 +151,7 @@ void GEMMA::PrintHelp(size_t option) { cout << " 11: obtain predicted values" << endl; cout << " 12: calculate snp variance covariance" << endl; cout << " 13: note" << endl; + cout << " 14: debug options" << endl; cout << endl; } @@ -446,7 +447,16 @@ void GEMMA::PrintHelp(size_t option) { } if (option == 4) { - cout << " RELATEDNESS MATRIX CALCULATION OPTIONS" << endl; + cout << " RELATEDNESS MATRIX (K) CALCULATION OPTIONS" << endl; + cout << " -ksnps [filename] " + << " specify input snps file name to compute K" << endl; + cout << " -loco [chr] " + << " leave one chromosome out (LOCO) by name (requires -a annotation " + "file)" + << endl; + cout << " -a [filename] " + << " specify input BIMBAM SNP annotation file name (LOCO only)" + << endl; cout << " -gk [num] " << " specify which type of kinship/relatedness matrix to generate " "(default 1)" @@ -682,6 +692,14 @@ void GEMMA::PrintHelp(size_t option) { cout << endl; } + if (option == 14) { + cout << " DEBUG OPTIONS" << endl; + cout << " -silence silent terminal display" << endl; + cout << " -debug debug output" << endl; + cout << " -nind [num] read up to num individuals" << endl; + cout << endl; + } + return; } @@ -1008,7 +1026,7 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { str.clear(); str.assign(argv[i]); cPar.k_mode = atoi(str.c_str()); - } else if (strcmp(argv[i], "-n") == 0) { + } else if (strcmp(argv[i], "-n") == 0) { // set pheno column (range) (cPar.p_column).clear(); while (argv[i + 1] != NULL && argv[i + 1][0] != '-') { ++i; @@ -1076,6 +1094,12 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { cPar.r2_level = atof(str.c_str()); } else if (strcmp(argv[i], "-notsnp") == 0) { cPar.maf_level = -1; + } else if (strcmp(argv[i], "-loco") == 0) { + assert(argv[i + 1]); + ++i; + str.clear(); + str.assign(argv[i]); + cPar.loco = str; } else if (strcmp(argv[i], "-gk") == 0) { if (cPar.a_mode != 0) { cPar.error = true; @@ -1315,6 +1339,14 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { str.clear(); str.assign(argv[i]); cPar.nr_iter = atoi(str.c_str()); + } else if (strcmp(argv[i], "-nind") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.ni_max = atoi(str.c_str()); // for testing purposes } else if (strcmp(argv[i], "-emp") == 0) { if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { continue; @@ -1533,6 +1565,8 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { str.clear(); str.assign(argv[i]); cPar.window_ns = atoi(str.c_str()); + } else if (strcmp(argv[i], "-debug") == 0) { + cPar.mode_debug = true; } else { cout << "error! unrecognized option: " << argv[i] << endl; cPar.error = true; @@ -1808,14 +1842,16 @@ void GEMMA::BatchRun(PARAM &cPar) { gsl_matrix_free(H_full); } - // Generate Kinship matrix + // Generate Kinship matrix (optionally using LOCO) if (cPar.a_mode == 21 || cPar.a_mode == 22) { cout << "Calculating Relatedness Matrix ... " << endl; gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total); + enforce_msg(G, "allocate G"); // just to be sure time_start = clock(); cPar.CalcKin(G); + cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); if (cPar.error == true) { cout << "error! fail to calculate relatedness matrix. " << endl; @@ -2613,6 +2649,7 @@ return;} cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph); + enforce_msg(Y, "allocate Y"); // just to be sure gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt); gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c // matrix @@ -2749,7 +2786,7 @@ return;} CalcUtX(U, Y, UtY); // calculate REMLE/MLE estimate and pve for univariate model - if (cPar.n_ph == 1) { + if (cPar.n_ph == 1) { // one phenotype gsl_vector_view beta = gsl_matrix_row(B, 0); gsl_vector_view se_beta = gsl_matrix_row(se_B, 0); gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); @@ -2819,7 +2856,7 @@ return;} } } - // Fit LMM or mvLMM + // Fit LMM or mvLMM (w. LOCO) if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 || cPar.a_mode == 4) { if (cPar.n_ph == 1) { @@ -2844,7 +2881,7 @@ return;} } else { if (cPar.file_gxe.empty()) { cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector); + &Y_col.vector, cPar.setGWASnps); } else { cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env); @@ -25,6 +25,7 @@ #include <iomanip> #include <iostream> #include <map> +#include <regex> #include <set> #include <sstream> #include <stdio.h> @@ -132,7 +133,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) { } // Read SNP file. -bool ReadFile_snps(const string &file_snps, set<string> &setSnps) { +bool ReadFile_snps(const string file_snps, set<string> &setSnps) { setSnps.clear(); igzstream infile(file_snps.c_str(), igzstream::in); @@ -654,6 +655,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, major = ch_ptr; if (setSnps.size() != 0 && setSnps.count(rs) == 0) { + // if SNP in geno but not in -snps we add an missing value SNPINFO sInfo = {"-9", rs, -9, -9, minor, major, 0, -9, -9, 0, 0, file_pos}; snpInfo.push_back(sInfo); @@ -684,9 +686,8 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, gsl_vector_set_zero(genotype_miss); for (int i = 0; i < ni_total; ++i) { ch_ptr = strtok(NULL, " , \t"); - if (indicator_idv[i] == 0) { + if (indicator_idv[i] == 0) continue; - } if (strcmp(ch_ptr, "NA") == 0) { gsl_vector_set(genotype_miss, c_idv, 1); @@ -1334,55 +1335,78 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { } // Read bimbam mean genotype file and calculate kinship matrix. -bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin) { +bool BimbamKin(const string file_geno, const set<string> ksnps, + vector<int> &indicator_snp, const int k_mode, + const int display_pace, gsl_matrix *matrix_kin, + const bool test_nind) { igzstream infile(file_geno.c_str(), igzstream::in); - if (!infile) { - cout << "error reading genotype file:" << file_geno << endl; - return false; - } - - string line; - char *ch_ptr; + enforce_msg(infile, "error reading genotype file"); size_t n_miss; double d, geno_mean, geno_var; + // setKSnp and/or LOCO support + bool process_ksnps = ksnps.size(); + size_t ni_total = matrix_kin->size1; gsl_vector *geno = gsl_vector_alloc(ni_total); gsl_vector *geno_miss = gsl_vector_alloc(ni_total); - // Create a large matrix. - size_t msize = 10000; + // Xlarge contains inds x markers + const size_t msize = K_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize); + enforce_msg(Xlarge, "allocate Xlarge"); + gsl_matrix_set_zero(Xlarge); + // For every SNP read the genotype per individual size_t ns_test = 0; for (size_t t = 0; t < indicator_snp.size(); ++t) { + string line; !safeGetline(infile, line).eof(); if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); } - if (indicator_snp[t] == 0) { + if (indicator_snp[t] == 0) continue; - } - ch_ptr = strtok((char *)line.c_str(), " , \t"); - ch_ptr = strtok(NULL, " , \t"); - ch_ptr = strtok(NULL, " , \t"); + std::regex_token_iterator<std::string::iterator> rend; + regex split_on("[,[:blank:]]+"); + regex_token_iterator<string::iterator> tokens(line.begin(), line.end(), + split_on, -1); + if (test_nind) { + // ascertain the number of genotype fields match + uint token_num = 0; + for (auto x = tokens; x != rend; x++) + token_num++; + enforce_str(token_num == ni_total + 3, line + " count fields"); + } + + auto snp = *tokens; // first field + // check whether SNP is included in ksnps (used by LOCO) + if (process_ksnps && ksnps.count(snp) == 0) + continue; + + tokens++; // skip nucleotide fields + tokens++; // skip nucleotide fields + // calc SNP stats geno_mean = 0.0; n_miss = 0; geno_var = 0.0; gsl_vector_set_all(geno_miss, 0); for (size_t i = 0; i < ni_total; ++i) { - ch_ptr = strtok(NULL, " , \t"); - if (strcmp(ch_ptr, "NA") == 0) { + tokens++; + enforce_str(tokens != rend, line + " number of fields"); + string field = *tokens; + if (field == "NA") { gsl_vector_set(geno_miss, i, 0); n_miss++; } else { - d = atof(ch_ptr); + d = stod(field); + // make sure genotype field contains a number + if (field != "0" && field != "0.0") + enforce_str(d != 0.0f, field); gsl_vector_set(geno, i, d); gsl_vector_set(geno_miss, i, 1); geno_mean += d; @@ -1406,23 +1430,28 @@ bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, if (k_mode == 2 && geno_var != 0) { gsl_vector_scale(geno, 1.0 / sqrt(geno_var)); } + // set the SNP column ns_test gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % msize); - gsl_vector_memcpy(&Xlarge_col.vector, geno); + enforce_gsl(gsl_vector_memcpy(&Xlarge_col.vector, geno)); ns_test++; + // compute kinship matrix and return in matrix_kin a SNP at a time if (ns_test % msize == 0) { eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); gsl_matrix_set_zero(Xlarge); } } - if (ns_test % msize != 0) { eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); } cout << endl; - gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test); + // scale the kinship matrix + enforce_gsl(gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test)); + + // and transpose + // FIXME: the following is very slow for (size_t i = 0; i < ni_total; ++i) { for (size_t j = 0; j < i; ++j) { @@ -1430,6 +1459,8 @@ bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, gsl_matrix_set(matrix_kin, i, j, d); } } + // GSL is faster - and there are even faster methods + // enforce_gsl(gsl_matrix_transpose(matrix_kin)); gsl_vector_free(geno); gsl_vector_free(geno_miss); @@ -1463,7 +1494,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, int n_bit; // Create a large matrix. - size_t msize = 10000; + const size_t msize = K_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize); gsl_matrix_set_zero(Xlarge); @@ -1583,7 +1614,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, // Read bimbam mean genotype file, the second time, recode "mean" // genotype and calculate K. -bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, +bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { igzstream infile(file_geno.c_str(), igzstream::in); @@ -3326,13 +3357,14 @@ bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat, // Read bimbam mean genotype file and calculate kinship matrix; this // time, the kinship matrix is not centered, and can contain multiple // K matrix. -bool BimbamKin(const string &file_geno, const int display_pace, - const vector<int> &indicator_idv, - const vector<int> &indicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<SNPINFO> &snpInfo, const gsl_matrix *W, - gsl_matrix *matrix_kin, gsl_vector *vector_ns) { +bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps, + const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns) { igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -3368,7 +3400,7 @@ bool BimbamKin(const string &file_geno, const int display_pace, } // Create a large matrix. - size_t msize = 10000; + const size_t msize = K_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc); gsl_matrix_set_zero(Xlarge); @@ -3378,9 +3410,8 @@ bool BimbamKin(const string &file_geno, const int display_pace, if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); } - if (indicator_snp[t] == 0) { + if (indicator_snp[t] == 0) continue; - } ch_ptr = strtok((char *)line.c_str(), " , \t"); ch_ptr = strtok(NULL, " , \t"); @@ -3562,7 +3593,7 @@ bool PlinkKin(const string &file_bed, const int display_pace, } // Create a large matrix. - size_t msize = 10000; + const size_t msize = K_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc); gsl_matrix_set_zero(Xlarge); @@ -3742,7 +3773,8 @@ bool PlinkKin(const string &file_bed, const int display_pace, } bool MFILEKin(const size_t mfile_mode, const string &file_mfile, - const int display_pace, const vector<int> &indicator_idv, + const set<string> setKSnps, const int display_pace, + const vector<int> &indicator_idv, const vector<vector<int>> &mindicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, @@ -3774,8 +3806,9 @@ bool MFILEKin(const size_t mfile_mode, const string &file_mfile, PlinkKin(file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); } else { - BimbamKin(file_name, display_pace, indicator_idv, mindicator_snp[l], - mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); + BimbamKinUncentered(file_name, setKSnps, display_pace, indicator_idv, + mindicator_snp[l], mapRS2weight, mapRS2cat, + msnpInfo[l], W, kin_tmp, ns_tmp); } // Add ns. @@ -34,7 +34,7 @@ void ProgressBar(string str, double p, double total); void ProgressBar(string str, double p, double total, double ratio); std::istream &safeGetline(std::istream &is, std::string &t); -bool ReadFile_snps(const string &file_snps, set<string> &setSnps); +bool ReadFile_snps(const string file_snps, set<string> &setSnps); bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps); bool ReadFile_log(const string &file_log, double &pheno_mean); @@ -83,13 +83,14 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv, void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U); void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval); -bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin); +bool BimbamKin(const string file_geno, const set<string> ksnps, + vector<int> &indicator_snp, const int k_mode, + const int display_pace, gsl_matrix *matrix_kin, + const bool test_nind); bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin); -bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, +bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, @@ -124,13 +125,14 @@ bool ReadFile_catc(const string &file_cat, bool ReadFile_mcatc(const string &file_mcat, map<string, vector<double>> &mapRS2catc, size_t &n_cat); -bool BimbamKin(const string &file_geno, const int display_pace, - const vector<int> &indicator_idv, - const vector<int> &indicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<SNPINFO> &snpInfo, const gsl_matrix *W, - gsl_matrix *matrix_kin, gsl_vector *vector_ns); +bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps, + const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns); bool PlinkKin(const string &file_bed, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, @@ -139,7 +141,8 @@ bool PlinkKin(const string &file_bed, const int display_pace, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns); bool MFILEKin(const size_t mfile_mode, const string &file_mfile, - const int display_pace, const vector<int> &indicator_idv, + const set<string> setKSnps, const int display_pace, + const vector<int> &indicator_idv, const vector<vector<int>> &mindicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, diff --git a/src/lmm.cpp b/src/lmm.cpp index 3f51073..eb76265 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -80,6 +80,7 @@ void LMM::CopyFromParam(PARAM &cPar) { indicator_idv = cPar.indicator_idv; indicator_snp = cPar.indicator_snp; snpInfo = cPar.snpInfo; + setGWASnps = cPar.setGWASnps; return; } @@ -165,6 +166,7 @@ void LMM::WriteFiles() { } } } else { + bool process_gwasnps = setGWASnps.size(); outfile << "chr" << "\t" << "rs" @@ -217,9 +219,13 @@ void LMM::WriteFiles() { size_t t = 0; for (size_t i = 0; i < snpInfo.size(); ++i) { - if (indicator_snp[i] == 0) { + + if (indicator_snp[i] == 0) continue; - } + auto snp = snpInfo[i].rs_number; + if (process_gwasnps && setGWASnps.count(snp) == 0) + continue; + // cout << t << endl; outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" @@ -1311,78 +1317,126 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { - igzstream infile(file_geno.c_str(), igzstream::in); - if (!infile) { - cout << "error reading genotype file:" << file_geno << endl; - return; - } - + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps) { clock_t time_start = clock(); - string line; - char *ch_ptr; - - double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; - double p_lrt = 0, p_score = 0; - double logl_H1 = 0.0; - int n_miss, c_phen; - double geno, x_mean; + // LOCO support + bool process_gwasnps = gwasnps.size(); + if (process_gwasnps) + debug_msg("AnalyzeBimbam w. LOCO"); // Calculate basic quantities. size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - gsl_vector *x = gsl_vector_alloc(U->size1); - gsl_vector *x_miss = gsl_vector_alloc(U->size1); + const size_t inds = U->size1; + gsl_vector *x = gsl_vector_alloc(inds); // #inds + gsl_vector *x_miss = gsl_vector_alloc(inds); gsl_vector *Utx = gsl_vector_alloc(U->size2); gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); gsl_vector *ab = gsl_vector_alloc(n_index); - // Create a large matrix. - size_t msize = 10000; - gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix_set_zero(Xlarge); + // Create a large matrix with LMM_BATCH_SIZE columns for batched processing + // const size_t msize=(process_gwasnps ? 1 : LMM_BATCH_SIZE); + const size_t msize = LMM_BATCH_SIZE; + gsl_matrix *Xlarge = gsl_matrix_alloc(inds, msize); + gsl_matrix *UtXlarge = gsl_matrix_alloc(inds, msize); + enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure + gsl_matrix_set_zero(Xlarge); gsl_matrix_set_zero(Uab); CalcUab(UtW, Uty, Uab); // start reading genotypes and analyze - size_t c = 0, t_last = 0; - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (indicator_snp[t] == 0) { - continue; + size_t c = 0; + + igzstream infile(file_geno.c_str(), igzstream::in); + enforce_msg(infile, "error reading genotype file"); + + auto batch_compute = [&](size_t l) { // using a C++ closure + // Compute SNPs in batch, note the computations are independent per SNP + gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l); + gsl_matrix_view UtXlarge_sub = + gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l); + + time_start = clock(); + eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + &UtXlarge_sub.matrix); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Xlarge); + for (size_t i = 0; i < l; i++) { + // for every batch... + gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); + gsl_vector_memcpy(Utx, &UtXlarge_col.vector); + + CalcUab(UtW, Uty, Utx, Uab); + + time_start = clock(); + FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; + + double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; + double p_lrt = 0, p_score = 0; + double logl_H1 = 0.0; + + // 3 is before 1. + if (a_mode == 3 || a_mode == 4) { + CalcRLScore(l_mle_null, param1, beta, se, p_score); + } + + if (a_mode == 1 || a_mode == 4) { + // for univariate a_mode is 1 + CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcRLWald(lambda_remle, param1, beta, se, p_wald); + } + + if (a_mode == 2 || a_mode == 4) { + CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); } - t_last++; - } + }; + for (size_t t = 0; t < indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); + // for every SNP + string line; + safeGetline(infile, line); if (t % d_pace == 0 || t == (ns_total - 1)) { ProgressBar("Reading SNPs ", t, ns_total - 1); } - if (indicator_snp[t] == 0) { + if (indicator_snp[t] == 0) continue; - } - ch_ptr = strtok((char *)line.c_str(), " , \t"); + char *ch_ptr = strtok((char *)line.c_str(), " , \t"); + auto snp = string(ch_ptr); + // check whether SNP is included in gwasnps (used by LOCO) + if (process_gwasnps && gwasnps.count(snp) == 0) + continue; ch_ptr = strtok(NULL, " , \t"); ch_ptr = strtok(NULL, " , \t"); - x_mean = 0.0; - c_phen = 0; - n_miss = 0; + double x_mean = 0.0; + int c_phen = 0; + int n_miss = 0; gsl_vector_set_zero(x_miss); for (size_t i = 0; i < ni_total; ++i) { + // get the genotypes per individual and compute stats per SNP ch_ptr = strtok(NULL, " , \t"); - if (indicator_idv[i] == 0) { + if (indicator_idv[i] == 0) continue; - } if (strcmp(ch_ptr, "NA") == 0) { gsl_vector_set(x_miss, c_phen, 0.0); n_miss++; } else { - geno = atof(ch_ptr); + double geno = atof(ch_ptr); gsl_vector_set(x, c_phen, geno); gsl_vector_set(x_miss, c_phen, 1.0); @@ -1398,66 +1452,16 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, gsl_vector_set(x, i, x_mean); } } - + // copy genotype values for SNP into Xlarge cache gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); gsl_vector_memcpy(&Xlarge_col.vector, x); - c++; - - if (c % msize == 0 || c == t_last) { - size_t l = 0; - if (c % msize == 0) { - l = msize; - } else { - l = c % msize; - } - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start = clock(); - eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - gsl_matrix_set_zero(Xlarge); + c++; // count SNPs going in - for (size_t i = 0; i < l; i++) { - gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); - gsl_vector_memcpy(Utx, &UtXlarge_col.vector); - - CalcUab(UtW, Uty, Utx, Uab); - - time_start = clock(); - FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; - - // 3 is before 1. - if (a_mode == 3 || a_mode == 4) { - CalcRLScore(l_mle_null, param1, beta, se, p_score); - } - - if (a_mode == 1 || a_mode == 4) { - CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, - logl_H1); - CalcRLWald(lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode == 2 || a_mode == 4) { - CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); - p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); - } - - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; - - sumStat.push_back(SNPs); - } - } + if (c == msize) + batch_compute(msize); } + batch_compute(c % msize); + // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl; cout << endl; gsl_vector_free(x); @@ -1505,7 +1509,7 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, gsl_vector *ab = gsl_vector_alloc(n_index); // Create a large matrix. - size_t msize = 10000; + size_t msize = LMM_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix_set_zero(Xlarge); @@ -1528,9 +1532,8 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, size_t c = 0, t_last = 0; for (size_t t = 0; t < snpInfo.size(); ++t) { - if (indicator_snp[t] == 0) { + if (indicator_snp[t] == 0) continue; - } t_last++; } for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { @@ -1697,7 +1700,7 @@ void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, gsl_vector *ab = gsl_vector_alloc(n_index); // Create a large matrix. - size_t msize = 10000; + size_t msize = LMM_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix_set_zero(Xlarge); @@ -26,6 +26,8 @@ using namespace std; +#define LMM_BATCH_SIZE 10000 // used for batch processing + class FUNC_PARAM { public: @@ -78,6 +80,7 @@ public: vector<int> indicator_snp; vector<SNPINFO> snpInfo; // Record SNP information. + set<string> setGWASnps; // Record SNP information. // Not included in PARAM. vector<SUMSTAT> sumStat; // Output SNPSummary Data. @@ -97,7 +100,8 @@ public: const gsl_matrix *W, const gsl_vector *y); void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y); + const gsl_matrix *W, const gsl_vector *y, + const set<string> gwasnps); void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index f1ab3fc..eb591ca 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -2974,7 +2974,7 @@ void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, string line; // Create a large matrix. - size_t msize = 10000; + size_t msize = LMM_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix_set_zero(Xlarge); @@ -3531,7 +3531,7 @@ void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; // Create a large matrix. - size_t msize = 10000; + size_t msize = LMM_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix_set_zero(Xlarge); @@ -3968,7 +3968,7 @@ void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; // Create a large matrix. - size_t msize = 10000; + size_t msize = LMM_BATCH_SIZE; gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); gsl_matrix_set_zero(Xlarge); diff --git a/src/param.cpp b/src/param.cpp index 2572bbb..6ab4339 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -38,6 +38,54 @@ using namespace std; +// ---- Helper functions which do not use the PARAM scope + +// Calculate the SNP sets as used in LOCO from mapchr which was filled +// from the annotation file. Returns ksnps (used for K) and gwasnps (used for +// GWA). Both are complimentary with each other and subsets of setSnps. + +void LOCO_set_Snps(set<string> &ksnps, set<string> &gwasnps, + const map<string, string> mapchr, const string loco) { + enforce_msg(ksnps.size() == 0, "make sure knsps is not initialized twice"); + enforce_msg(gwasnps.size() == 0, + "make sure gwasnps is not initialized twice"); + for (auto &kv : mapchr) { + auto snp = kv.first; + auto chr = kv.second; + if (chr != loco) { + ksnps.insert(snp); + } else { + gwasnps.insert(snp); + } + } +} + +// Trim #individuals to size which is used to write tests that run faster +// +// Note it actually trims the number of functional individuals +// (indicator_idv[x] == 1). This should match indicator_cvt etc. If +// this gives problems with certain sets we can simply trim to size. + +void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) { + if (ni_max) { + size_t count = 0; + for (auto ind = idvs.begin(); ind != idvs.end(); ++ind) { + if (*ind) + count++; + if (count >= ni_max) + break; + } + if (count != idvs.size()) { + if (debug) + cout << "**** TEST MODE: trim individuals from " << idvs.size() + << " to " << count << endl; + idvs.resize(count); + } + } +} + +// ---- PARAM class implementation + PARAM::PARAM(void) : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000), file_out("result"), path_out("./output/"), miss_level(0.05), @@ -96,6 +144,15 @@ void PARAM::ReadFiles(void) { setSnps.clear(); } + // Read KSNP set. + if (!file_ksnps.empty()) { + if (ReadFile_snps(file_ksnps, setKSnps) == false) { + error = true; + } + } else { + setKSnps.clear(); + } + // For prediction. if (!file_epm.empty()) { if (ReadFile_est(file_epm, est_column, mapRS2est) == false) { @@ -164,6 +221,7 @@ void PARAM::ReadFiles(void) { } else { n_cvt = 1; } + trim_individuals(indicator_cvt, ni_max, mode_debug); if (!file_gxe.empty()) { if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) { @@ -176,6 +234,8 @@ void PARAM::ReadFiles(void) { } } + trim_individuals(indicator_idv, ni_max, mode_debug); + // WJA added. // Read genotype and phenotype file for bgen format. if (!file_oxford.empty()) { @@ -273,12 +333,15 @@ void PARAM::ReadFiles(void) { gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); CopyCvt(W); + trim_individuals(indicator_idv, ni_max, mode_debug); + trim_individuals(indicator_cvt, ni_max, mode_debug); if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) { error = true; } gsl_matrix_free(W); + ns_total = indicator_snp.size(); } @@ -457,6 +520,11 @@ void PARAM::ReadFiles(void) { // ni_test, save all useful covariates. ProcessCvtPhen(); } + + // Compute setKSnps when -loco is passed in + if (!loco.empty()) { + LOCO_set_Snps(setKSnps, setGWASnps, mapRS2chr, loco); + } return; } @@ -869,22 +937,22 @@ void PARAM::CheckParam(void) { error = true; } - str = file_snps; - if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { - cout << "error! fail to open snps file: " << str << endl; - error = true; - } - - str = file_log; - if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { - cout << "error! fail to open log file: " << str << endl; - error = true; - } + enforce_fexists(file_snps, "open file"); + enforce_fexists(file_ksnps, "open file"); + enforce_fexists(file_gwasnps, "open file"); + enforce_fexists(file_log, "open file"); + enforce_fexists(file_anno, "open file"); - str = file_anno; - if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { - cout << "error! fail to open annotation file: " << str << endl; - error = true; + if (!loco.empty()) { + enforce_msg((a_mode >= 1 && a_mode <= 4) || a_mode == 21 || a_mode == 22, + "LOCO only works with LMM and K"); + enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)"); + enforce_msg(file_oxford.empty(), "LOCO does not work with Oxford (yet)"); + enforce_msg(file_gxe.empty(), "LOCO does not support GXE (yet)"); + enforce_msg(!file_anno.empty(), + "LOCO requires annotation file (-a switch)"); + enforce_msg(file_ksnps.empty(), "LOCO does not allow -ksnps switch"); + enforce_msg(file_gwasnps.empty(), "LOCO does not allow -gwasnps switch"); } str = file_kin; @@ -1005,7 +1073,7 @@ void PARAM::CheckData(void) { (indicator_cvt).size() != (indicator_idv).size()) { error = true; cout << "error! number of rows in the covariates file do not " - << "match the number of individuals. " << endl; + << "match the number of individuals. " << indicator_cvt.size() << endl; return; } if ((indicator_gxe).size() != 0 && @@ -1123,7 +1191,15 @@ void PARAM::CheckData(void) { if (!file_gene.empty()) { cout << "## number of total genes = " << ng_total << endl; } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) { - cout << "## number of total SNPs = " << ns_total << endl; + if (!loco.empty()) + cout << "## leave one chromosome out (LOCO) = " << loco << endl; + cout << "## number of total SNPs = " << ns_total << endl; + if (setSnps.size()) + cout << "## number of considered SNPS = " << setSnps.size() << endl; + if (setKSnps.size()) + cout << "## number of SNPS for K = " << setKSnps.size() << endl; + if (setGWASnps.size()) + cout << "## number of SNPS for GWAS = " << setGWASnps.size() << endl; cout << "## number of analyzed SNPs = " << ns_test << endl; } else { } @@ -1297,20 +1373,22 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) { if (!file_bfile.empty()) { file_str = file_bfile + ".bed"; + enforce_msg(loco.empty(), "FIXME: LOCO nyi"); if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == false) { error = true; } } else if (!file_oxford.empty()) { file_str = file_oxford + ".bgen"; + enforce_msg(loco.empty(), "FIXME: LOCO nyi"); if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == false) { error = true; } } else { file_str = file_geno; - if (BimbamKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == - false) { + if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace, + matrix_kin, ni_max == 0) == false) { error = true; } } @@ -1720,37 +1798,43 @@ void PARAM::CalcS(const map<string, double> &mapRS2wA, } else if (!file_geno.empty()) { file_str = file_geno; if (mapRS2wA.size() == 0) { - if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, - mapRS2cat, snpInfo, W, K, ns) == false) { + if (BimbamKinUncentered(file_str, setKSnps, d_pace, indicator_idv, + indicator_snp, mapRS2wK, mapRS2cat, snpInfo, W, K, + ns) == false) { error = true; } } else { - if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, - mapRS2cat, snpInfo, W, A, ns) == false) { + if (BimbamKinUncentered(file_str, setKSnps, d_pace, indicator_idv, + indicator_snp, mapRS2wA, mapRS2cat, snpInfo, W, A, + ns) == false) { error = true; } } } else if (!file_mbfile.empty()) { if (mapRS2wA.size() == 0) { - if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp, - mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) { + if (MFILEKin(1, file_mbfile, setKSnps, d_pace, indicator_idv, + mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K, + ns) == false) { error = true; } } else { - if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp, - mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) { + if (MFILEKin(1, file_mbfile, setKSnps, d_pace, indicator_idv, + mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A, + ns) == false) { error = true; } } } else if (!file_mgeno.empty()) { if (mapRS2wA.size() == 0) { - if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp, - mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) { + if (MFILEKin(0, file_mgeno, setKSnps, d_pace, indicator_idv, + mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K, + ns) == false) { error = true; } } else { - if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp, - mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) { + if (MFILEKin(0, file_mgeno, setKSnps, d_pace, indicator_idv, + mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A, + ns) == false) { error = true; } } diff --git a/src/param.h b/src/param.h index 33e2431..45d8c0f 100644 --- a/src/param.h +++ b/src/param.h @@ -19,12 +19,15 @@ #ifndef __PARAM_H__ #define __PARAM_H__ +#include "debug.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" #include <map> #include <set> #include <vector> +#define K_BATCH_SIZE 10000 // #snps used for batched K + using namespace std; class SNPINFO { @@ -110,6 +113,7 @@ class PARAM { public: // IO-related parameters. bool mode_silence; + bool mode_debug = false; int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector<size_t> p_column; // Which phenotype column needs analysis. @@ -135,12 +139,14 @@ public: string file_bf, file_hyp; string path_out; - string file_epm; // Estimated parameter file. - string file_ebv; // Estimated breeding value file. - string file_log; // Log file containing mean estimate. - string file_read; // File containing total number of reads. - string file_gene; // Gene expression file. - string file_snps; // File containing analyzed SNPs or genes. + string file_epm; // Estimated parameter file. + string file_ebv; // Estimated breeding value file. + string file_log; // Log file containing mean estimate. + string file_read; // File containing total number of reads. + string file_gene; // Gene expression file. + string file_snps; // File containing analyzed SNPs or genes. + string file_ksnps; // File SNPs for computing K + string file_gwasnps; // File SNPs for computing GWAS // WJA added. string file_oxford; @@ -152,6 +158,7 @@ public: double r2_level; // LMM-related parameters. + string loco; double l_min; double l_max; size_t n_region; @@ -215,6 +222,7 @@ public: // Number of individuals. size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; + size_t ni_max = 0; // -nind switch for testing purposes // Number of observed and missing phenotypes. size_t np_obs, np_miss; @@ -305,7 +313,9 @@ public: vector<SNPINFO> snpInfo; // Record SNP information. vector<vector<SNPINFO>> msnpInfo; // Record SNP information. - set<string> setSnps; // Set of snps for analysis. + set<string> setSnps; // Set of snps for analysis (-snps). + set<string> setKSnps; // Set of snps for K (-ksnps and LOCO) + set<string> setGWASnps; // Set of snps for GWA (-gwasnps and LOCO) // Constructor. PARAM(); @@ -351,4 +361,10 @@ public: size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt); +// Helpers for checking parameters +#define enforce_fexists(fn, msg) \ + if (!fn.empty()) \ + enforce_msg(stat(fn.c_str(), &fileInfo) == 0, \ + ((std::string(__STRING(fn)) + ": " + msg).c_str())); + #endif @@ -40,6 +40,8 @@ public: bool noconstrain; }; +// Methods for variant component (VC) estimation + class VC { public: diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh new file mode 100644 index 0000000..522cf3d --- /dev/null +++ b/test/dev_test_suite.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +gemma=../bin/gemma + +testCenteredRelatednessMatrixKLOCO1() { + outn=mouse_hs1940_LOCO1 + $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \ + -a ../example/mouse_hs1940.anno.txt -snps ../example/mouse_hs1940_snps.txt -nind 400 -loco 1 -gk -debug -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + outfn=output/$outn.cXX.txt + assertEquals 0 $? + assertEquals "400" `wc -l < $outfn` + assertEquals "0.312" `head -c 5 $outfn` + assertEquals "71.03" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testUnivariateLinearMixedModelLOCO1() { + outn=mouse_hs1940_CD8_LOCO1_lmm + $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -loco 1 \ + -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940_LOCO1.cXX.txt \ + -snps ../example/mouse_hs1940_snps.txt -lmm \ + -nind 400 \ + -debug \ + -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + assertEquals 0 $? + outfn=output/$outn.assoc.txt + assertEquals "68" `wc -l < $outfn` + assertEquals "15465553.30" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +shunit2=`which shunit2` + +if [ -x "$shunit2" ]; then + echo run system shunit2 + . $shunit2 +elif [ -e shunit2-2.0.3/src/shell/shunit2 ]; then + echo run shunit2 provided in gemma repo + . shunit2-2.0.3/src/shell/shunit2 +else + echo "Can not find shunit2 - see INSTALL.md" +fi diff --git a/test/test_suite.sh b/test/test_suite.sh index 467056e..625298e 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -2,11 +2,43 @@ gemma=../bin/gemma +testCenteredRelatednessMatrixKFullLOCO1() { + outn=mouse_hs1940_full_LOCO1 + $gemma -g ../example/mouse_hs1940.geno.txt.gz \ + -p ../example/mouse_hs1940.pheno.txt \ + -a ../example/mouse_hs1940.anno.txt \ + -loco 1 -gk -debug -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + outfn=output/$outn.cXX.txt + assertEquals 0 $? + assertEquals "1940" `wc -l < $outfn` + assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + +testUnivariateLinearMixedModelFullLOCO1() { + outn=mouse_hs1940_CD8_full_LOCO1_lmm + $gemma -g ../example/mouse_hs1940.geno.txt.gz \ + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -loco 1 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940_full_LOCO1.cXX.txt \ + -lmm \ + -debug \ + -o $outn + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + assertEquals 0 $? + outfn=output/$outn.assoc.txt + assertEquals "951" `wc -l < $outfn` + assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` +} + testCenteredRelatednessMatrixK() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt \ - -a ../example/mouse_hs1940.anno.txt \ - -gk -o mouse_hs1940 + -p ../example/mouse_hs1940.pheno.txt \ + -gk -o mouse_hs1940 assertEquals 0 $? grep "total computation time" < output/mouse_hs1940.log.txt assertEquals 0 $? @@ -14,36 +46,40 @@ testCenteredRelatednessMatrixK() { assertEquals "1940" `wc -l < $outfn` assertEquals "3763600" `wc -w < $outfn` assertEquals "0.335" `head -c 5 $outfn` - assertEquals "24.9799" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testUnivariateLinearMixedModel() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt -n 1 \ - -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \ - -lmm -o mouse_hs1940_CD8_lmm + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940.cXX.txt \ + -lmm \ + -o mouse_hs1940_CD8_lmm assertEquals 0 $? grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt assertEquals 0 $? outfn=output/mouse_hs1940_CD8_lmm.assoc.txt assertEquals "118459" `wc -w < $outfn` - assertEquals "92047" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testMultivariateLinearMixedModel() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ - -p ../example/mouse_hs1940.pheno.txt -n 1 6 \ - -a ../example/mouse_hs1940.anno.txt -k ./output/mouse_hs1940.cXX.txt \ - -lmm -o mouse_hs1940_CD8MCH_lmm + -p ../example/mouse_hs1940.pheno.txt \ + -n 1 6 \ + -a ../example/mouse_hs1940.anno.txt \ + -k ./output/mouse_hs1940.cXX.txt \ + -lmm -o mouse_hs1940_CD8MCH_lmm assertEquals 0 $? grep "total computation time" < output/mouse_hs1940_CD8MCH_lmm.log.txt assertEquals 0 $? outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt assertEquals "139867" `wc -w < $outfn` - assertEquals "92079" `perl -nle '$sum += substr($_,0,6) } END { print $sum' $outfn` + assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } - shunit2=`which shunit2` if [ -x "$shunit2" ]; then |