diff options
-rw-r--r-- | src/gemma.cpp | 923 | ||||
-rw-r--r-- | src/lmm.cpp | 1373 | ||||
-rw-r--r-- | src/param.cpp | 59 |
3 files changed, 1249 insertions, 1106 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index bc29e9d..11b33c1 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> @@ -31,22 +31,7 @@ #include "gsl/gsl_eigen.h" #include "gsl/gsl_cdf.h" -#include "lapack.h" //for functions EigenDecomp - -#ifdef FORCE_FLOAT -#include "io_float.h" //for function ReadFile_kin -#include "gemma_float.h" -#include "vc_float.h" -#include "lm_float.h" //for LM class -#include "bslmm_float.h" //for BSLMM class -#include "bslmmdap_float.h" //for BSLMMDAP class -#include "ldr_float.h" //for LDR class -#include "lmm_float.h" //for LMM class, and functions CalcLambda, CalcPve, CalcVgVe -#include "mvlmm_float.h" //for MVLMM class -#include "prdt_float.h" //for PRDT class -#include "varcov_float.h" //for MVLMM class -#include "mathfunc_float.h" //for a few functions -#else +#include "lapack.h" #include "io.h" #include "gemma.h" #include "vc.h" @@ -59,410 +44,440 @@ #include "prdt.h" #include "varcov.h" #include "mathfunc.h" -#endif - using namespace std; - - GEMMA::GEMMA(void): version("0.96"), date("05/17/2017"), year("2017") {} -void GEMMA::PrintHeader (void) -{ - cout<<endl; - cout<<"*********************************************************"<<endl; - cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl; - cout<<" Version "<<version<<", "<<date<<" "<<endl; - cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl; - cout<<" (C) "<<year<<" Xiang Zhou "<<endl; - cout<<" GNU General Public License "<<endl; - cout<<" For Help, Type ./gemma -h "<<endl; - cout<<"*********************************************************"<<endl; - cout<<endl; - - return; +void GEMMA::PrintHeader (void) { + cout<<endl; + cout<<"*********************************************************"<<endl; + cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl; + cout<<" Version "<<version<<", "<<date<<" "<< + endl; + cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl; + cout<<" (C) "<<year<<" Xiang Zhou "<<endl; + cout<<" GNU General Public License "<<endl; + cout<<" For Help, Type ./gemma -h "<<endl; + cout<<"*********************************************************"<<endl; + cout<<endl; + + return; } - -void GEMMA::PrintLicense (void) -{ +void GEMMA::PrintLicense (void) { cout<<endl; - cout<<"The Software Is Distributed Under GNU General Public License, But May Also Require The Following Notifications."<<endl; + cout<<"The Software Is Distributed Under GNU General Public "<< + "License, But May Also Require The Following Notifications."<<endl; cout<<endl; - cout<<"Including Lapack Routines In The Software May Require The Following Notification:"<<endl; - cout<<"Copyright (c) 1992-2010 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved."<<endl; - cout<<"Copyright (c) 2000-2010 The University of California Berkeley. All rights reserved."<<endl; - cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. All rights reserved."<<endl; + cout<<"Including Lapack Routines In The Software May Require"<< + " The Following Notification:"<<endl; + cout<<"Copyright (c) 1992-2010 The University of Tennessee and "<< + "The University of Tennessee Research Foundation. All rights "<< + "reserved."<<endl; + cout<<"Copyright (c) 2000-2010 The University of California "<< + "Berkeley. All rights reserved."<<endl; + cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. "<< + "All rights reserved."<<endl; cout<<endl; cout<<"$COPYRIGHT$"<<endl; cout<<"Additional copyrights may follow"<<endl; cout<<"$HEADER$"<<endl; - cout<<"Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:"<<endl; - cout<<"- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer."<<endl; - cout<<"- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution."<<endl; - cout<<"- Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission."<<endl; - cout<<"The copyright holders provide no reassurances that the source code provided does not infringe any patent, copyright, or any other " - <<"intellectual property rights of third parties. The copyright holders disclaim any liability to any recipient for claims brought against " - <<"recipient by any third party for infringement of that parties intellectual property rights. "<<endl; - cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT " - <<"LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT " - <<"OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT " - <<"LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY " - <<"THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE " - <<"OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."<<endl; + cout<<"Redistribution and use in source and binary forms, with or "<< + "without modification, are permitted provided that the following "<< + " conditions are met:"<<endl; + cout<<"- Redistributions of source code must retain the above "<< + "copyright notice, this list of conditions and the following "<< + "disclaimer."<<endl; + cout<<"- Redistributions in binary form must reproduce the above "<< + "copyright notice, this list of conditions and the following "<< + "disclaimer listed in this license in the documentation and/or "<< + "other materials provided with the distribution."<<endl; + cout<<"- Neither the name of the copyright holders nor the names "<< + "of its contributors may be used to endorse or promote products "<< + "derived from this software without specific prior written "<< + "permission."<<endl; + cout<<"The copyright holders provide no reassurances that the "<< + "source code provided does not infringe any patent, copyright, "<< + "or any other "<< + "intellectual property rights of third parties. "<< + "The copyright holders disclaim any liability to any recipient "<< + "for claims brought against "<< + "recipient by any third party for infringement of that parties "<< + "intellectual property rights. "<<endl; + cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "<< + "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "<< + "INCLUDING, BUT NOT "<< + "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "<< + "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "<< + "SHALL THE COPYRIGHT "<< + "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "<< + "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "<< + "(INCLUDING, BUT NOT "<< + "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "<< + "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "<< + "HOWEVER CAUSED AND ON ANY "<< + "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "<< + "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "<< + "OUT OF THE USE "<< + "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "<< + "SUCH DAMAGE."<<endl; cout<<endl; - - return; } - - -void GEMMA::PrintHelp(size_t option) -{ - if (option==0) { - cout<<endl; - cout<<" GEMMA version "<<version<<", released on "<<date<<endl; - cout<<" implemented by Xiang Zhou"<<endl; - cout<<endl; - cout<<" type ./gemma -h [num] for detailed helps"<<endl; - cout<<" options: " << endl; - cout<<" 1: quick guide"<<endl; - cout<<" 2: file I/O related"<<endl; - cout<<" 3: SNP QC"<<endl; - cout<<" 4: calculate relatedness matrix"<<endl; - cout<<" 5: perform eigen decomposition"<<endl; - cout<<" 6: perform variance component estimation"<<endl; - cout<<" 7: fit a linear model"<<endl; - cout<<" 8: fit a linear mixed model"<<endl; - cout<<" 9: fit a multivariate linear mixed model"<<endl; - cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl; - cout<<" 11: obtain predicted values"<<endl; - cout<<" 12: calculate snp variance covariance"<<endl; - cout<<" 13: note"<<endl; - cout<<endl; - } - - if (option==1) { - cout<<" QUICK GUIDE" << endl; - cout<<" to generate a relatedness matrix: "<<endl; - cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl; - cout<<" to generate the S matrix: "<<endl; - cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl; - cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl; - cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl; - cout<<" to generate the q vector: "<<endl; - cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl; - cout<<" to generate the ldsc weigthts: "<<endl; - cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl; - cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl; - cout<<" to estimate variance components: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl; - cout<<" options for the above two commands: -crt -windowbp [num]"<<endl; - cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl; - cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl; - cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl; - cout<<" to fit a linear mixed model: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" to fit a linear mixed model to test g by e effects: "<<endl; - cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl; - cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" to fit a multivariate linear mixed model: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; - cout<<" to fit a Bayesian sparse linear mixed model: "<<endl; - cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl; - cout<<" to obtain predicted values: "<<endl; - cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; - cout<<" to calculate correlations between SNPs: "<<endl; - cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl; - cout<<endl; - } - - if (option==2) { - cout<<" FILE I/O RELATED OPTIONS" << endl; - cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl; - cout<<" requires: *.fam, *.bim and *.bed files"<<endl; - cout<<" missing value: -9"<<endl; - cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl; - cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; - cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl; - cout<<" format: phenotype for individual 1"<<endl; - cout<<" phenotype for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl; - cout<<" format: rs#1, base_position, chr_number"<<endl; - cout<<" rs#2, base_position, chr_number"<<endl; - cout<<" ..."<<endl; - // WJA added - cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl; - cout<<" requires: *.bgen, *.sample files"<<endl; - - cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl; - cout<<" format: variable for individual 1"<<endl; - cout<<" variable for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl; - cout<<" format: variable for individual 1"<<endl; - cout<<" variable for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl; - cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl; - cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl; - cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl; - cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl; - cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl; - cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl; - cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl; - cout<<" missing value: NA"<<endl; - cout<<" note: the intercept (a column of 1s) may need to be included"<<endl; - cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl; - cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl; - cout<<" options: n1: rs column number"<<endl; - cout<<" n2: estimated alpha column number (0 to ignore)"<<endl; - cout<<" n3: estimated beta column number (0 to ignore)"<<endl; - cout<<" n4: estimated gamma column number (0 to ignore)"<<endl; - cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl; - cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl; - cout<<" format: value for individual 1"<<endl; - cout<<" value for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl; - cout<<" -mu [num] "<<" specify input estimated mean value"<<endl; - cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl; - cout<<" format: header"<<endl; - cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl; - cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl; - cout<<" ..."<<endl; - cout<<" missing value: not allowed"<<endl; - cout<<" -r [filename] "<<" specify input total read count file name"<<endl; - cout<<" format: total read count for individual 1"<<endl; - cout<<" total read count for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl; - cout<<" format: rs#1"<<endl; - cout<<" rs#2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -silence "<<" silent terminal display"<<endl; - cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl; - cout<<" options: 1: \"n by n matrix\" format"<<endl; - cout<<" 2: \"id id value\" format"<<endl; - cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl; - cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl; - cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl; - cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl; - cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl; - cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl; - cout<<endl; - } - - if (option==3) { - cout<<" SNP QC OPTIONS" << endl; - cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl; - cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl; - cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl; - cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl; - cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl; - cout<<endl; - } - - if (option==4) { - cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl; - cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl; - cout<<" options: 1: centered XX^T/p"<<endl; - cout<<" 2: standardized XX^T/p"<<endl; - cout<<" note: non-polymorphic SNPs are excluded "<<endl; - cout<<endl; - } - - if (option==5) { - cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl; - cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl; - cout<<endl; - } - - if (option==6) { - cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl; - cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl; - cout<<" options (with kinship file): 1: HE regression (default)"<<endl; - cout<<" 2: REML"<<endl; - cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl; - cout<<" 2: Standardized genotypes"<<endl; - cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl; - cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl; - cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl; - cout<<endl; - } - - if (option==7) { - cout<<" LINEAR MODEL OPTIONS" << endl; - cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: Wald test"<<endl; - cout<<" 2: Likelihood ratio test"<<endl; - cout<<" 3: Score test"<<endl; - cout<<" 4: 1-3"<<endl; - cout<<endl; - } - - if (option==8) { - cout<<" LINEAR MIXED MODEL OPTIONS" << endl; - cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: Wald test"<<endl; - cout<<" 2: Likelihood ratio test"<<endl; - cout<<" 3: Score test"<<endl; - cout<<" 4: 1-3"<<endl; - cout<<" 5: Parameter estimation in the null model only"<<endl; - cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl; - cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl; - cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl; - cout<<endl; - } - - if (option==9) { - cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl; - cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl; - cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl; - cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl; - cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl; - cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl; - cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl; - cout<<endl; - } - - if (option==10) { - cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl; - cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: BSLMM"<<endl; - cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl; - cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl; - cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl; - cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl; - - cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: LDR"<<endl; - - cout<<" MCMC OPTIONS" << endl; - cout<<" Prior" << endl; - cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl; - cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl; - cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl; - cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl; - cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl; - cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl; - cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl; - cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl; - - cout<<" Proposal" << endl; - cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl; - cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; - cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; - cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl; - - cout<<" Others" << endl; - cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl; - cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl; - cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl; - cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl; - cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl; - cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl; - cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl; - cout<<endl; - } - - if (option==11) { - cout<<" PREDICTION OPTIONS" << endl; - cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl; - cout<<" options: 1: predict for individuals with missing phenotypes"<<endl; - cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl; - cout<<endl; - } - - if (option==12) { - cout<<" CALC CORRELATION OPTIONS" << endl; - cout<<" -calccor "<<endl; - cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl; - cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl; - cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl; - cout<<endl; - } - - if (option==13) { - cout<<" NOTE"<<endl; - cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl; - cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl; - cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl; - cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl; - cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl; - cout<<endl; - } - - return; +void GEMMA::PrintHelp(size_t option) { + if (option==0) { + cout<<endl; + cout<<" GEMMA version "<<version<<", released on "<<date<<endl; + cout<<" implemented by Xiang Zhou"<<endl; + cout<<endl; + cout<<" type ./gemma -h [num] for detailed helps"<<endl; + cout<<" options: " << endl; + cout<<" 1: quick guide"<<endl; + cout<<" 2: file I/O related"<<endl; + cout<<" 3: SNP QC"<<endl; + cout<<" 4: calculate relatedness matrix"<<endl; + cout<<" 5: perform eigen decomposition"<<endl; + cout<<" 6: perform variance component estimation"<<endl; + cout<<" 7: fit a linear model"<<endl; + cout<<" 8: fit a linear mixed model"<<endl; + cout<<" 9: fit a multivariate linear mixed model"<<endl; + cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl; + cout<<" 11: obtain predicted values"<<endl; + cout<<" 12: calculate snp variance covariance"<<endl; + cout<<" 13: note"<<endl; + cout<<endl; + } + + if (option==1) { + cout<<" QUICK GUIDE" << endl; + cout<<" to generate a relatedness matrix: "<<endl; + cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl; + cout<<" to generate the S matrix: "<<endl; + cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl; + cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl; + cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl; + cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl; + cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl; + cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl; + cout<<" to generate the q vector: "<<endl; + cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl; + cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl; + cout<<" to generate the ldsc weigthts: "<<endl; + cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl; + cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl; + cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl; + cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl; + cout<<" to estimate variance components: "<<endl; + cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl; + cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl; + cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl; + cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl; + cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl; + cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl; + cout<<" options for the above two commands: -crt -windowbp [num]"<<endl; + cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl; + cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl; + cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl; + cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl; + cout<<" to fit a linear mixed model: "<<endl; + cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; + cout<<" to fit a linear mixed model to test g by e effects: "<<endl; + cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; + cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl; + cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; + cout<<" to fit a multivariate linear mixed model: "<<endl; + cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; + cout<<" to fit a Bayesian sparse linear mixed model: "<<endl; + cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl; + cout<<" to obtain predicted values: "<<endl; + cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; + cout<<" to calculate correlations between SNPs: "<<endl; + cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl; + cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl; + cout<<endl; + } + + if (option==2) { + cout<<" FILE I/O RELATED OPTIONS" << endl; + cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl; + cout<<" requires: *.fam, *.bim and *.bed files"<<endl; + cout<<" missing value: -9"<<endl; + cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl; + cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; + cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; + cout<<" ..."<<endl; + cout<<" missing value: NA"<<endl; + cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl; + cout<<" format: phenotype for individual 1"<<endl; + cout<<" phenotype for individual 2"<<endl; + cout<<" ..."<<endl; + cout<<" missing value: NA"<<endl; + cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl; + cout<<" format: rs#1, base_position, chr_number"<<endl; + cout<<" rs#2, base_position, chr_number"<<endl; + cout<<" ..."<<endl; + + // WJA added. + cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl; + cout<<" requires: *.bgen, *.sample files"<<endl; + + cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl; + cout<<" format: variable for individual 1"<<endl; + cout<<" variable for individual 2"<<endl; + cout<<" ..."<<endl; + cout<<" missing value: NA"<<endl; + cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl; + cout<<" format: variable for individual 1"<<endl; + cout<<" variable for individual 2"<<endl; + cout<<" ..."<<endl; + cout<<" missing value: NA"<<endl; + cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl; + cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl; + cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl; + cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl; + cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl; + cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl; + cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl; + cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl; + cout<<" missing value: NA"<<endl; + cout<<" note: the intercept (a column of 1s) may need to be included"<<endl; + cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl; + cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl; + cout<<" options: n1: rs column number"<<endl; + cout<<" n2: estimated alpha column number (0 to ignore)"<<endl; + cout<<" n3: estimated beta column number (0 to ignore)"<<endl; + cout<<" n4: estimated gamma column number (0 to ignore)"<<endl; + cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl; + cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl; + cout<<" format: value for individual 1"<<endl; + cout<<" value for individual 2"<<endl; + cout<<" ..."<<endl; + cout<<" missing value: NA"<<endl; + cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl; + cout<<" -mu [num] "<<" specify input estimated mean value"<<endl; + cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl; + cout<<" format: header"<<endl; + cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl; + cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl; + cout<<" ..."<<endl; + cout<<" missing value: not allowed"<<endl; + cout<<" -r [filename] "<<" specify input total read count file name"<<endl; + cout<<" format: total read count for individual 1"<<endl; + cout<<" total read count for individual 2"<<endl; + cout<<" ..."<<endl; + cout<<" missing value: NA"<<endl; + cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl; + cout<<" format: rs#1"<<endl; + cout<<" rs#2"<<endl; + cout<<" ..."<<endl; + cout<<" missing value: NA"<<endl; + cout<<" -silence "<<" silent terminal display"<<endl; + cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl; + cout<<" options: 1: \"n by n matrix\" format"<<endl; + cout<<" 2: \"id id value\" format"<<endl; + cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl; + cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl; + cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl; + cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl; + cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl; + cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl; + cout<<endl; + } + + if (option==3) { + cout<<" SNP QC OPTIONS" << endl; + cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl; + cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl; + cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl; + cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl; + cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl; + cout<<endl; + } + + if (option==4) { + cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl; + cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl; + cout<<" options: 1: centered XX^T/p"<<endl; + cout<<" 2: standardized XX^T/p"<<endl; + cout<<" note: non-polymorphic SNPs are excluded "<<endl; + cout<<endl; + } + + if (option==5) { + cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl; + cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl; + cout<<endl; + } + + if (option==6) { + cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl; + cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl; + cout<<" options (with kinship file): 1: HE regression (default)"<<endl; + cout<<" 2: REML"<<endl; + cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl; + cout<<" 2: Standardized genotypes"<<endl; + cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl; + cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl; + cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl; + cout<<endl; + } + + if (option==7) { + cout<<" LINEAR MODEL OPTIONS" << endl; + cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl; + cout<<" options: 1: Wald test"<<endl; + cout<<" 2: Likelihood ratio test"<<endl; + cout<<" 3: Score test"<<endl; + cout<<" 4: 1-3"<<endl; + cout<<endl; + } + + if (option==8) { + cout<<" LINEAR MIXED MODEL OPTIONS" << endl; + cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl; + cout<<" options: 1: Wald test"<<endl; + cout<<" 2: Likelihood ratio test"<<endl; + cout<<" 3: Score test"<<endl; + cout<<" 4: 1-3"<<endl; + cout<<" 5: Parameter estimation in the null model only"<<endl; + cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl; + cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl; + cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl; + cout<<endl; + } + + if (option==9) { + cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl; + cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl; + cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl; + cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl; + cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl; + cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl; + cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl; + cout<<endl; + } + + if (option==10) { + cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl; + cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl; + cout<<" options: 1: BSLMM"<<endl; + cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl; + cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl; + cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl; + cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl; + + cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl; + cout<<" options: 1: LDR"<<endl; + + cout<<" MCMC OPTIONS" << endl; + cout<<" Prior" << endl; + cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl; + cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl; + cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl; + cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl; + cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl; + cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl; + cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl; + cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl; + + cout<<" Proposal" << endl; + cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl; + cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; + cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; + cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl; + + cout<<" Others" << endl; + cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl; + cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl; + cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl; + cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl; + cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl; + cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl; + cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl; + cout<<endl; + } + + if (option==11) { + cout<<" PREDICTION OPTIONS" << endl; + cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl; + cout<<" options: 1: predict for individuals with missing phenotypes"<<endl; + cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl; + cout<<endl; + } + + if (option==12) { + cout<<" CALC CORRELATION OPTIONS" << endl; + cout<<" -calccor "<<endl; + cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl; + cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl; + cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl; + cout<<endl; + } + + if (option==13) { + cout<<" NOTE"<<endl; + cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl; + cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl; + cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl; + cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl; + cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl; + cout<<endl; + } + + return; } -//options -//gk: 21-22 -//gs: 25-26 -//gq: 27-28 -//eigen: 31-32 -//lmm: 1-5 -//bslmm: 11-15 -//predict: 41-43 -//lm: 51 -//vc: 61 -//ci: 66-67 -//calccor: 71 -//gw: 72 - -void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) -{ +// OPTIONS +// ------- +// gk: 21-22 +// gs: 25-26 +// gq: 27-28 +// eigen: 31-32 +// lmm: 1-5 +// bslmm: 11-15 +// predict: 41-43 +// lm: 51 +// vc: 61 +// ci: 66-67 +// calccor: 71 +// gw: 72 + +void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) { string str; for(int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-bfile")==0 || strcmp(argv[i], "--bfile")==0 || strcmp(argv[i], "-b")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if (strcmp(argv[i], "-bfile")==0 || + strcmp(argv[i], "--bfile")==0 || + strcmp(argv[i], "-b")==0) { + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_bfile=str; } - else if (strcmp(argv[i], "-mbfile")==0 || strcmp(argv[i], "--mbfile")==0 || strcmp(argv[i], "-mb")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + else if (strcmp(argv[i], "-mbfile")==0 || + strcmp(argv[i], "--mbfile")==0 || + strcmp(argv[i], "-mb")==0) { + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); @@ -472,113 +487,148 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) cPar.mode_silence=true; } else if (strcmp(argv[i], "-g")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_geno=str; } else if (strcmp(argv[i], "-mg")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_mgeno=str; } else if (strcmp(argv[i], "-p")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_pheno=str; } else if (strcmp(argv[i], "-a")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_anno=str; } - // WJA added - else if (strcmp(argv[i], "-oxford")==0 || strcmp(argv[i], "--oxford")==0 || strcmp(argv[i], "-x")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + + // WJA added. + else if (strcmp(argv[i], "-oxford")==0 || + strcmp(argv[i], "--oxford")==0 || + strcmp(argv[i], "-x")==0) { + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_oxford=str; } else if (strcmp(argv[i], "-gxe")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_gxe=str; } else if (strcmp(argv[i], "-widv")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_weight=str; } else if (strcmp(argv[i], "-wsnp")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_wsnp=str; } else if (strcmp(argv[i], "-wcat")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_wcat=str; } else if (strcmp(argv[i], "-k")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_kin=str; } else if (strcmp(argv[i], "-mk")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_mk=str; } else if (strcmp(argv[i], "-u")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_ku=str; } else if (strcmp(argv[i], "-d")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_kd=str; } else if (strcmp(argv[i], "-c")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_cvt=str; } else if (strcmp(argv[i], "-cat")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); cPar.file_cat=str; } else if (strcmp(argv[i], "-mcat")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} + if(argv[i+1] == NULL || argv[i+1][0] == '-') { + continue; + } ++i; str.clear(); str.assign(argv[i]); @@ -956,16 +1006,6 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) str.assign(argv[i]); cPar.a_mode=10+atoi(str.c_str()); } - /* - else if (strcmp(argv[i], "-ldr")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=14; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=13+atoi(str.c_str()); - } - */ else if (strcmp(argv[i], "-hmin")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; @@ -1124,20 +1164,19 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;} } - //change prediction mode to 43, if the epm file is not provided - if (cPar.a_mode==41 && cPar.file_epm.empty()) {cPar.a_mode=43;} + // Change prediction mode to 43 if the epm file is not provided. + if (cPar.a_mode==41 && cPar.file_epm.empty()) { + cPar.a_mode=43; + } return; } - - -void GEMMA::BatchRun (PARAM &cPar) -{ +void GEMMA::BatchRun (PARAM &cPar) { clock_t time_begin, time_start; time_begin=clock(); - //Read Files + // Read Files. cout<<"Reading Files ... "<<endl; cPar.ReadFiles(); if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;} @@ -1210,7 +1249,6 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_vector_free(y_prdt); } - //Prediction with kinship matrix only; for one or more phenotypes if (cPar.a_mode==43) { //first, use individuals with full phenotypes to obtain estimates of Vg and Ve @@ -1224,6 +1262,7 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph); gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt); + //set covariates matrix W and phenotype matrix Y //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); @@ -1401,8 +1440,8 @@ void GEMMA::BatchRun (PARAM &cPar) cVarcov.CopyToParam(cPar); } - - //Compute the S matrix (and its variance), that is used for variance component estimation using summary statistics + // Compute the S matrix (and its variance), that is used for + // variance component estimation using summary statistics. if (cPar.a_mode==25 || cPar.a_mode==26) { cout<<"Calculating the S Matrix ... "<<endl; @@ -1440,22 +1479,7 @@ void GEMMA::BatchRun (PARAM &cPar) cPar.WriteMatrix (S, "S"); cPar.WriteVector (ns, "size"); cPar.WriteVar ("snps"); - /* - cout<<scientific; - for (size_t i=0; i<cPar.n_vc; i++) { - for (size_t j=0; j<cPar.n_vc; j++) { - cout<<gsl_matrix_get(S, i, j)<<" "; - } - cout<<endl; - } - for (size_t i=cPar.n_vc; i<cPar.n_vc*2; i++) { - for (size_t j=0; j<cPar.n_vc; j++) { - cout<<gsl_matrix_get(S, i, j)<<" "; - } - cout<<endl; - } - */ gsl_matrix_free (S); gsl_vector_free (ns); @@ -1508,8 +1532,7 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_vector_free (s); } - - //Calculate SNP covariance + // Calculate SNP covariance. if (cPar.a_mode==71) { VARCOV cVarcov; cVarcov.CopyFromParam(cPar); @@ -1522,9 +1545,8 @@ void GEMMA::BatchRun (PARAM &cPar) cVarcov.CopyToParam(cPar); } - - - //LM + + // LM. if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); @@ -1573,7 +1595,6 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_matrix_free (W); } - //VC estimation with one or multiple kinship matrices //REML approach only //if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp @@ -1591,7 +1612,7 @@ void GEMMA::BatchRun (PARAM &cPar) cPar.UpdateSNP (mapRS2wK); - //setup matrices and vectors + // Setup matrices and vectors. gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc); gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); gsl_vector *q=gsl_vector_alloc (cPar.n_vc); diff --git a/src/lmm.cpp b/src/lmm.cpp index a707534..73a9232 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,8 +16,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ - - #include <iostream> #include <fstream> #include <sstream> @@ -25,6 +23,7 @@ #include <iomanip> #include <cmath> #include <iostream> +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <bitset> @@ -34,8 +33,6 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" - - #include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" #include "gsl/gsl_min.h" @@ -45,22 +42,11 @@ #include "eigenlib.h" #include "lapack.h" #include "gzstream.h" - -#ifdef FORCE_FLOAT -#include "lmm_float.h" -#else #include "lmm.h" -#endif - using namespace std; - - - - -void LMM::CopyFromParam (PARAM &cPar) -{ +void LMM::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; d_pace=cPar.d_pace; @@ -69,7 +55,8 @@ void LMM::CopyFromParam (PARAM &cPar) file_out=cPar.file_out; path_out=cPar.path_out; file_gene=cPar.file_gene; - // WJA added + + // WJA added. file_oxford=cPar.file_oxford; l_min=cPar.l_min; @@ -97,9 +84,7 @@ void LMM::CopyFromParam (PARAM &cPar) return; } - -void LMM::CopyToParam (PARAM &cPar) -{ +void LMM::CopyToParam (PARAM &cPar) { cPar.time_UtX=time_UtX; cPar.time_opt=time_opt; @@ -108,83 +93,120 @@ void LMM::CopyToParam (PARAM &cPar) return; } - - -void LMM::WriteFiles () -{ +void LMM::WriteFiles () { string file_str; file_str=path_out+"/"+file_out; file_str+=".assoc.txt"; ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;} + if (!outfile) { + cout<<"error writing file: "<<file_str.c_str()<<endl; + return; + } if (!file_gene.empty()) { outfile<<"geneID"<<"\t"; if (a_mode==1) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl; + outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<< + "\t"<<"p_wald"<<endl; } else if (a_mode==2) { outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl; } else if (a_mode==3) { outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl; } else if (a_mode==4) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl; + outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<< + "\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<< + "\t"<<"p_score"<<endl; } else {} for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) { outfile<<snpInfo[t].rs_number<<"\t"; if (a_mode==1) { - outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<< + sumStat[t].lambda_remle<<"\t"<< + sumStat[t].p_wald <<endl; } else if (a_mode==2) { - outfile<<scientific<<setprecision(6)<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_lrt<<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].lambda_mle<<"\t"<< + sumStat[t].p_lrt<<endl; } else if (a_mode==3) { - outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_score<<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].beta<<"\t"<<sumStat[t].se<< + "\t"<<sumStat[t].p_score<<endl; } else if (a_mode==4) { - outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<< + sumStat[t].lambda_remle<<"\t"<< + sumStat[t].lambda_mle<<"\t"<< + sumStat[t].p_wald <<"\t"<< + sumStat[t].p_lrt<<"\t"<< + sumStat[t].p_score<<endl; } else {} } } else { - outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t"; + outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t" + <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t"; if (a_mode==1) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl; + outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t" + <<"p_wald"<<endl; } else if (a_mode==2) { outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl; } else if (a_mode==3) { outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl; } else if (a_mode==4) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl; + outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t" + <<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<< + "\t"<<"p_score"<<endl; } else {} size_t t=0; for (size_t i=0; i<snpInfo.size(); ++i) { if (indicator_snp[i]==0) {continue;} - outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t"; + outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<< + "\t"<<snpInfo[i].base_position<<"\t"<< + snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<< + snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<< + snpInfo[i].maf<<"\t"; if (a_mode==1) { - outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].beta<<"\t"<<sumStat[t].se<< + "\t"<<sumStat[t].lambda_remle<<"\t"<< + sumStat[t].p_wald <<endl; } else if (a_mode==2) { - outfile<<scientific<<setprecision(6)<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_lrt<<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].lambda_mle<<"\t"<< + sumStat[t].p_lrt<<endl; } else if (a_mode==3) { - outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_score<<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].beta<<"\t"<<sumStat[t].se<< + "\t"<<sumStat[t].p_score<<endl; } else if (a_mode==4) { - outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl; + outfile<<scientific<<setprecision(6)<< + sumStat[t].beta<<"\t"<<sumStat[t].se<< + "\t"<<sumStat[t].lambda_remle<<"\t"<< + sumStat[t].lambda_mle<<"\t"<< + sumStat[t].p_wald <<"\t"<< + sumStat[t].p_lrt<<"\t"<< + sumStat[t].p_score<<endl; } else {} t++; } } - outfile.close(); outfile.clear(); return; } -void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, const gsl_matrix *Uab, const gsl_vector *ab, gsl_matrix *Pab) -{ +void CalcPab (const size_t n_cvt, const size_t e_mode, + const gsl_vector *Hi_eval, const gsl_matrix *Uab, + const gsl_vector *ab, gsl_matrix *Pab) { size_t index_ab, index_aw, index_bw, index_ww; double p_ab; double ps_ab, ps_aw, ps_bw, ps_ww; @@ -194,23 +216,26 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval for (size_t b=a; b<=n_cvt+2; ++b) { index_ab=GetabIndex (a, b, n_cvt); if (p==0) { - gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab); - gsl_blas_ddot (Hi_eval, &Uab_col.vector, &p_ab); - if (e_mode!=0) {p_ab=gsl_vector_get (ab, index_ab)-p_ab;} - gsl_matrix_set (Pab, 0, index_ab, p_ab); + gsl_vector_const_view Uab_col= + gsl_matrix_const_column (Uab, index_ab); + gsl_blas_ddot(Hi_eval,&Uab_col.vector,&p_ab); + if (e_mode!=0) { + p_ab=gsl_vector_get (ab, index_ab)-p_ab; + } + gsl_matrix_set (Pab, 0, index_ab, p_ab); } else { - index_aw=GetabIndex (a, p, n_cvt); - index_bw=GetabIndex (b, p, n_cvt); - index_ww=GetabIndex (p, p, n_cvt); - - ps_ab=gsl_matrix_get (Pab, p-1, index_ab); - ps_aw=gsl_matrix_get (Pab, p-1, index_aw); - ps_bw=gsl_matrix_get (Pab, p-1, index_bw); - ps_ww=gsl_matrix_get (Pab, p-1, index_ww); - - p_ab=ps_ab-ps_aw*ps_bw/ps_ww; - gsl_matrix_set (Pab, p, index_ab, p_ab); + index_aw=GetabIndex (a, p, n_cvt); + index_bw=GetabIndex (b, p, n_cvt); + index_ww=GetabIndex (p, p, n_cvt); + + ps_ab=gsl_matrix_get (Pab, p-1, index_ab); + ps_aw=gsl_matrix_get (Pab, p-1, index_aw); + ps_bw=gsl_matrix_get (Pab, p-1, index_bw); + ps_ww=gsl_matrix_get (Pab, p-1, index_ww); + + p_ab=ps_ab-ps_aw*ps_bw/ps_ww; + gsl_matrix_set (Pab, p, index_ab, p_ab); } } } @@ -218,9 +243,9 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval return; } - -void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_eval, const gsl_matrix *Uab, const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) -{ +void CalcPPab (const size_t n_cvt, const size_t e_mode, + const gsl_vector *HiHi_eval, const gsl_matrix *Uab, + const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) { size_t index_ab, index_aw, index_bw, index_ww; double p2_ab; double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww; @@ -230,28 +255,33 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e for (size_t b=a; b<=n_cvt+2; ++b) { index_ab=GetabIndex (a, b, n_cvt); if (p==0) { - gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab); - gsl_blas_ddot (HiHi_eval, &Uab_col.vector, &p2_ab); - if (e_mode!=0) {p2_ab=p2_ab-gsl_vector_get (ab, index_ab)+2.0*gsl_matrix_get (Pab, 0, index_ab);} - gsl_matrix_set (PPab, 0, index_ab, p2_ab); + gsl_vector_const_view Uab_col= + gsl_matrix_const_column (Uab, index_ab); + gsl_blas_ddot (HiHi_eval, &Uab_col.vector, + &p2_ab); + if (e_mode!=0) { + p2_ab=p2_ab-gsl_vector_get(ab,index_ab) + + 2.0*gsl_matrix_get (Pab, 0, index_ab); + } + gsl_matrix_set (PPab, 0, index_ab, p2_ab); } else { - index_aw=GetabIndex (a, p, n_cvt); - index_bw=GetabIndex (b, p, n_cvt); - index_ww=GetabIndex (p, p, n_cvt); - - ps2_ab=gsl_matrix_get (PPab, p-1, index_ab); - ps_aw=gsl_matrix_get (Pab, p-1, index_aw); - ps_bw=gsl_matrix_get (Pab, p-1, index_bw); - ps_ww=gsl_matrix_get (Pab, p-1, index_ww); - ps2_aw=gsl_matrix_get (PPab, p-1, index_aw); - ps2_bw=gsl_matrix_get (PPab, p-1, index_bw); - ps2_ww=gsl_matrix_get (PPab, p-1, index_ww); - - p2_ab=ps2_ab+ps_aw*ps_bw*ps2_ww/(ps_ww*ps_ww); - p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww; - gsl_matrix_set (PPab, p, index_ab, p2_ab); - + index_aw=GetabIndex (a, p, n_cvt); + index_bw=GetabIndex (b, p, n_cvt); + index_ww=GetabIndex (p, p, n_cvt); + + ps2_ab=gsl_matrix_get (PPab, p-1, index_ab); + ps_aw=gsl_matrix_get (Pab, p-1, index_aw); + ps_bw=gsl_matrix_get (Pab, p-1, index_bw); + ps_ww=gsl_matrix_get (Pab, p-1, index_ww); + ps2_aw=gsl_matrix_get (PPab, p-1, index_aw); + ps2_bw=gsl_matrix_get (PPab, p-1, index_bw); + ps2_ww=gsl_matrix_get (PPab, p-1, index_ww); + + p2_ab=ps2_ab+ps_aw*ps_bw* + ps2_ww/(ps_ww*ps_ww); + p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww; + gsl_matrix_set (PPab, p, index_ab, p2_ab); } } } @@ -259,44 +289,56 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e return; } - -void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab, const gsl_vector *ab, const gsl_matrix *Pab, const gsl_matrix *PPab, gsl_matrix *PPPab) -{ +void CalcPPPab (const size_t n_cvt, const size_t e_mode, + const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab, + const gsl_vector *ab, const gsl_matrix *Pab, + const gsl_matrix *PPab, gsl_matrix *PPPab) { size_t index_ab, index_aw, index_bw, index_ww; double p3_ab; - double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw, ps3_ww; + double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, + ps3_aw, ps3_bw, ps3_ww; for (size_t p=0; p<=n_cvt+1; ++p) { for (size_t a=p+1; a<=n_cvt+2; ++a) { for (size_t b=a; b<=n_cvt+2; ++b) { index_ab=GetabIndex (a, b, n_cvt); if (p==0) { - gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab); - gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector, &p3_ab); - if (e_mode!=0) {p3_ab=gsl_vector_get (ab, index_ab)-p3_ab+3.0*gsl_matrix_get (PPab, 0, index_ab)-3.0*gsl_matrix_get (Pab, 0, index_ab);} - gsl_matrix_set (PPPab, 0, index_ab, p3_ab); + gsl_vector_const_view Uab_col= + gsl_matrix_const_column (Uab, index_ab); + gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector, + &p3_ab); + if (e_mode!=0) { + p3_ab=gsl_vector_get (ab, index_ab)- + p3_ab+3.0*gsl_matrix_get(PPab,0,index_ab) + -3.0*gsl_matrix_get (Pab, 0, index_ab); + } + gsl_matrix_set (PPPab, 0, index_ab, p3_ab); } else { - index_aw=GetabIndex (a, p, n_cvt); - index_bw=GetabIndex (b, p, n_cvt); - index_ww=GetabIndex (p, p, n_cvt); - - ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab); - ps_aw=gsl_matrix_get (Pab, p-1, index_aw); - ps_bw=gsl_matrix_get (Pab, p-1, index_bw); - ps_ww=gsl_matrix_get (Pab, p-1, index_ww); - ps2_aw=gsl_matrix_get (PPab, p-1, index_aw); - ps2_bw=gsl_matrix_get (PPab, p-1, index_bw); - ps2_ww=gsl_matrix_get (PPab, p-1, index_ww); - ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw); - ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw); - ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww); - - p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww/(ps_ww*ps_ww*ps_ww); - p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw+ps2_aw*ps2_bw)/ps_ww; - p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/(ps_ww*ps_ww); - - gsl_matrix_set (PPPab, p, index_ab, p3_ab); + index_aw=GetabIndex (a, p, n_cvt); + index_bw=GetabIndex (b, p, n_cvt); + index_ww=GetabIndex (p, p, n_cvt); + + ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab); + ps_aw=gsl_matrix_get (Pab, p-1, index_aw); + ps_bw=gsl_matrix_get (Pab, p-1, index_bw); + ps_ww=gsl_matrix_get (Pab, p-1, index_ww); + ps2_aw=gsl_matrix_get (PPab, p-1, index_aw); + ps2_bw=gsl_matrix_get (PPab, p-1, index_bw); + ps2_ww=gsl_matrix_get (PPab, p-1, index_ww); + ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw); + ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw); + ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww); + + p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww + /(ps_ww*ps_ww*ps_ww); + p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw + + ps2_aw*ps2_bw)/ps_ww; + p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw* + ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/ + (ps_ww*ps_ww); + + gsl_matrix_set (PPPab, p, index_ab, p3_ab); } } } @@ -304,10 +346,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH return; } - - -double LogL_f (double l, void *params) -{ +double LogL_f (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; @@ -325,7 +364,11 @@ double LogL_f (double l, void *params) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -348,13 +391,7 @@ double LogL_f (double l, void *params) return f; } - - - - - -double LogL_dev1 (double l, void *params) -{ +double LogL_dev1 (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; @@ -374,7 +411,11 @@ double LogL_dev1 (double l, void *params) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -407,18 +448,18 @@ double LogL_dev1 (double l, void *params) return dev1; } - - - -double LogL_dev2 (double l, void *params) -{ +double LogL_dev2 (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} + if (p->calc_null==true) { + nc_total=n_cvt; + } else { + nc_total=n_cvt+1; + } double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0; size_t index_yy; @@ -433,7 +474,11 @@ double LogL_dev2 (double l, void *params) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -453,7 +498,8 @@ double LogL_dev2 (double l, void *params) CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, + PPPab); double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l); @@ -465,7 +511,8 @@ double LogL_dev2 (double l, void *params) double yPKPy=(P_yy-PP_yy)/l; double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); - dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); + dev2=0.5*trace_HiKHiK-0.5*(double)ni_test* + (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); gsl_matrix_free (Pab); gsl_matrix_free (PPab); @@ -478,12 +525,7 @@ double LogL_dev2 (double l, void *params) return dev2; } - - - - -void LogL_dev12 (double l, void *params, double *dev1, double *dev2) -{ +void LogL_dev12 (double l, void *params, double *dev1, double *dev2) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; @@ -505,7 +547,11 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -525,7 +571,8 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2) CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, + PPPab); double trace_HiK=((double)ni_test-trace_Hi)/l; double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l); @@ -540,7 +587,8 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2) double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); *dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy; - *dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); + *dev2=0.5*trace_HiKHiK-0.5*(double)ni_test* + (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); gsl_matrix_free (Pab); gsl_matrix_free (PPab); @@ -553,10 +601,7 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2) return; } - - -double LogRL_f (double l, void *params) -{ +double LogRL_f (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; @@ -564,7 +609,9 @@ double LogRL_f (double l, void *params) double df; size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } + if (p->calc_null==true) { + nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; + } else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d; @@ -577,7 +624,11 @@ double LogRL_f (double l, void *params) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -590,7 +641,7 @@ double LogRL_f (double l, void *params) gsl_vector_set_all (v_temp, 1.0); CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab); - //calculate |WHiW|-|WW| + // Calculate |WHiW|-|WW|. logdet_hiw=0.0; for (size_t i=0; i<nc_total; ++i) { index_ww=GetabIndex (i+1, i+1, n_cvt); @@ -612,10 +663,7 @@ double LogRL_f (double l, void *params) return f; } - - -double LogRL_dev1 (double l, void *params) -{ +double LogRL_dev1 (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; @@ -623,8 +671,14 @@ double LogRL_dev1 (double l, void *params) double df; size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } - else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} + if (p->calc_null==true) { + nc_total=n_cvt; + df=(double)ni_test-(double)n_cvt; + } + else { + nc_total=n_cvt+1; + df=(double)ni_test-(double)n_cvt-1.0; + } double dev1=0.0, trace_Hi=0.0; size_t index_ww; @@ -637,7 +691,11 @@ double LogRL_dev1 (double l, void *params) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -654,7 +712,7 @@ double LogRL_dev1 (double l, void *params) CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - //calculate tracePK and trace PKPK + // Calculate tracePK and trace PKPK. double trace_P=trace_Hi; double ps_ww, ps2_ww; for (size_t i=0; i<nc_total; ++i) { @@ -665,7 +723,7 @@ double LogRL_dev1 (double l, void *params) } double trace_PK=(df-trace_P)/l; - //calculate yPKPy, yPKPKPy + // Calculate yPKPy, yPKPKPy. index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, nc_total, index_ww); double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww); @@ -682,11 +740,7 @@ double LogRL_dev1 (double l, void *params) return dev1; } - - - -double LogRL_dev2 (double l, void *params) -{ +double LogRL_dev2 (double l, void *params) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; @@ -694,8 +748,14 @@ double LogRL_dev2 (double l, void *params) double df; size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } - else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} + if (p->calc_null==true) { + nc_total=n_cvt; + df=(double)ni_test-(double)n_cvt; + } + else { + nc_total=n_cvt+1; + df=(double)ni_test-(double)n_cvt-1.0; + } double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0; size_t index_ww; @@ -710,7 +770,11 @@ double LogRL_dev2 (double l, void *params) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -730,9 +794,10 @@ double LogRL_dev2 (double l, void *params) CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, + PPab, PPPab); - //calculate tracePK and trace PKPK + // Calculate tracePK and trace PKPK. double trace_P=trace_Hi, trace_PP=trace_HiHi; double ps_ww, ps2_ww, ps3_ww; for (size_t i=0; i<nc_total; ++i) { @@ -745,7 +810,7 @@ double LogRL_dev2 (double l, void *params) } double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l); - //calculate yPKPy, yPKPKPy + // Calculate yPKPy, yPKPKPy. index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, nc_total, index_ww); double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww); @@ -766,11 +831,7 @@ double LogRL_dev2 (double l, void *params) return dev2; } - - - -void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) -{ +void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) { FUNC_PARAM *p=(FUNC_PARAM *) params; size_t n_cvt=p->n_cvt; size_t ni_test=p->ni_test; @@ -778,8 +839,14 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) double df; size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; } - else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} + if (p->calc_null==true) { + nc_total=n_cvt; + df=(double)ni_test-(double)n_cvt; + } + else { + nc_total=n_cvt+1; + df=(double)ni_test-(double)n_cvt-1.0; + } double trace_Hi=0.0, trace_HiHi=0.0; size_t index_ww; @@ -794,7 +861,11 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) gsl_vector_memcpy (v_temp, p->eval); gsl_vector_scale (v_temp, l); - if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (p->e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -814,9 +885,10 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, + PPab, PPPab); - //calculate tracePK and trace PKPK + // Calculate tracePK and trace PKPK. double trace_P=trace_Hi, trace_PP=trace_HiHi; double ps_ww, ps2_ww, ps3_ww; for (size_t i=0; i<nc_total; ++i) { @@ -830,7 +902,7 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) double trace_PK=(df-trace_P)/l; double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l); - //calculate yPKPy, yPKPKPy + // Calculate yPKPy, yPKPKPy. index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); double P_yy=gsl_matrix_get (Pab, nc_total, index_ww); double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww); @@ -839,7 +911,8 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); *dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy; - *dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); + *dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/ + (P_yy*P_yy); gsl_matrix_free (Pab); gsl_matrix_free (PPab); @@ -849,18 +922,11 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) gsl_vector_free (HiHiHi_eval); gsl_vector_free (v_temp); - return ; + return; } - - - - - - - -void LMM::CalcRLWald (const double &l, const FUNC_PARAM ¶ms, double &beta, double &se, double &p_wald) -{ +void LMM::CalcRLWald (const double &l, const FUNC_PARAM ¶ms, + double &beta, double &se, double &p_wald) { size_t n_cvt=params.n_cvt; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; @@ -872,7 +938,11 @@ void LMM::CalcRLWald (const double &l, const FUNC_PARAM ¶ms, double &beta, d gsl_vector_memcpy (v_temp, params.eval); gsl_vector_scale (v_temp, l); - if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (params.e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -890,17 +960,15 @@ void LMM::CalcRLWald (const double &l, const FUNC_PARAM ¶ms, double &beta, d double tau=(double)df/Px_yy; se=sqrt(1.0/(tau*P_xx)); p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df); -// p_wald=gsl_cdf_chisq_Q ((P_yy-Px_yy)*tau, 1); gsl_matrix_free (Pab); gsl_vector_free (Hi_eval); gsl_vector_free (v_temp); - return ; + return; } - -void LMM::CalcRLScore (const double &l, const FUNC_PARAM ¶ms, double &beta, double &se, double &p_score) -{ +void LMM::CalcRLScore (const double &l, const FUNC_PARAM ¶ms, + double &beta, double &se, double &p_score) { size_t n_cvt=params.n_cvt; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; @@ -912,7 +980,11 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM ¶ms, double &beta, gsl_vector_memcpy (v_temp, params.eval); gsl_vector_scale (v_temp, l); - if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);} + if (params.e_mode==0) { + gsl_vector_set_all (Hi_eval, 1.0); + } else { + gsl_vector_memcpy (Hi_eval, v_temp); + } gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); @@ -930,24 +1002,16 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM ¶ms, double &beta, double tau=(double)df/Px_yy; se=sqrt(1.0/(tau*P_xx)); - p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1.0, df); -// p_score=gsl_cdf_chisq_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1); + p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), + 1.0, df); gsl_matrix_free (Pab); gsl_vector_free (Hi_eval); gsl_vector_free (v_temp); - return ; + return; } - - - - - - - -void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) -{ +void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) { size_t index_ab; size_t n_cvt=UtW->size2; @@ -958,20 +1022,26 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);} else { - gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, a-1); - gsl_vector_memcpy (u_a, &UtW_col.vector); + gsl_vector_const_view UtW_col= + gsl_matrix_const_column (UtW, a-1); + gsl_vector_memcpy (u_a, &UtW_col.vector); } for (size_t b=a; b>=1; --b) { if (b==n_cvt+1) {continue;} index_ab=GetabIndex (a, b, n_cvt); - gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab); + gsl_vector_view Uab_col= + gsl_matrix_column (Uab, index_ab); - if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);} + if (b==n_cvt+2) { + gsl_vector_memcpy (&Uab_col.vector, Uty); + } else { - gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1); - gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector); + gsl_vector_const_view UtW_col= + gsl_matrix_const_column (UtW, b-1); + gsl_vector_memcpy (&Uab_col.vector, + &UtW_col.vector); } gsl_vector_mul(&Uab_col.vector, u_a); @@ -982,9 +1052,8 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) return; } - -void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Utx, gsl_matrix *Uab) -{ +void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_vector *Utx, gsl_matrix *Uab) { size_t index_ab; size_t n_cvt=UtW->size2; @@ -993,10 +1062,13 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Ut gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab); if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);} - else if (b==n_cvt+1) {gsl_vector_memcpy (&Uab_col.vector, Utx);} + else if (b==n_cvt+1) { + gsl_vector_memcpy (&Uab_col.vector, Utx); + } else { - gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1); - gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector); + gsl_vector_const_view UtW_col= + gsl_matrix_const_column (UtW, b-1); + gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector); } gsl_vector_mul(&Uab_col.vector, Utx); @@ -1005,10 +1077,7 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Ut return; } - - -void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) -{ +void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) { size_t index_ab; size_t n_cvt=W->size2; @@ -1019,10 +1088,12 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) for (size_t a=1; a<=n_cvt+2; ++a) { if (a==n_cvt+1) {continue;} - if (a==n_cvt+2) {gsl_vector_memcpy (v_a, y);} + if (a==n_cvt+2) { + gsl_vector_memcpy (v_a, y); + } else { - gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1); - gsl_vector_memcpy (v_a, &W_col.vector); + gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1); + gsl_vector_memcpy (v_a, &W_col.vector); } for (size_t b=a; b>=1; --b) { @@ -1030,10 +1101,13 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) index_ab=GetabIndex (a, b, n_cvt); - if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);} + if (b==n_cvt+2) { + gsl_vector_memcpy (v_b, y); + } else { - gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1); - gsl_vector_memcpy (v_b, &W_col.vector); + gsl_vector_const_view W_col= + gsl_matrix_const_column (W, b-1); + gsl_vector_memcpy (v_b, &W_col.vector); } gsl_blas_ddot (v_a, v_b, &d); @@ -1046,9 +1120,8 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) return; } - -void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_vector *ab) -{ +void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, + gsl_vector *ab) { size_t index_ab; size_t n_cvt=W->size2; @@ -1061,8 +1134,8 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_ if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);} else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);} else { - gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1); - gsl_vector_memcpy (v_b, &W_col.vector); + gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1); + gsl_vector_memcpy (v_b, &W_col.vector); } gsl_blas_ddot (x, v_b, &d); @@ -1070,31 +1143,31 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_ } gsl_vector_free (v_b); - return; } - - - - -void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x) -{ +void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Utx, + const gsl_matrix *W, const gsl_vector *x) { igzstream infile (file_gene.c_str(), igzstream::in); - if (!infile) {cout<<"error reading gene expression file:"<<file_gene<<endl; return;} + if (!infile) { + cout<<"error reading gene expression file:"<<file_gene<<endl; + return; + } clock_t time_start=clock(); string line; char *ch_ptr; - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; + double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; + double p_lrt=0, p_score=0; double logl_H1=0.0, logl_H0=0.0, l_H0; int c_phen; - string rs; //gene id + string rs; // Gene id. double d; - //Calculate basic quantities + // Calculate basic quantities. size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_vector *y=gsl_vector_alloc (U->size1); @@ -1102,12 +1175,14 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); - //header + // Header. getline(infile, line); for (size_t t=0; t<ng_total; t++) { !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==ng_total-1) {ProgressBar ("Performing Analysis ", t, ng_total-1);} + if (t%d_pace==0 || t==ng_total-1) { + ProgressBar ("Performing Analysis ", t, ng_total-1); + } ch_ptr=strtok ((char *)line.c_str(), " , \t"); rs=ch_ptr; @@ -1126,7 +1201,7 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //calculate null + // Calculate null. time_start=clock(); gsl_matrix_set_zero (Uab); @@ -1135,32 +1210,36 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma FUNC_PARAM param0={false, ni_test, n_cvt, eval, Uab, ab, 0}; if (a_mode==2 || a_mode==3 || a_mode==4) { - CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0); + CalcLambda('L', param0, l_min, l_max, n_region, + l_H0, logl_H0); } - //calculate alternative + // Calculate alternative. CalcUab(UtW, Uty, Utx, Uab); FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0}; - //3 is before 1 + //3 is before 1. if (a_mode==3 || a_mode==4) { CalcRLScore (l_H0, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); + CalcLambda ('R', param1, l_min, l_max, n_region, + lambda_remle, logl_H1); + CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); + CalcLambda ('L', param1, l_min, l_max, n_region, + lambda_mle, logl_H1); + p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); } time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //store summary data - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + // Store summary data. + SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } cout<<endl; @@ -1176,27 +1255,27 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma return; } - - - - -void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y) -{ +void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y) { igzstream infile (file_geno.c_str(), igzstream::in); -// ifstream infile (file_geno.c_str(), ifstream::in); - if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;} + if (!infile) { + cout<<"error reading genotype file:"<<file_geno<<endl; + return; + } clock_t time_start=clock(); string line; char *ch_ptr; - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; + double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; + double p_lrt=0, p_score=0; double logl_H1=0.0; int n_miss, c_phen; double geno, x_mean; - //Calculate basic quantities + // Calculate basic quantities. size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_vector *x=gsl_vector_alloc (U->size1); @@ -1205,7 +1284,7 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); - //create a large matrix + // Create a large matrix. size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); @@ -1213,10 +1292,6 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_ gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); -// if (e_mode!=0) { -// gsl_vector_set_zero (ab); -// Calcab (W, y, ab); -// } //start reading genotypes and analyze size_t c=0, t_last=0; @@ -1225,9 +1300,10 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_ t_last++; } for (size_t t=0; t<indicator_snp.size(); ++t) { -// if (t>1) {break;} !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} + if (t%d_pace==0 || t==(ns_total-1)) { + ProgressBar ("Reading SNPs ", t, ns_total-1); + } if (indicator_snp[t]==0) {continue;} ch_ptr=strtok ((char *)line.c_str(), " , \t"); @@ -1240,7 +1316,9 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_ ch_ptr=strtok (NULL, " , \t"); if (indicator_idv[i]==0) {continue;} - if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;} + if (strcmp(ch_ptr, "NA")==0) { + gsl_vector_set(x_miss, c_phen, 0.0); n_miss++; + } else { geno=atof(ch_ptr); @@ -1254,20 +1332,11 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_ x_mean/=(double)(ni_test-n_miss); for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);} - //geno=gsl_vector_get(x, i); - //if (x_mean>1) { - // gsl_vector_set(x, i, 2-geno); - //} + if (gsl_vector_get (x_miss, i)==0) { + gsl_vector_set(x, i, x_mean); + } } - /* - //calculate statistics - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - */ - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); gsl_vector_memcpy (&Xlarge_col.vector, x); c++; @@ -1276,49 +1345,52 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_ size_t l=0; if (c%msize==0) {l=msize;} else {l=c%msize;} - gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + gsl_matrix_view Xlarge_sub= + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub= + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix); + eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, + 0.0, &UtXlarge_sub.matrix); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Xlarge); for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i); + gsl_vector_view UtXlarge_col= + gsl_matrix_column (UtXlarge, i); gsl_vector_memcpy (Utx, &UtXlarge_col.vector); CalcUab(UtW, Uty, Utx, Uab); - // if (e_mode!=0) { - // Calcab (W, y, x, ab); - // } time_start=clock(); - FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0}; + FUNC_PARAM param1= + {false, ni_test, n_cvt, eval, Uab, ab, 0}; - //3 is before 1 + // 3 is before 1. if (a_mode==3 || a_mode==4) { CalcRLScore (l_mle_null, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcLambda ('R', param1, l_min, l_max, n_region, + lambda_remle, logl_H1); CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + CalcLambda ('L', param1, l_min, l_max, n_region, + lambda_mle, logl_H1); p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1); } - //if (p_wald<0) {cout<<t<<"\t"<<i<<"\t"<<l<<endl;} - //if (x_mean>1) {beta*=-1;} + time_opt+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //store summary data - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + // Store summary data. + SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } @@ -1341,14 +1413,9 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_ return; } - - - - - - -void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y) -{ +void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y) { string file_bed=file_bfile+".bed"; ifstream infile (file_bed.c_str(), ios::binary); if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;} @@ -1358,12 +1425,13 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m char ch[1]; bitset<8> b; - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; + double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; + double p_lrt=0, p_score=0; double logl_H1=0.0; int n_bit, n_miss, ci_total, ci_test; double geno, x_mean; - //Calculate basic quantities + // Calculate basic quantities. size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_vector *x=gsl_vector_alloc (U->size1); @@ -1371,7 +1439,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); - //create a large matrix + // Create a large matrix. size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); @@ -1379,16 +1447,12 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); -// if (e_mode!=0) { -// gsl_vector_set_zero (ab); -// Calcab (W, y, ab); -// } - //calculate n_bit and c, the number of bit for each snp + // Calculate n_bit and c, the number of bit for each SNP. if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } - //print the first three majic numbers + // Print the first three magic numbers. for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; @@ -1400,31 +1464,44 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m t_last++; } for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);} + if (t%d_pace==0 || t==snpInfo.size()-1) { + ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); + } if (indicator_snp[t]==0) {continue;} - infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t*n_bit+3); - //read genotypes + // Read genotypes. x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; for (int i=0; i<n_bit; ++i) { infile.read(ch,1); b=ch[0]; - for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0; - if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;} - if (indicator_idv[ci_total]==0) {ci_total++; continue;} - if (b[2*j]==0) { - if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; } - else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } - } - else { - if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } - else {gsl_vector_set(x, ci_test, -9); n_miss++; } - } - - ci_total++; - ci_test++; + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j=0; j<4; ++j) { + if ((i==(n_bit-1)) && ci_total==(int)ni_total) { + break; + } + if (indicator_idv[ci_total]==0) { + ci_total++; + continue; + } + + if (b[2*j]==0) { + if (b[2*j+1]==0) { + gsl_vector_set(x, ci_test, 2); + x_mean+=2.0; + } + else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } + } + else { + if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } + else {gsl_vector_set(x, ci_test, -9); n_miss++; } + } + + ci_total++; + ci_test++; } } @@ -1432,19 +1509,12 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m for (size_t i=0; i<ni_test; ++i) { geno=gsl_vector_get(x,i); - if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;} - //if (x_mean>1) { - //gsl_vector_set(x, i, 2-geno); - //} + if (geno==-9) { + gsl_vector_set(x, i, x_mean); + geno=x_mean; + } } - /* - //calculate statistics - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - */ - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); gsl_vector_memcpy (&Xlarge_col.vector, x); c++; @@ -1453,52 +1523,56 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m size_t l=0; if (c%msize==0) {l=msize;} else {l=c%msize;} - gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + gsl_matrix_view Xlarge_sub= + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub= + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix); + eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, + 0.0, &UtXlarge_sub.matrix); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Xlarge); for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i); + gsl_vector_view UtXlarge_col= + gsl_matrix_column (UtXlarge, i); gsl_vector_memcpy (Utx, &UtXlarge_col.vector); CalcUab(UtW, Uty, Utx, Uab); - // if (e_mode!=0) { - // Calcab (W, y, x, ab); - // } time_start=clock(); - FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0}; + FUNC_PARAM param1={false, ni_test, n_cvt, eval, + Uab, ab, 0}; - //3 is before 1, for beta + // 3 is before 1, for beta. if (a_mode==3 || a_mode==4) { CalcRLScore (l_mle_null, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcLambda ('R', param1, l_min, l_max, n_region, + lambda_remle, logl_H1); CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + CalcLambda ('L', param1, l_min, l_max, n_region, + lambda_mle, logl_H1); p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1); } - //if (x_mean>1) {beta*=-1;} + time_opt+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //store summary data - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + // Store summary data. + SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } } - } + } cout<<endl; gsl_vector_free (x); @@ -1515,25 +1589,25 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m return; } - -// WJA added -#include <assert.h> -void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y) -{ +// WJA added. +void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y) { string file_bgen=file_oxford+".bgen"; ifstream infile (file_bgen.c_str(), ios::binary); - if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;} - + if (!infile) { + cout<<"error reading bgen file:"<<file_bgen<<endl; + return; + } clock_t time_start=clock(); - - - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; + double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; + double p_lrt=0, p_score=0; double logl_H1=0.0; int n_miss, c_phen; double geno, x_mean; - //Calculate basic quantities + // Calculate basic quantities. size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; gsl_vector *x=gsl_vector_alloc (U->size1); @@ -1542,7 +1616,7 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); gsl_vector *ab=gsl_vector_alloc (n_index); - //create a large matrix + // Create a large matrix. size_t msize=10000; gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); @@ -1550,12 +1624,8 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); -// if (e_mode!=0) { -// gsl_vector_set_zero (ab); -// Calcab (W, y, ab); -// } - // read in header + // Read in header. uint32_t bgen_snp_block_offset; uint32_t bgen_header_length; uint32_t bgen_nsamples; @@ -1573,11 +1643,11 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma infile.read(reinterpret_cast<char*>(&bgen_flags),4); bgen_snp_block_offset-=4; bool CompressedSNPBlocks=bgen_flags&0x1; -// bool LongIds=bgen_flags&0x4; infile.ignore(bgen_snp_block_offset); - double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss; + double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; + double bgen_geno_prob_non_miss; uint32_t bgen_N; uint16_t bgen_LS; @@ -1593,11 +1663,10 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma string id; string rs; string chr; - std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl; + std::cout << "Warning: WJA hard coded SNP missingness " << + "threshold of 10%"<<std::endl; - - - //start reading genotypes and analyze + // Start reading genotypes and analyze. size_t c=0, t_last=0; for (size_t t=0; t<indicator_snp.size(); ++t) { if (indicator_snp[t]==0) {continue;} @@ -1605,11 +1674,12 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma } for (size_t t=0; t<indicator_snp.size(); ++t) { - -// if (t>1) {break;} - if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} + if (t%d_pace==0 || t==(ns_total-1)) { + ProgressBar ("Reading SNPs ", t, ns_total-1); + } if (indicator_snp[t]==0) {continue;} - // read SNP header + + // Read SNP header. id.clear(); rs.clear(); chr.clear(); @@ -1641,89 +1711,86 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma bgen_B_allele.resize(bgen_LB); infile.read(&bgen_B_allele[0], bgen_LB); - - - uint16_t unzipped_data[3*bgen_N]; if (indicator_snp[t]==0) { if(CompressedSNPBlocks) - infile.read(reinterpret_cast<char*>(&bgen_P),4); + infile.read(reinterpret_cast<char*>(&bgen_P),4); else - bgen_P=6*bgen_N; + bgen_P=6*bgen_N; infile.ignore(static_cast<size_t>(bgen_P)); continue; } - - if(CompressedSNPBlocks) - { - - + if(CompressedSNPBlocks) { infile.read(reinterpret_cast<char*>(&bgen_P),4); uint8_t zipped_data[bgen_P]; unzipped_data_size=6*bgen_N; - infile.read(reinterpret_cast<char*>(zipped_data),bgen_P); + infile.read(reinterpret_cast<char*>(zipped_data), + bgen_P); - int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P)); + int result= + uncompress(reinterpret_cast<Bytef*>(unzipped_data), + reinterpret_cast<uLongf*>(&unzipped_data_size), + reinterpret_cast<Bytef*>(zipped_data), + static_cast<uLong> (bgen_P)); assert(result == Z_OK); } else { - bgen_P=6*bgen_N; - infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P); + bgen_P=6*bgen_N; + infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P); } x_mean=0.0; c_phen=0; n_miss=0; gsl_vector_set_zero(x_miss); for (size_t i=0; i<bgen_N; ++i) { - if (indicator_idv[i]==0) {continue;} - - - bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0; - bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0; - bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0; - // WJA - bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;} - else { - - bgen_geno_prob_AA/=bgen_geno_prob_non_miss; - bgen_geno_prob_AB/=bgen_geno_prob_non_miss; - bgen_geno_prob_BB/=bgen_geno_prob_non_miss; - - geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; + if (indicator_idv[i]==0) {continue;} + + bgen_geno_prob_AA= + static_cast<double>(unzipped_data[i*3])/32768.0; + bgen_geno_prob_AB= + static_cast<double>(unzipped_data[i*3+1])/32768.0; + bgen_geno_prob_BB= + static_cast<double>(unzipped_data[i*3+2])/32768.0; + + // WJA. + bgen_geno_prob_non_miss = bgen_geno_prob_AA + + bgen_geno_prob_AB+bgen_geno_prob_BB; + if (bgen_geno_prob_non_miss<0.9) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } + else { + + bgen_geno_prob_AA/=bgen_geno_prob_non_miss; + bgen_geno_prob_AB/=bgen_geno_prob_non_miss; + bgen_geno_prob_BB/=bgen_geno_prob_non_miss; + + geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean+=geno; + } + c_phen++; } x_mean/=static_cast<double>(ni_test-n_miss); for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);} + if (gsl_vector_get (x_miss, i)==0) { + gsl_vector_set(x, i, x_mean); + } geno=gsl_vector_get(x, i); - //if (x_mean>1) { - //gsl_vector_set(x, i, 2-geno); - //} } - /* - //calculate statistics - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - */ - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); gsl_vector_memcpy (&Xlarge_col.vector, x); c++; @@ -1732,48 +1799,51 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma size_t l=0; if (c%msize==0) {l=msize;} else {l=c%msize;} - gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + gsl_matrix_view Xlarge_sub= + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub= + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix); + eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, + 0.0, &UtXlarge_sub.matrix); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); gsl_matrix_set_zero (Xlarge); for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i); + gsl_vector_view UtXlarge_col= + gsl_matrix_column (UtXlarge, i); gsl_vector_memcpy (Utx, &UtXlarge_col.vector); CalcUab(UtW, Uty, Utx, Uab); - // if (e_mode!=0) { - // Calcab (W, y, x, ab); - // } time_start=clock(); - FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0}; + FUNC_PARAM param1={false,ni_test,n_cvt,eval,Uab,ab,0}; - //3 is before 1 + // 3 is before 1. if (a_mode==3 || a_mode==4) { CalcRLScore (l_mle_null, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcLambda ('R', param1, l_min, l_max, n_region, + lambda_remle, logl_H1); CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + CalcLambda ('L', param1, l_min, l_max, n_region, + lambda_mle, logl_H1); p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1); } - //if (x_mean>1) {beta*=-1;} - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_opt+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); - //store summary data - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + // Store summary data. + SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); } } @@ -1793,13 +1863,13 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma infile.clear(); return; - } - - -void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr) -{ +void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const double l_min, const double l_max, + const size_t n_region, + vector<pair<size_t, double> > &pos_loglr) { double logl_H0, logl_H1, log_lr, lambda0, lambda1; gsl_vector *w=gsl_vector_alloc (Uty->size); @@ -1812,7 +1882,7 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0); gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector); - CalcUab (Utw, Uty, Uab) ; + CalcUab (Utw, Uty, Uab); FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0}; CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0); @@ -1822,7 +1892,8 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector CalcUab(Utw, Uty, &UtX_col.vector, Uab); FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0}; - CalcLambda ('L', param1, l_min, l_max, n_region, lambda1, logl_H1); + CalcLambda ('L', param1, l_min, l_max, n_region, lambda1, + logl_H1); log_lr=logl_H1-logl_H0; pos_loglr.push_back(make_pair(i,log_lr) ); @@ -1836,17 +1907,21 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector return; } - - - -void CalcLambda (const char func_name, FUNC_PARAM ¶ms, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logf) -{ - if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;} +void CalcLambda (const char func_name, FUNC_PARAM ¶ms, + const double l_min, const double l_max, + const size_t n_region, double &lambda, double &logf) { + if (func_name!='R' && func_name!='L' && func_name!='r' && + func_name!='l') { + cout << "func_name only takes 'R' or 'L': 'R' for " << + "log-restricted likelihood, 'L' for log-likelihood." << endl; + return; + } vector<pair<double, double> > lambda_lh; - //evaluate first order derivates in different intervals - double lambda_l, lambda_h, lambda_interval=log(l_max/l_min)/(double)n_region; + // Evaluate first-order derivates in different intervals. + double lambda_l, lambda_h, lambda_interval= + log(l_max/l_min)/(double)n_region; double dev1_l, dev1_h, logf_l, logf_h; for (size_t i=0; i<n_region; ++i) { @@ -1867,7 +1942,7 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms, const double l_min, c } } - //if derivates do not change signs in any interval + // If derivates do not change signs in any interval. if (lambda_lh.empty()) { if (func_name=='R' || func_name=='r') { logf_l=LogRL_f (l_min, ¶ms); @@ -1878,10 +1953,17 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms, const double l_min, c logf_h=LogL_f (l_max, ¶ms); } - if (logf_l>=logf_h) {lambda=l_min; logf=logf_l;} else {lambda=l_max; logf=logf_h;} + if (logf_l>=logf_h) { + lambda=l_min; + logf=logf_l; + } else { + lambda=l_max; + logf=logf_h; + } } else { - //if derivates change signs + + // If derivates change signs. int status; int iter=0, max_iter=100; double l, l_temp; @@ -1916,41 +1998,46 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms, const double l_min, c s_fdf=gsl_root_fdfsolver_alloc(T_fdf); for (vector<double>::size_type i=0; i<lambda_lh.size(); ++i) { - lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second; - - gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h); - - do { - iter++; - status=gsl_root_fsolver_iterate (s_f); - l=gsl_root_fsolver_root (s_f); - lambda_l=gsl_root_fsolver_x_lower (s_f); - lambda_h=gsl_root_fsolver_x_upper (s_f); - status=gsl_root_test_interval (lambda_l, lambda_h, 0, 1e-1); - } - while (status==GSL_CONTINUE && iter<max_iter); - - iter=0; - - gsl_root_fdfsolver_set (s_fdf, &FDF, l); - - do { - iter++; - status=gsl_root_fdfsolver_iterate (s_fdf); - l_temp=l; - l=gsl_root_fdfsolver_root (s_fdf); - status=gsl_root_test_delta (l, l_temp, 0, 1e-5); - } - while (status==GSL_CONTINUE && iter<max_iter && l>l_min && l<l_max); - - l=l_temp; - if (l<l_min) {l=l_min;} - if (l>l_max) {l=l_max;} - if (func_name=='R' || func_name=='r') {logf_l=LogRL_f (l, ¶ms);} else {logf_l=LogL_f (l, ¶ms);} - - if (i==0) {logf=logf_l; lambda=l;} - else if (logf<logf_l) {logf=logf_l; lambda=l;} - else {} + lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second; + gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h); + + do { + iter++; + status=gsl_root_fsolver_iterate (s_f); + l=gsl_root_fsolver_root (s_f); + lambda_l=gsl_root_fsolver_x_lower (s_f); + lambda_h=gsl_root_fsolver_x_upper (s_f); + status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1); + } + while (status==GSL_CONTINUE && iter<max_iter); + + iter=0; + + gsl_root_fdfsolver_set (s_fdf, &FDF, l); + + do { + iter++; + status=gsl_root_fdfsolver_iterate (s_fdf); + l_temp=l; + l=gsl_root_fdfsolver_root (s_fdf); + status=gsl_root_test_delta (l, l_temp, 0, 1e-5); + } + while (status==GSL_CONTINUE && + iter<max_iter && + l>l_min && l<l_max); + + l=l_temp; + if (l<l_min) {l=l_min;} + if (l>l_max) {l=l_max;} + if (func_name=='R' || func_name=='r') { + logf_l=LogRL_f (l, ¶ms); + } else { + logf_l=LogL_f (l, ¶ms); + } + + if (i==0) {logf=logf_l; lambda=l;} + else if (logf<logf_l) {logf=logf_l; lambda=l;} + else {} } gsl_root_fsolver_free (s_f); gsl_root_fdfsolver_free (s_fdf); @@ -1971,14 +2058,17 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms, const double l_min, c return; } - - - - -//calculate lambda in the null model -void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logl_H0) -{ - if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;} +// Calculate lambda in the null model. +void CalcLambda (const char func_name, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const double l_min, const double l_max, + const size_t n_region, double &lambda, double &logl_H0) { + if (func_name!='R' && func_name!='L' && func_name!='r' && + func_name!='l') { + cout<<"func_name only takes 'R' or 'L': 'R' for " << + "log-restricted likelihood, 'L' for log-likelihood." << endl; + return; + } size_t n_cvt=UtW->size2, ni_test=UtW->size1; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; @@ -1988,10 +2078,6 @@ void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); -// if (e_mode!=0) { -// gsl_vector_set_zero (ab); -// Calcab (W, y, ab); -// } FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0}; @@ -2003,10 +2089,10 @@ void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix return; } - -//obtain REMLE estimate for PVE using lambda_remle -void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, const double trace_G, double &pve, double &pve_se) -{ +// Obtain REMLE estimate for PVE using lambda_remle. +void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, + const double trace_G, double &pve, double &pve_se) { size_t n_cvt=UtW->size2, ni_test=UtW->size1; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; @@ -2015,10 +2101,6 @@ void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *U gsl_matrix_set_zero (Uab); CalcUab (UtW, Uty, Uab); - // if (e_mode!=0) { - // gsl_vector_set_zero (ab); - // Calcab (W, y, ab); - // } FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0}; @@ -2032,11 +2114,13 @@ void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *U return; } -//obtain REML estimate for Vg and Ve using lambda_remle -//obtain beta and se(beta) for coefficients -//ab is not used when e_mode==0 -void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, double &vg, double &ve, gsl_vector *beta, gsl_vector *se_beta) -{ +// Obtain REML estimate for Vg and Ve using lambda_remle. +// Obtain beta and se(beta) for coefficients. +// ab is not used when e_mode==0. +void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, + double &vg, double &ve, gsl_vector *beta, + gsl_vector *se_beta) { size_t n_cvt=UtW->size2, ni_test=UtW->size1; size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; @@ -2059,7 +2143,7 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v gsl_vector_add_constant (v_temp, 1.0); gsl_vector_div (Hi_eval, v_temp); - //calculate beta + // Calculate beta. gsl_matrix_memcpy (HiW, UtW); for (size_t i=0; i<UtW->size2; i++) { gsl_vector_view HiW_col=gsl_matrix_column(HiW, i); @@ -2074,7 +2158,7 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v LUSolve (WHiW, pmt, WHiy, beta); LUInvert (WHiW, pmt, Vbeta); - //calculate vg and ve + // Calculate vg and ve. CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab); size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); @@ -2083,12 +2167,12 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v ve=P_yy/(double)(ni_test-n_cvt); vg=ve*lambda; - //with ve, calculate se(beta) + // With ve, calculate se(beta). gsl_matrix_scale(Vbeta, ve); - //obtain se_beta + // Obtain se_beta. for (size_t i=0; i<Vbeta->size1; i++) { - gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i) ) ); + gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta,i,i))); } gsl_matrix_free(Uab); @@ -2105,29 +2189,28 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v return; } - - - - - - -void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env) -{ +void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env) { igzstream infile (file_geno.c_str(), igzstream::in); -// ifstream infile (file_geno.c_str(), ifstream::in); - if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;} + if (!infile) { + cout<<"error reading genotype file:"<<file_geno<<endl; + return; + } clock_t time_start=clock(); string line; char *ch_ptr; - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; + double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; + double p_lrt=0, p_score=0; double logl_H1=0.0, logl_H0=0.0; int n_miss, c_phen; double geno, x_mean; - //Calculate basic quantities + // Calculate basic quantities. size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2; gsl_vector *x=gsl_vector_alloc (U->size1); @@ -2137,24 +2220,21 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g gsl_vector *ab=gsl_vector_alloc (n_index); gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2); - gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); + gsl_matrix_view UtW_expand_mat= + gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW); - gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2); + gsl_vector_view UtW_expand_env= + gsl_matrix_column(UtW_expand, UtW->size2); gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); - gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1); - - //gsl_matrix_set_zero (Uab); - // CalcUab (UtW, Uty, Uab); -// if (e_mode!=0) { -// gsl_vector_set_zero (ab); -// Calcab (W, y, ab); -// } - - //start reading genotypes and analyze + gsl_vector_view UtW_expand_x= + gsl_matrix_column(UtW_expand, UtW->size2+1); + + // Start reading genotypes and analyze. for (size_t t=0; t<indicator_snp.size(); ++t) { -// if (t>1) {break;} !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} + if (t%d_pace==0 || t==(ns_total-1)) { + ProgressBar ("Reading SNPs ", t, ns_total-1); + } if (indicator_snp[t]==0) {continue;} ch_ptr=strtok ((char *)line.c_str(), " , \t"); @@ -2167,7 +2247,10 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g ch_ptr=strtok (NULL, " , \t"); if (indicator_idv[i]==0) {continue;} - if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;} + if (strcmp(ch_ptr, "NA")==0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { geno=atof(ch_ptr); @@ -2181,17 +2264,19 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g x_mean/=(double)(ni_test-n_miss); for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);} + if (gsl_vector_get (x_miss, i)==0) { + gsl_vector_set(x, i, x_mean); + } geno=gsl_vector_get(x, i); if (x_mean>1) { gsl_vector_set(x, i, 2-geno); } } - - //calculate statistics + // Calculate statistics. time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector); + gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, + &UtW_expand_x.vector); gsl_vector_mul (x, env); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); @@ -2201,29 +2286,29 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g if (a_mode==2 || a_mode==4) { FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0}; - CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0); + CalcLambda ('L', param0, l_min, l_max, n_region, + lambda_mle, logl_H0); } CalcUab(UtW_expand, Uty, Utx, Uab); -// if (e_mode!=0) { -// Calcab (W, y, x, ab); -// } time_start=clock(); FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0}; - //3 is before 1 + // 3 is before 1. if (a_mode==3 || a_mode==4) { CalcRLScore (l_mle_null, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcLambda ('R', param1, l_min, l_max, n_region, + lambda_remle, logl_H1); CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + CalcLambda ('L', param1, l_min, l_max, n_region, + lambda_mle, logl_H1); p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); } @@ -2231,10 +2316,11 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //store summary data - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + // Store summary data. + SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; sumStat.push_back(SNPs); - } + } cout<<endl; gsl_vector_free (x); @@ -2251,14 +2337,10 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g return; } - - - - - - -void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env) -{ +void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env) { string file_bed=file_bfile+".bed"; ifstream infile (file_bed.c_str(), ios::binary); if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;} @@ -2268,12 +2350,13 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs char ch[1]; bitset<8> b; - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; + double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; + double p_lrt=0, p_score=0; double logl_H1=0.0, logl_H0=0.0; int n_bit, n_miss, ci_total, ci_test; double geno, x_mean; - //Calculate basic quantities + // Calculate basic quantities. size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2; gsl_vector *x=gsl_vector_alloc (U->size1); @@ -2282,56 +2365,64 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs gsl_vector *ab=gsl_vector_alloc (n_index); gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2); - gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); + gsl_matrix_view UtW_expand_mat= + gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW); - gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2); + gsl_vector_view UtW_expand_env= + gsl_matrix_column(UtW_expand, UtW->size2); gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); - gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1); - - //gsl_matrix_set_zero (Uab); - //CalcUab (UtW, Uty, Uab); -// if (e_mode!=0) { -// gsl_vector_set_zero (ab); -// Calcab (W, y, ab); -// } + gsl_vector_view UtW_expand_x= + gsl_matrix_column(UtW_expand, UtW->size2+1); - //calculate n_bit and c, the number of bit for each snp + // Calculate n_bit and c, the number of bit for each SNP. if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } - //print the first three majic numbers + // Print the first three magic numbers. for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);} - if (indicator_snp[t]==0) {continue;} + if (t%d_pace==0 || t==snpInfo.size()-1) { + ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); + } + if (indicator_snp[t]==0) {continue;} - infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers + // n_bit, and 3 is the number of magic numbers + infile.seekg(t*n_bit+3); - //read genotypes + // Read genotypes. x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; for (int i=0; i<n_bit; ++i) { infile.read(ch,1); b=ch[0]; - for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0; - if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;} - if (indicator_idv[ci_total]==0) {ci_total++; continue;} - - if (b[2*j]==0) { - if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; } - else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } - } - else { - if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } - else {gsl_vector_set(x, ci_test, -9); n_miss++; } - } - ci_total++; - ci_test++; + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j=0; j<4; ++j) { + if ((i==(n_bit-1)) && ci_total==(int)ni_total) { + break; + } + if (indicator_idv[ci_total]==0) { + ci_total++; + continue; + } + + if (b[2*j]==0) { + if (b[2*j+1]==0) { + gsl_vector_set(x, ci_test, 2); + x_mean+=2.0; + } + else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } + } + else { + if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } + else {gsl_vector_set(x, ci_test, -9); n_miss++; } + } + + ci_total++; + ci_test++; } } @@ -2339,15 +2430,19 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs for (size_t i=0; i<ni_test; ++i) { geno=gsl_vector_get(x,i); - if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;} + if (geno==-9) { + gsl_vector_set(x, i, x_mean); + geno=x_mean; + } if (x_mean>1) { - gsl_vector_set(x, i, 2-geno); + gsl_vector_set(x, i, 2-geno); } } - //calculate statistics + // Calculate statistics. time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector); + gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, + &UtW_expand_x.vector); gsl_vector_mul (x, env); gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); @@ -2357,39 +2452,39 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs if (a_mode==2 || a_mode==4) { FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0}; - CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0); + CalcLambda ('L', param0, l_min, l_max, n_region, + lambda_mle, logl_H0); } CalcUab(UtW_expand, Uty, Utx, Uab); -// if (e_mode!=0) { -// Calcab (W, y, x, ab); -// } - time_start=clock(); FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0}; - //3 is before 1, for beta + // 3 is before 1, for beta. if (a_mode==3 || a_mode==4) { CalcRLScore (l_mle_null, param1, beta, se, p_score); } if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); + CalcLambda ('R', param1, l_min, l_max, n_region, + lambda_remle, logl_H1); + CalcRLWald (lambda_remle, param1, beta, se, p_wald); } if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); + CalcLambda ('L', param1, l_min, l_max, n_region, + lambda_mle, logl_H1); + p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); } if (x_mean>1) {beta*=-1;} time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //store summary data - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + // Store summary data. + SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, + p_lrt, p_score}; sumStat.push_back(SNPs); } cout<<endl; diff --git a/src/param.cpp b/src/param.cpp index 0a1d721..a7be178 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -208,7 +208,6 @@ void PARAM::ReadFiles (void) { ns_total=indicator_snp.size(); } - // Read genotype and phenotype file for PLINK format. if (!file_bfile.empty()) { file_str=file_bfile+".bim"; @@ -348,8 +347,6 @@ void PARAM::ReadFiles (void) { infile.clear(); } - - // Read genotype and phenotype file for multiple BIMBAM files. if (!file_mgeno.empty()) { @@ -403,8 +400,6 @@ void PARAM::ReadFiles (void) { infile.clear(); } - - if (!file_gene.empty()) { if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} @@ -433,7 +428,6 @@ void PARAM::ReadFiles (void) { } } - // Read is after gene file. if (!file_read.empty() ) { if (ReadFile_column (file_read, indicator_read, @@ -471,8 +465,7 @@ void PARAM::ReadFiles (void) { return; } -void PARAM::CheckParam (void) -{ +void PARAM::CheckParam (void) { struct stat fileInfo; string str; @@ -482,15 +475,49 @@ void PARAM::CheckParam (void) k_mode<<endl; error=true; } - if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 && a_mode!=71) - {cout<<"error! unknown analysis mode: "<<a_mode<<". make sure -gk or -eigen or -lmm or -bslmm -predict or -calccov is sepcified correctly."<<endl; error=true;} - if (miss_level>1) {cout<<"error! missing level needs to be between 0 and 1. current value = "<<miss_level<<endl; error=true;} - if (maf_level>0.5) {cout<<"error! maf level needs to be between 0 and 0.5. current value = "<<maf_level<<endl; error=true;} - if (hwe_level>1) {cout<<"error! hwe level needs to be between 0 and 1. current value = "<<hwe_level<<endl; error=true;} - if (r2_level>1) {cout<<"error! r2 level needs to be between 0 and 1. current value = "<<r2_level<<endl; error=true;} + if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 + && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && + a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 && + a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && + a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && + a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && + a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 && + a_mode!=71) { + cout<<"error! unknown analysis mode: "<<a_mode<< + ". make sure -gk or -eigen or -lmm or -bslmm -predict or " << + "-calccov is sepcified correctly."<<endl; + error=true; + } + if (miss_level>1) { + cout<<"error! missing level needs to be between 0 and 1. " << + "current value = "<<miss_level<<endl; + error=true; + } + if (maf_level>0.5) { + cout<<"error! maf level needs to be between 0 and 0.5. " << + "current value = "<<maf_level<<endl; + error=true; + } + if (hwe_level>1) { + cout<<"error! hwe level needs to be between 0 and 1. " << + "current value = "<<hwe_level<<endl; + error=true; + } + if (r2_level>1) { + cout<<"error! r2 level needs to be between 0 and 1. " << + "current value = "<<r2_level<<endl; + error=true; + } - if (l_max<l_min) {cout<<"error! maximum lambda value must be larger than the minimal value. current values = "<<l_max<<" and "<<l_min<<endl; error=true;} - if (h_max<h_min) {cout<<"error! maximum h value must be larger than the minimal value. current values = "<<h_max<<" and "<<h_min<<endl; error=true;} + if (l_max<l_min) { + cout<<"error! maximum lambda value must be larger than the " << + "minimal value. current values = "<<l_max<<" and "<<l_min<<endl; + error=true; + } + if (h_max<h_min) { + cout<<"error! maximum h value must be larger than the minimal value. current values = "<<h_max<<" and "<<h_min<<endl; + error=true; + } if (s_max<s_min) {cout<<"error! maximum s value must be larger than the minimal value. current values = "<<s_max<<" and "<<s_min<<endl; error=true;} if (rho_max<rho_min) {cout<<"error! maximum rho value must be larger than the minimal value. current values = "<<rho_max<<" and "<<rho_min<<endl; error=true;} if (logp_max<logp_min) {cout<<"error! maximum logp value must be larger than the minimal value. current values = "<<logp_max/log(10)<<" and "<<logp_min/log(10)<<endl; error=true;} |