aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gemma.cpp923
-rw-r--r--src/lmm.cpp1373
-rw-r--r--src/param.cpp59
3 files changed, 1249 insertions, 1106 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp
index bc29e9d..11b33c1 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -1,6 +1,6 @@
/*
- Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011 Xiang Zhou
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011-2017, Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,7 +13,7 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
@@ -31,22 +31,7 @@
#include "gsl/gsl_eigen.h"
#include "gsl/gsl_cdf.h"
-#include "lapack.h" //for functions EigenDecomp
-
-#ifdef FORCE_FLOAT
-#include "io_float.h" //for function ReadFile_kin
-#include "gemma_float.h"
-#include "vc_float.h"
-#include "lm_float.h" //for LM class
-#include "bslmm_float.h" //for BSLMM class
-#include "bslmmdap_float.h" //for BSLMMDAP class
-#include "ldr_float.h" //for LDR class
-#include "lmm_float.h" //for LMM class, and functions CalcLambda, CalcPve, CalcVgVe
-#include "mvlmm_float.h" //for MVLMM class
-#include "prdt_float.h" //for PRDT class
-#include "varcov_float.h" //for MVLMM class
-#include "mathfunc_float.h" //for a few functions
-#else
+#include "lapack.h"
#include "io.h"
#include "gemma.h"
#include "vc.h"
@@ -59,410 +44,440 @@
#include "prdt.h"
#include "varcov.h"
#include "mathfunc.h"
-#endif
-
using namespace std;
-
-
GEMMA::GEMMA(void):
version("0.96"), date("05/17/2017"), year("2017")
{}
-void GEMMA::PrintHeader (void)
-{
- cout<<endl;
- cout<<"*********************************************************"<<endl;
- cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl;
- cout<<" Version "<<version<<", "<<date<<" "<<endl;
- cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl;
- cout<<" (C) "<<year<<" Xiang Zhou "<<endl;
- cout<<" GNU General Public License "<<endl;
- cout<<" For Help, Type ./gemma -h "<<endl;
- cout<<"*********************************************************"<<endl;
- cout<<endl;
-
- return;
+void GEMMA::PrintHeader (void) {
+ cout<<endl;
+ cout<<"*********************************************************"<<endl;
+ cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl;
+ cout<<" Version "<<version<<", "<<date<<" "<<
+ endl;
+ cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl;
+ cout<<" (C) "<<year<<" Xiang Zhou "<<endl;
+ cout<<" GNU General Public License "<<endl;
+ cout<<" For Help, Type ./gemma -h "<<endl;
+ cout<<"*********************************************************"<<endl;
+ cout<<endl;
+
+ return;
}
-
-void GEMMA::PrintLicense (void)
-{
+void GEMMA::PrintLicense (void) {
cout<<endl;
- cout<<"The Software Is Distributed Under GNU General Public License, But May Also Require The Following Notifications."<<endl;
+ cout<<"The Software Is Distributed Under GNU General Public "<<
+ "License, But May Also Require The Following Notifications."<<endl;
cout<<endl;
- cout<<"Including Lapack Routines In The Software May Require The Following Notification:"<<endl;
- cout<<"Copyright (c) 1992-2010 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved."<<endl;
- cout<<"Copyright (c) 2000-2010 The University of California Berkeley. All rights reserved."<<endl;
- cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. All rights reserved."<<endl;
+ cout<<"Including Lapack Routines In The Software May Require"<<
+ " The Following Notification:"<<endl;
+ cout<<"Copyright (c) 1992-2010 The University of Tennessee and "<<
+ "The University of Tennessee Research Foundation. All rights "<<
+ "reserved."<<endl;
+ cout<<"Copyright (c) 2000-2010 The University of California "<<
+ "Berkeley. All rights reserved."<<endl;
+ cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. "<<
+ "All rights reserved."<<endl;
cout<<endl;
cout<<"$COPYRIGHT$"<<endl;
cout<<"Additional copyrights may follow"<<endl;
cout<<"$HEADER$"<<endl;
- cout<<"Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:"<<endl;
- cout<<"- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer."<<endl;
- cout<<"- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution."<<endl;
- cout<<"- Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission."<<endl;
- cout<<"The copyright holders provide no reassurances that the source code provided does not infringe any patent, copyright, or any other "
- <<"intellectual property rights of third parties. The copyright holders disclaim any liability to any recipient for claims brought against "
- <<"recipient by any third party for infringement of that parties intellectual property rights. "<<endl;
- cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT "
- <<"LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT "
- <<"OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT "
- <<"LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY "
- <<"THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE "
- <<"OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."<<endl;
+ cout<<"Redistribution and use in source and binary forms, with or "<<
+ "without modification, are permitted provided that the following "<<
+ " conditions are met:"<<endl;
+ cout<<"- Redistributions of source code must retain the above "<<
+ "copyright notice, this list of conditions and the following "<<
+ "disclaimer."<<endl;
+ cout<<"- Redistributions in binary form must reproduce the above "<<
+ "copyright notice, this list of conditions and the following "<<
+ "disclaimer listed in this license in the documentation and/or "<<
+ "other materials provided with the distribution."<<endl;
+ cout<<"- Neither the name of the copyright holders nor the names "<<
+ "of its contributors may be used to endorse or promote products "<<
+ "derived from this software without specific prior written "<<
+ "permission."<<endl;
+ cout<<"The copyright holders provide no reassurances that the "<<
+ "source code provided does not infringe any patent, copyright, "<<
+ "or any other "<<
+ "intellectual property rights of third parties. "<<
+ "The copyright holders disclaim any liability to any recipient "<<
+ "for claims brought against "<<
+ "recipient by any third party for infringement of that parties "<<
+ "intellectual property rights. "<<endl;
+ cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "<<
+ "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "<<
+ "INCLUDING, BUT NOT "<<
+ "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "<<
+ "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "<<
+ "SHALL THE COPYRIGHT "<<
+ "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "<<
+ "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "<<
+ "(INCLUDING, BUT NOT "<<
+ "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "<<
+ "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "<<
+ "HOWEVER CAUSED AND ON ANY "<<
+ "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "<<
+ "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "<<
+ "OUT OF THE USE "<<
+ "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "<<
+ "SUCH DAMAGE."<<endl;
cout<<endl;
-
-
return;
}
-
-
-void GEMMA::PrintHelp(size_t option)
-{
- if (option==0) {
- cout<<endl;
- cout<<" GEMMA version "<<version<<", released on "<<date<<endl;
- cout<<" implemented by Xiang Zhou"<<endl;
- cout<<endl;
- cout<<" type ./gemma -h [num] for detailed helps"<<endl;
- cout<<" options: " << endl;
- cout<<" 1: quick guide"<<endl;
- cout<<" 2: file I/O related"<<endl;
- cout<<" 3: SNP QC"<<endl;
- cout<<" 4: calculate relatedness matrix"<<endl;
- cout<<" 5: perform eigen decomposition"<<endl;
- cout<<" 6: perform variance component estimation"<<endl;
- cout<<" 7: fit a linear model"<<endl;
- cout<<" 8: fit a linear mixed model"<<endl;
- cout<<" 9: fit a multivariate linear mixed model"<<endl;
- cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl;
- cout<<" 11: obtain predicted values"<<endl;
- cout<<" 12: calculate snp variance covariance"<<endl;
- cout<<" 13: note"<<endl;
- cout<<endl;
- }
-
- if (option==1) {
- cout<<" QUICK GUIDE" << endl;
- cout<<" to generate a relatedness matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl;
- cout<<" to generate the S matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl;
- cout<<" to generate the q vector: "<<endl;
- cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl;
- cout<<" to generate the ldsc weigthts: "<<endl;
- cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl;
- cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl;
- cout<<" to estimate variance components: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" options for the above two commands: -crt -windowbp [num]"<<endl;
- cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl;
- cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl;
- cout<<" to fit a linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a linear mixed model to test g by e effects: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl;
- cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a multivariate linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
- cout<<" to fit a Bayesian sparse linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl;
- cout<<" to obtain predicted values: "<<endl;
- cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
- cout<<" to calculate correlations between SNPs: "<<endl;
- cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl;
- cout<<endl;
- }
-
- if (option==2) {
- cout<<" FILE I/O RELATED OPTIONS" << endl;
- cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl;
- cout<<" requires: *.fam, *.bim and *.bed files"<<endl;
- cout<<" missing value: -9"<<endl;
- cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl;
- cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl;
- cout<<" format: phenotype for individual 1"<<endl;
- cout<<" phenotype for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl;
- cout<<" format: rs#1, base_position, chr_number"<<endl;
- cout<<" rs#2, base_position, chr_number"<<endl;
- cout<<" ..."<<endl;
- // WJA added
- cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl;
- cout<<" requires: *.bgen, *.sample files"<<endl;
-
- cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl;
- cout<<" format: variable for individual 1"<<endl;
- cout<<" variable for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl;
- cout<<" format: variable for individual 1"<<endl;
- cout<<" variable for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl;
- cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl;
- cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl;
- cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl;
- cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl;
- cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl;
- cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl;
- cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" note: the intercept (a column of 1s) may need to be included"<<endl;
- cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl;
- cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl;
- cout<<" options: n1: rs column number"<<endl;
- cout<<" n2: estimated alpha column number (0 to ignore)"<<endl;
- cout<<" n3: estimated beta column number (0 to ignore)"<<endl;
- cout<<" n4: estimated gamma column number (0 to ignore)"<<endl;
- cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl;
- cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl;
- cout<<" format: value for individual 1"<<endl;
- cout<<" value for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl;
- cout<<" -mu [num] "<<" specify input estimated mean value"<<endl;
- cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl;
- cout<<" format: header"<<endl;
- cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl;
- cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: not allowed"<<endl;
- cout<<" -r [filename] "<<" specify input total read count file name"<<endl;
- cout<<" format: total read count for individual 1"<<endl;
- cout<<" total read count for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl;
- cout<<" format: rs#1"<<endl;
- cout<<" rs#2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -silence "<<" silent terminal display"<<endl;
- cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl;
- cout<<" options: 1: \"n by n matrix\" format"<<endl;
- cout<<" 2: \"id id value\" format"<<endl;
- cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl;
- cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl;
- cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl;
- cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl;
- cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl;
- cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl;
- cout<<endl;
- }
-
- if (option==3) {
- cout<<" SNP QC OPTIONS" << endl;
- cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl;
- cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl;
- cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl;
- cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl;
- cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl;
- cout<<endl;
- }
-
- if (option==4) {
- cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
- cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
- cout<<" options: 1: centered XX^T/p"<<endl;
- cout<<" 2: standardized XX^T/p"<<endl;
- cout<<" note: non-polymorphic SNPs are excluded "<<endl;
- cout<<endl;
- }
-
- if (option==5) {
- cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl;
- cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
- cout<<endl;
- }
-
- if (option==6) {
- cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
- cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
- cout<<" options (with kinship file): 1: HE regression (default)"<<endl;
- cout<<" 2: REML"<<endl;
- cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl;
- cout<<" 2: Standardized genotypes"<<endl;
- cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl;
- cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl;
- cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl;
- cout<<endl;
- }
-
- if (option==7) {
- cout<<" LINEAR MODEL OPTIONS" << endl;
- cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: Wald test"<<endl;
- cout<<" 2: Likelihood ratio test"<<endl;
- cout<<" 3: Score test"<<endl;
- cout<<" 4: 1-3"<<endl;
- cout<<endl;
- }
-
- if (option==8) {
- cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
- cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: Wald test"<<endl;
- cout<<" 2: Likelihood ratio test"<<endl;
- cout<<" 3: Score test"<<endl;
- cout<<" 4: 1-3"<<endl;
- cout<<" 5: Parameter estimation in the null model only"<<endl;
- cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl;
- cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl;
- cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
- cout<<endl;
- }
-
- if (option==9) {
- cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
- cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl;
- cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl;
- cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl;
- cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl;
- cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl;
- cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl;
- cout<<endl;
- }
-
- if (option==10) {
- cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl;
- cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: BSLMM"<<endl;
- cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl;
- cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
- cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl;
- cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl;
-
- cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: LDR"<<endl;
-
- cout<<" MCMC OPTIONS" << endl;
- cout<<" Prior" << endl;
- cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl;
- cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl;
- cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl;
- cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl;
- cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl;
- cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
- cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl;
- cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl;
-
- cout<<" Proposal" << endl;
- cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
- cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
-
- cout<<" Others" << endl;
- cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl;
- cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl;
- cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl;
- cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl;
- cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl;
- cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl;
- cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
- cout<<endl;
- }
-
- if (option==11) {
- cout<<" PREDICTION OPTIONS" << endl;
- cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl;
- cout<<" options: 1: predict for individuals with missing phenotypes"<<endl;
- cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
- cout<<endl;
- }
-
- if (option==12) {
- cout<<" CALC CORRELATION OPTIONS" << endl;
- cout<<" -calccor "<<endl;
- cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl;
- cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl;
- cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl;
- cout<<endl;
- }
-
- if (option==13) {
- cout<<" NOTE"<<endl;
- cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl;
- cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl;
- cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl;
- cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl;
- cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl;
- cout<<endl;
- }
-
- return;
+void GEMMA::PrintHelp(size_t option) {
+ if (option==0) {
+ cout<<endl;
+ cout<<" GEMMA version "<<version<<", released on "<<date<<endl;
+ cout<<" implemented by Xiang Zhou"<<endl;
+ cout<<endl;
+ cout<<" type ./gemma -h [num] for detailed helps"<<endl;
+ cout<<" options: " << endl;
+ cout<<" 1: quick guide"<<endl;
+ cout<<" 2: file I/O related"<<endl;
+ cout<<" 3: SNP QC"<<endl;
+ cout<<" 4: calculate relatedness matrix"<<endl;
+ cout<<" 5: perform eigen decomposition"<<endl;
+ cout<<" 6: perform variance component estimation"<<endl;
+ cout<<" 7: fit a linear model"<<endl;
+ cout<<" 8: fit a linear mixed model"<<endl;
+ cout<<" 9: fit a multivariate linear mixed model"<<endl;
+ cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl;
+ cout<<" 11: obtain predicted values"<<endl;
+ cout<<" 12: calculate snp variance covariance"<<endl;
+ cout<<" 13: note"<<endl;
+ cout<<endl;
+ }
+
+ if (option==1) {
+ cout<<" QUICK GUIDE" << endl;
+ cout<<" to generate a relatedness matrix: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl;
+ cout<<" to generate the S matrix: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl;
+ cout<<" to generate the q vector: "<<endl;
+ cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl;
+ cout<<" to generate the ldsc weigthts: "<<endl;
+ cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl;
+ cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl;
+ cout<<" to estimate variance components: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" options for the above two commands: -crt -windowbp [num]"<<endl;
+ cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl;
+ cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl;
+ cout<<" to fit a linear mixed model: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" to fit a linear mixed model to test g by e effects: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" to fit a multivariate linear mixed model: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
+ cout<<" to fit a Bayesian sparse linear mixed model: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl;
+ cout<<" to obtain predicted values: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
+ cout<<" to calculate correlations between SNPs: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl;
+ cout<<endl;
+ }
+
+ if (option==2) {
+ cout<<" FILE I/O RELATED OPTIONS" << endl;
+ cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl;
+ cout<<" requires: *.fam, *.bim and *.bed files"<<endl;
+ cout<<" missing value: -9"<<endl;
+ cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl;
+ cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
+ cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl;
+ cout<<" format: phenotype for individual 1"<<endl;
+ cout<<" phenotype for individual 2"<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl;
+ cout<<" format: rs#1, base_position, chr_number"<<endl;
+ cout<<" rs#2, base_position, chr_number"<<endl;
+ cout<<" ..."<<endl;
+
+ // WJA added.
+ cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl;
+ cout<<" requires: *.bgen, *.sample files"<<endl;
+
+ cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl;
+ cout<<" format: variable for individual 1"<<endl;
+ cout<<" variable for individual 2"<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl;
+ cout<<" format: variable for individual 1"<<endl;
+ cout<<" variable for individual 2"<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl;
+ cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl;
+ cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl;
+ cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl;
+ cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl;
+ cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl;
+ cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl;
+ cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" note: the intercept (a column of 1s) may need to be included"<<endl;
+ cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl;
+ cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl;
+ cout<<" options: n1: rs column number"<<endl;
+ cout<<" n2: estimated alpha column number (0 to ignore)"<<endl;
+ cout<<" n3: estimated beta column number (0 to ignore)"<<endl;
+ cout<<" n4: estimated gamma column number (0 to ignore)"<<endl;
+ cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl;
+ cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl;
+ cout<<" format: value for individual 1"<<endl;
+ cout<<" value for individual 2"<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl;
+ cout<<" -mu [num] "<<" specify input estimated mean value"<<endl;
+ cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl;
+ cout<<" format: header"<<endl;
+ cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl;
+ cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: not allowed"<<endl;
+ cout<<" -r [filename] "<<" specify input total read count file name"<<endl;
+ cout<<" format: total read count for individual 1"<<endl;
+ cout<<" total read count for individual 2"<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl;
+ cout<<" format: rs#1"<<endl;
+ cout<<" rs#2"<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -silence "<<" silent terminal display"<<endl;
+ cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl;
+ cout<<" options: 1: \"n by n matrix\" format"<<endl;
+ cout<<" 2: \"id id value\" format"<<endl;
+ cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl;
+ cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl;
+ cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl;
+ cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl;
+ cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl;
+ cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl;
+ cout<<endl;
+ }
+
+ if (option==3) {
+ cout<<" SNP QC OPTIONS" << endl;
+ cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl;
+ cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl;
+ cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl;
+ cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl;
+ cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl;
+ cout<<endl;
+ }
+
+ if (option==4) {
+ cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
+ cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
+ cout<<" options: 1: centered XX^T/p"<<endl;
+ cout<<" 2: standardized XX^T/p"<<endl;
+ cout<<" note: non-polymorphic SNPs are excluded "<<endl;
+ cout<<endl;
+ }
+
+ if (option==5) {
+ cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl;
+ cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
+ cout<<endl;
+ }
+
+ if (option==6) {
+ cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
+ cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
+ cout<<" options (with kinship file): 1: HE regression (default)"<<endl;
+ cout<<" 2: REML"<<endl;
+ cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl;
+ cout<<" 2: Standardized genotypes"<<endl;
+ cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl;
+ cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl;
+ cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl;
+ cout<<endl;
+ }
+
+ if (option==7) {
+ cout<<" LINEAR MODEL OPTIONS" << endl;
+ cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl;
+ cout<<" options: 1: Wald test"<<endl;
+ cout<<" 2: Likelihood ratio test"<<endl;
+ cout<<" 3: Score test"<<endl;
+ cout<<" 4: 1-3"<<endl;
+ cout<<endl;
+ }
+
+ if (option==8) {
+ cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
+ cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl;
+ cout<<" options: 1: Wald test"<<endl;
+ cout<<" 2: Likelihood ratio test"<<endl;
+ cout<<" 3: Score test"<<endl;
+ cout<<" 4: 1-3"<<endl;
+ cout<<" 5: Parameter estimation in the null model only"<<endl;
+ cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl;
+ cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl;
+ cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
+ cout<<endl;
+ }
+
+ if (option==9) {
+ cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
+ cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl;
+ cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl;
+ cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl;
+ cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl;
+ cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl;
+ cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl;
+ cout<<endl;
+ }
+
+ if (option==10) {
+ cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl;
+ cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl;
+ cout<<" options: 1: BSLMM"<<endl;
+ cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl;
+ cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
+ cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl;
+ cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl;
+
+ cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl;
+ cout<<" options: 1: LDR"<<endl;
+
+ cout<<" MCMC OPTIONS" << endl;
+ cout<<" Prior" << endl;
+ cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl;
+ cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl;
+ cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl;
+ cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl;
+ cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl;
+ cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
+ cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl;
+ cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl;
+
+ cout<<" Proposal" << endl;
+ cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
+ cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
+ cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
+ cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
+
+ cout<<" Others" << endl;
+ cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl;
+ cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl;
+ cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl;
+ cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl;
+ cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl;
+ cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl;
+ cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
+ cout<<endl;
+ }
+
+ if (option==11) {
+ cout<<" PREDICTION OPTIONS" << endl;
+ cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl;
+ cout<<" options: 1: predict for individuals with missing phenotypes"<<endl;
+ cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
+ cout<<endl;
+ }
+
+ if (option==12) {
+ cout<<" CALC CORRELATION OPTIONS" << endl;
+ cout<<" -calccor "<<endl;
+ cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl;
+ cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl;
+ cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl;
+ cout<<endl;
+ }
+
+ if (option==13) {
+ cout<<" NOTE"<<endl;
+ cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl;
+ cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl;
+ cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl;
+ cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl;
+ cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl;
+ cout<<endl;
+ }
+
+ return;
}
-//options
-//gk: 21-22
-//gs: 25-26
-//gq: 27-28
-//eigen: 31-32
-//lmm: 1-5
-//bslmm: 11-15
-//predict: 41-43
-//lm: 51
-//vc: 61
-//ci: 66-67
-//calccor: 71
-//gw: 72
-
-void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
-{
+// OPTIONS
+// -------
+// gk: 21-22
+// gs: 25-26
+// gq: 27-28
+// eigen: 31-32
+// lmm: 1-5
+// bslmm: 11-15
+// predict: 41-43
+// lm: 51
+// vc: 61
+// ci: 66-67
+// calccor: 71
+// gw: 72
+
+void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) {
string str;
for(int i = 1; i < argc; i++) {
- if (strcmp(argv[i], "-bfile")==0 || strcmp(argv[i], "--bfile")==0 || strcmp(argv[i], "-b")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if (strcmp(argv[i], "-bfile")==0 ||
+ strcmp(argv[i], "--bfile")==0 ||
+ strcmp(argv[i], "-b")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_bfile=str;
}
- else if (strcmp(argv[i], "-mbfile")==0 || strcmp(argv[i], "--mbfile")==0 || strcmp(argv[i], "-mb")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ else if (strcmp(argv[i], "-mbfile")==0 ||
+ strcmp(argv[i], "--mbfile")==0 ||
+ strcmp(argv[i], "-mb")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
@@ -472,113 +487,148 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
cPar.mode_silence=true;
}
else if (strcmp(argv[i], "-g")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_geno=str;
}
else if (strcmp(argv[i], "-mg")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_mgeno=str;
}
else if (strcmp(argv[i], "-p")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_pheno=str;
}
else if (strcmp(argv[i], "-a")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_anno=str;
}
- // WJA added
- else if (strcmp(argv[i], "-oxford")==0 || strcmp(argv[i], "--oxford")==0 || strcmp(argv[i], "-x")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+
+ // WJA added.
+ else if (strcmp(argv[i], "-oxford")==0 ||
+ strcmp(argv[i], "--oxford")==0 ||
+ strcmp(argv[i], "-x")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_oxford=str;
}
else if (strcmp(argv[i], "-gxe")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_gxe=str;
}
else if (strcmp(argv[i], "-widv")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_weight=str;
}
else if (strcmp(argv[i], "-wsnp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_wsnp=str;
}
else if (strcmp(argv[i], "-wcat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_wcat=str;
}
else if (strcmp(argv[i], "-k")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_kin=str;
}
else if (strcmp(argv[i], "-mk")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_mk=str;
}
else if (strcmp(argv[i], "-u")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_ku=str;
}
else if (strcmp(argv[i], "-d")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_kd=str;
}
else if (strcmp(argv[i], "-c")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_cvt=str;
}
else if (strcmp(argv[i], "-cat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
cPar.file_cat=str;
}
else if (strcmp(argv[i], "-mcat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {
+ continue;
+ }
++i;
str.clear();
str.assign(argv[i]);
@@ -956,16 +1006,6 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.assign(argv[i]);
cPar.a_mode=10+atoi(str.c_str());
}
- /*
- else if (strcmp(argv[i], "-ldr")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=14; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=13+atoi(str.c_str());
- }
- */
else if (strcmp(argv[i], "-hmin")==0) {
if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
++i;
@@ -1124,20 +1164,19 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;}
}
- //change prediction mode to 43, if the epm file is not provided
- if (cPar.a_mode==41 && cPar.file_epm.empty()) {cPar.a_mode=43;}
+ // Change prediction mode to 43 if the epm file is not provided.
+ if (cPar.a_mode==41 && cPar.file_epm.empty()) {
+ cPar.a_mode=43;
+ }
return;
}
-
-
-void GEMMA::BatchRun (PARAM &cPar)
-{
+void GEMMA::BatchRun (PARAM &cPar) {
clock_t time_begin, time_start;
time_begin=clock();
- //Read Files
+ // Read Files.
cout<<"Reading Files ... "<<endl;
cPar.ReadFiles();
if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;}
@@ -1210,7 +1249,6 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_vector_free(y_prdt);
}
-
//Prediction with kinship matrix only; for one or more phenotypes
if (cPar.a_mode==43) {
//first, use individuals with full phenotypes to obtain estimates of Vg and Ve
@@ -1224,6 +1262,7 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph);
gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt);
+
//set covariates matrix W and phenotype matrix Y
//an intercept should be included in W,
cPar.CopyCvtPhen (W, Y, 0);
@@ -1401,8 +1440,8 @@ void GEMMA::BatchRun (PARAM &cPar)
cVarcov.CopyToParam(cPar);
}
-
- //Compute the S matrix (and its variance), that is used for variance component estimation using summary statistics
+ // Compute the S matrix (and its variance), that is used for
+ // variance component estimation using summary statistics.
if (cPar.a_mode==25 || cPar.a_mode==26) {
cout<<"Calculating the S Matrix ... "<<endl;
@@ -1440,22 +1479,7 @@ void GEMMA::BatchRun (PARAM &cPar)
cPar.WriteMatrix (S, "S");
cPar.WriteVector (ns, "size");
cPar.WriteVar ("snps");
- /*
- cout<<scientific;
- for (size_t i=0; i<cPar.n_vc; i++) {
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get(S, i, j)<<" ";
- }
- cout<<endl;
- }
- for (size_t i=cPar.n_vc; i<cPar.n_vc*2; i++) {
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get(S, i, j)<<" ";
- }
- cout<<endl;
- }
- */
gsl_matrix_free (S);
gsl_vector_free (ns);
@@ -1508,8 +1532,7 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_vector_free (s);
}
-
- //Calculate SNP covariance
+ // Calculate SNP covariance.
if (cPar.a_mode==71) {
VARCOV cVarcov;
cVarcov.CopyFromParam(cPar);
@@ -1522,9 +1545,8 @@ void GEMMA::BatchRun (PARAM &cPar)
cVarcov.CopyToParam(cPar);
}
-
-
- //LM
+
+ // LM.
if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM
gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
@@ -1573,7 +1595,6 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_matrix_free (W);
}
-
//VC estimation with one or multiple kinship matrices
//REML approach only
//if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp
@@ -1591,7 +1612,7 @@ void GEMMA::BatchRun (PARAM &cPar)
cPar.UpdateSNP (mapRS2wK);
- //setup matrices and vectors
+ // Setup matrices and vectors.
gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
diff --git a/src/lmm.cpp b/src/lmm.cpp
index a707534..73a9232 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -1,6 +1,6 @@
/*
- Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011 Xiang Zhou
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011-2017, Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,8 +16,6 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-
-
#include <iostream>
#include <fstream>
#include <sstream>
@@ -25,6 +23,7 @@
#include <iomanip>
#include <cmath>
#include <iostream>
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <bitset>
@@ -34,8 +33,6 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
-
-
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_roots.h"
#include "gsl/gsl_min.h"
@@ -45,22 +42,11 @@
#include "eigenlib.h"
#include "lapack.h"
#include "gzstream.h"
-
-#ifdef FORCE_FLOAT
-#include "lmm_float.h"
-#else
#include "lmm.h"
-#endif
-
using namespace std;
-
-
-
-
-void LMM::CopyFromParam (PARAM &cPar)
-{
+void LMM::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
@@ -69,7 +55,8 @@ void LMM::CopyFromParam (PARAM &cPar)
file_out=cPar.file_out;
path_out=cPar.path_out;
file_gene=cPar.file_gene;
- // WJA added
+
+ // WJA added.
file_oxford=cPar.file_oxford;
l_min=cPar.l_min;
@@ -97,9 +84,7 @@ void LMM::CopyFromParam (PARAM &cPar)
return;
}
-
-void LMM::CopyToParam (PARAM &cPar)
-{
+void LMM::CopyToParam (PARAM &cPar) {
cPar.time_UtX=time_UtX;
cPar.time_opt=time_opt;
@@ -108,83 +93,120 @@ void LMM::CopyToParam (PARAM &cPar)
return;
}
-
-
-void LMM::WriteFiles ()
-{
+void LMM::WriteFiles () {
string file_str;
file_str=path_out+"/"+file_out;
file_str+=".assoc.txt";
ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+ if (!outfile) {
+ cout<<"error writing file: "<<file_str.c_str()<<endl;
+ return;
+ }
if (!file_gene.empty()) {
outfile<<"geneID"<<"\t";
if (a_mode==1) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl;
+ outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<
+ "\t"<<"p_wald"<<endl;
} else if (a_mode==2) {
outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl;
} else if (a_mode==3) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
} else if (a_mode==4) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
+ outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<
+ "\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<
+ "\t"<<"p_score"<<endl;
} else {}
for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
outfile<<snpInfo[t].rs_number<<"\t";
if (a_mode==1) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<
+ sumStat[t].lambda_remle<<"\t"<<
+ sumStat[t].p_wald <<endl;
} else if (a_mode==2) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_lrt<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].lambda_mle<<"\t"<<
+ sumStat[t].p_lrt<<endl;
} else if (a_mode==3) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_score<<endl;
} else if (a_mode==4) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<
+ sumStat[t].lambda_remle<<"\t"<<
+ sumStat[t].lambda_mle<<"\t"<<
+ sumStat[t].p_wald <<"\t"<<
+ sumStat[t].p_lrt<<"\t"<<
+ sumStat[t].p_score<<endl;
} else {}
}
} else {
- outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
+ outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"
+ <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
if (a_mode==1) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl;
+ outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"
+ <<"p_wald"<<endl;
} else if (a_mode==2) {
outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl;
} else if (a_mode==3) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
} else if (a_mode==4) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
+ outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"
+ <<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<
+ "\t"<<"p_score"<<endl;
} else {}
size_t t=0;
for (size_t i=0; i<snpInfo.size(); ++i) {
if (indicator_snp[i]==0) {continue;}
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
+ outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<
+ "\t"<<snpInfo[i].base_position<<"\t"<<
+ snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<
+ snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<
+ snpInfo[i].maf<<"\t";
if (a_mode==1) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].lambda_remle<<"\t"<<
+ sumStat[t].p_wald <<endl;
} else if (a_mode==2) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_lrt<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].lambda_mle<<"\t"<<
+ sumStat[t].p_lrt<<endl;
} else if (a_mode==3) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_score<<endl;
} else if (a_mode==4) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].lambda_remle<<"\t"<<
+ sumStat[t].lambda_mle<<"\t"<<
+ sumStat[t].p_wald <<"\t"<<
+ sumStat[t].p_lrt<<"\t"<<
+ sumStat[t].p_score<<endl;
} else {}
t++;
}
}
-
outfile.close();
outfile.clear();
return;
}
-void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, const gsl_matrix *Uab, const gsl_vector *ab, gsl_matrix *Pab)
-{
+void CalcPab (const size_t n_cvt, const size_t e_mode,
+ const gsl_vector *Hi_eval, const gsl_matrix *Uab,
+ const gsl_vector *ab, gsl_matrix *Pab) {
size_t index_ab, index_aw, index_bw, index_ww;
double p_ab;
double ps_ab, ps_aw, ps_bw, ps_ww;
@@ -194,23 +216,26 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
if (p==0) {
- gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
- gsl_blas_ddot (Hi_eval, &Uab_col.vector, &p_ab);
- if (e_mode!=0) {p_ab=gsl_vector_get (ab, index_ab)-p_ab;}
- gsl_matrix_set (Pab, 0, index_ab, p_ab);
+ gsl_vector_const_view Uab_col=
+ gsl_matrix_const_column (Uab, index_ab);
+ gsl_blas_ddot(Hi_eval,&Uab_col.vector,&p_ab);
+ if (e_mode!=0) {
+ p_ab=gsl_vector_get (ab, index_ab)-p_ab;
+ }
+ gsl_matrix_set (Pab, 0, index_ab, p_ab);
}
else {
- index_aw=GetabIndex (a, p, n_cvt);
- index_bw=GetabIndex (b, p, n_cvt);
- index_ww=GetabIndex (p, p, n_cvt);
-
- ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
- ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
- ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
- ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-
- p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
- gsl_matrix_set (Pab, p, index_ab, p_ab);
+ index_aw=GetabIndex (a, p, n_cvt);
+ index_bw=GetabIndex (b, p, n_cvt);
+ index_ww=GetabIndex (p, p, n_cvt);
+
+ ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
+ ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
+ ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
+ ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
+
+ p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
+ gsl_matrix_set (Pab, p, index_ab, p_ab);
}
}
}
@@ -218,9 +243,9 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval
return;
}
-
-void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_eval, const gsl_matrix *Uab, const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab)
-{
+void CalcPPab (const size_t n_cvt, const size_t e_mode,
+ const gsl_vector *HiHi_eval, const gsl_matrix *Uab,
+ const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) {
size_t index_ab, index_aw, index_bw, index_ww;
double p2_ab;
double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
@@ -230,28 +255,33 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
if (p==0) {
- gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
- gsl_blas_ddot (HiHi_eval, &Uab_col.vector, &p2_ab);
- if (e_mode!=0) {p2_ab=p2_ab-gsl_vector_get (ab, index_ab)+2.0*gsl_matrix_get (Pab, 0, index_ab);}
- gsl_matrix_set (PPab, 0, index_ab, p2_ab);
+ gsl_vector_const_view Uab_col=
+ gsl_matrix_const_column (Uab, index_ab);
+ gsl_blas_ddot (HiHi_eval, &Uab_col.vector,
+ &p2_ab);
+ if (e_mode!=0) {
+ p2_ab=p2_ab-gsl_vector_get(ab,index_ab) +
+ 2.0*gsl_matrix_get (Pab, 0, index_ab);
+ }
+ gsl_matrix_set (PPab, 0, index_ab, p2_ab);
}
else {
- index_aw=GetabIndex (a, p, n_cvt);
- index_bw=GetabIndex (b, p, n_cvt);
- index_ww=GetabIndex (p, p, n_cvt);
-
- ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
- ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
- ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
- ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
- ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
- ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
- ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-
- p2_ab=ps2_ab+ps_aw*ps_bw*ps2_ww/(ps_ww*ps_ww);
- p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
- gsl_matrix_set (PPab, p, index_ab, p2_ab);
-
+ index_aw=GetabIndex (a, p, n_cvt);
+ index_bw=GetabIndex (b, p, n_cvt);
+ index_ww=GetabIndex (p, p, n_cvt);
+
+ ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
+ ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
+ ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
+ ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
+ ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
+ ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
+ ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
+
+ p2_ab=ps2_ab+ps_aw*ps_bw*
+ ps2_ww/(ps_ww*ps_ww);
+ p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
+ gsl_matrix_set (PPab, p, index_ab, p2_ab);
}
}
}
@@ -259,44 +289,56 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
return;
}
-
-void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab, const gsl_vector *ab, const gsl_matrix *Pab, const gsl_matrix *PPab, gsl_matrix *PPPab)
-{
+void CalcPPPab (const size_t n_cvt, const size_t e_mode,
+ const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab,
+ const gsl_vector *ab, const gsl_matrix *Pab,
+ const gsl_matrix *PPab, gsl_matrix *PPPab) {
size_t index_ab, index_aw, index_bw, index_ww;
double p3_ab;
- double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw, ps3_ww;
+ double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww,
+ ps3_aw, ps3_bw, ps3_ww;
for (size_t p=0; p<=n_cvt+1; ++p) {
for (size_t a=p+1; a<=n_cvt+2; ++a) {
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
if (p==0) {
- gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
- gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector, &p3_ab);
- if (e_mode!=0) {p3_ab=gsl_vector_get (ab, index_ab)-p3_ab+3.0*gsl_matrix_get (PPab, 0, index_ab)-3.0*gsl_matrix_get (Pab, 0, index_ab);}
- gsl_matrix_set (PPPab, 0, index_ab, p3_ab);
+ gsl_vector_const_view Uab_col=
+ gsl_matrix_const_column (Uab, index_ab);
+ gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector,
+ &p3_ab);
+ if (e_mode!=0) {
+ p3_ab=gsl_vector_get (ab, index_ab)-
+ p3_ab+3.0*gsl_matrix_get(PPab,0,index_ab)
+ -3.0*gsl_matrix_get (Pab, 0, index_ab);
+ }
+ gsl_matrix_set (PPPab, 0, index_ab, p3_ab);
}
else {
- index_aw=GetabIndex (a, p, n_cvt);
- index_bw=GetabIndex (b, p, n_cvt);
- index_ww=GetabIndex (p, p, n_cvt);
-
- ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
- ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
- ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
- ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
- ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
- ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
- ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
- ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw);
- ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw);
- ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww);
-
- p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww/(ps_ww*ps_ww*ps_ww);
- p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw+ps2_aw*ps2_bw)/ps_ww;
- p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/(ps_ww*ps_ww);
-
- gsl_matrix_set (PPPab, p, index_ab, p3_ab);
+ index_aw=GetabIndex (a, p, n_cvt);
+ index_bw=GetabIndex (b, p, n_cvt);
+ index_ww=GetabIndex (p, p, n_cvt);
+
+ ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
+ ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
+ ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
+ ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
+ ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
+ ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
+ ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
+ ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw);
+ ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw);
+ ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww);
+
+ p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww
+ /(ps_ww*ps_ww*ps_ww);
+ p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw +
+ ps2_aw*ps2_bw)/ps_ww;
+ p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*
+ ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/
+ (ps_ww*ps_ww);
+
+ gsl_matrix_set (PPPab, p, index_ab, p3_ab);
}
}
}
@@ -304,10 +346,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH
return;
}
-
-
-double LogL_f (double l, void *params)
-{
+double LogL_f (double l, void *params) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
@@ -325,7 +364,11 @@ double LogL_f (double l, void *params)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -348,13 +391,7 @@ double LogL_f (double l, void *params)
return f;
}
-
-
-
-
-
-double LogL_dev1 (double l, void *params)
-{
+double LogL_dev1 (double l, void *params) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
@@ -374,7 +411,11 @@ double LogL_dev1 (double l, void *params)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -407,18 +448,18 @@ double LogL_dev1 (double l, void *params)
return dev1;
}
-
-
-
-double LogL_dev2 (double l, void *params)
-{
+double LogL_dev2 (double l, void *params) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
+ if (p->calc_null==true) {
+ nc_total=n_cvt;
+ } else {
+ nc_total=n_cvt+1;
+ }
double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
size_t index_yy;
@@ -433,7 +474,11 @@ double LogL_dev2 (double l, void *params)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -453,7 +498,8 @@ double LogL_dev2 (double l, void *params)
CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab,
+ PPPab);
double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
@@ -465,7 +511,8 @@ double LogL_dev2 (double l, void *params)
double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
- dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
+ dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*
+ (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
@@ -478,12 +525,7 @@ double LogL_dev2 (double l, void *params)
return dev2;
}
-
-
-
-
-void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
-{
+void LogL_dev12 (double l, void *params, double *dev1, double *dev2) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
@@ -505,7 +547,11 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -525,7 +571,8 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab,
+ PPPab);
double trace_HiK=((double)ni_test-trace_Hi)/l;
double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
@@ -540,7 +587,8 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
*dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
- *dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
+ *dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*
+ (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
@@ -553,10 +601,7 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
return;
}
-
-
-double LogRL_f (double l, void *params)
-{
+double LogRL_f (double l, void *params) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
@@ -564,7 +609,9 @@ double LogRL_f (double l, void *params)
double df;
size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
+ if (p->calc_null==true) {
+ nc_total=n_cvt; df=(double)ni_test-(double)n_cvt;
+ }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d;
@@ -577,7 +624,11 @@ double LogRL_f (double l, void *params)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -590,7 +641,7 @@ double LogRL_f (double l, void *params)
gsl_vector_set_all (v_temp, 1.0);
CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
- //calculate |WHiW|-|WW|
+ // Calculate |WHiW|-|WW|.
logdet_hiw=0.0;
for (size_t i=0; i<nc_total; ++i) {
index_ww=GetabIndex (i+1, i+1, n_cvt);
@@ -612,10 +663,7 @@ double LogRL_f (double l, void *params)
return f;
}
-
-
-double LogRL_dev1 (double l, void *params)
-{
+double LogRL_dev1 (double l, void *params) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
@@ -623,8 +671,14 @@ double LogRL_dev1 (double l, void *params)
double df;
size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
- else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
+ if (p->calc_null==true) {
+ nc_total=n_cvt;
+ df=(double)ni_test-(double)n_cvt;
+ }
+ else {
+ nc_total=n_cvt+1;
+ df=(double)ni_test-(double)n_cvt-1.0;
+ }
double dev1=0.0, trace_Hi=0.0;
size_t index_ww;
@@ -637,7 +691,11 @@ double LogRL_dev1 (double l, void *params)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -654,7 +712,7 @@ double LogRL_dev1 (double l, void *params)
CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- //calculate tracePK and trace PKPK
+ // Calculate tracePK and trace PKPK.
double trace_P=trace_Hi;
double ps_ww, ps2_ww;
for (size_t i=0; i<nc_total; ++i) {
@@ -665,7 +723,7 @@ double LogRL_dev1 (double l, void *params)
}
double trace_PK=(df-trace_P)/l;
- //calculate yPKPy, yPKPKPy
+ // Calculate yPKPy, yPKPKPy.
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
@@ -682,11 +740,7 @@ double LogRL_dev1 (double l, void *params)
return dev1;
}
-
-
-
-double LogRL_dev2 (double l, void *params)
-{
+double LogRL_dev2 (double l, void *params) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
@@ -694,8 +748,14 @@ double LogRL_dev2 (double l, void *params)
double df;
size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
- else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
+ if (p->calc_null==true) {
+ nc_total=n_cvt;
+ df=(double)ni_test-(double)n_cvt;
+ }
+ else {
+ nc_total=n_cvt+1;
+ df=(double)ni_test-(double)n_cvt-1.0;
+ }
double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
size_t index_ww;
@@ -710,7 +770,11 @@ double LogRL_dev2 (double l, void *params)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -730,9 +794,10 @@ double LogRL_dev2 (double l, void *params)
CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab,
+ PPab, PPPab);
- //calculate tracePK and trace PKPK
+ // Calculate tracePK and trace PKPK.
double trace_P=trace_Hi, trace_PP=trace_HiHi;
double ps_ww, ps2_ww, ps3_ww;
for (size_t i=0; i<nc_total; ++i) {
@@ -745,7 +810,7 @@ double LogRL_dev2 (double l, void *params)
}
double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
- //calculate yPKPy, yPKPKPy
+ // Calculate yPKPy, yPKPKPy.
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
@@ -766,11 +831,7 @@ double LogRL_dev2 (double l, void *params)
return dev2;
}
-
-
-
-void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
-{
+void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) {
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
size_t ni_test=p->ni_test;
@@ -778,8 +839,14 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
double df;
size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
- else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
+ if (p->calc_null==true) {
+ nc_total=n_cvt;
+ df=(double)ni_test-(double)n_cvt;
+ }
+ else {
+ nc_total=n_cvt+1;
+ df=(double)ni_test-(double)n_cvt-1.0;
+ }
double trace_Hi=0.0, trace_HiHi=0.0;
size_t index_ww;
@@ -794,7 +861,11 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (p->e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -814,9 +885,10 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab,
+ PPab, PPPab);
- //calculate tracePK and trace PKPK
+ // Calculate tracePK and trace PKPK.
double trace_P=trace_Hi, trace_PP=trace_HiHi;
double ps_ww, ps2_ww, ps3_ww;
for (size_t i=0; i<nc_total; ++i) {
@@ -830,7 +902,7 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
double trace_PK=(df-trace_P)/l;
double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
- //calculate yPKPy, yPKPKPy
+ // Calculate yPKPy, yPKPKPy.
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
@@ -839,7 +911,8 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
*dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
- *dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
+ *dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/
+ (P_yy*P_yy);
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
@@ -849,18 +922,11 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
gsl_vector_free (HiHiHi_eval);
gsl_vector_free (v_temp);
- return ;
+ return;
}
-
-
-
-
-
-
-
-void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params, double &beta, double &se, double &p_wald)
-{
+void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params,
+ double &beta, double &se, double &p_wald) {
size_t n_cvt=params.n_cvt;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
@@ -872,7 +938,11 @@ void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params, double &beta, d
gsl_vector_memcpy (v_temp, params.eval);
gsl_vector_scale (v_temp, l);
- if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (params.e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -890,17 +960,15 @@ void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params, double &beta, d
double tau=(double)df/Px_yy;
se=sqrt(1.0/(tau*P_xx));
p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df);
-// p_wald=gsl_cdf_chisq_Q ((P_yy-Px_yy)*tau, 1);
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
- return ;
+ return;
}
-
-void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta, double &se, double &p_score)
-{
+void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params,
+ double &beta, double &se, double &p_score) {
size_t n_cvt=params.n_cvt;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
@@ -912,7 +980,11 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta,
gsl_vector_memcpy (v_temp, params.eval);
gsl_vector_scale (v_temp, l);
- if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
+ if (params.e_mode==0) {
+ gsl_vector_set_all (Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy (Hi_eval, v_temp);
+ }
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
@@ -930,24 +1002,16 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta,
double tau=(double)df/Px_yy;
se=sqrt(1.0/(tau*P_xx));
- p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1.0, df);
-// p_score=gsl_cdf_chisq_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1);
+ p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx),
+ 1.0, df);
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
- return ;
+ return;
}
-
-
-
-
-
-
-
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab)
-{
+void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
size_t index_ab;
size_t n_cvt=UtW->size2;
@@ -958,20 +1022,26 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab)
if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);}
else {
- gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, a-1);
- gsl_vector_memcpy (u_a, &UtW_col.vector);
+ gsl_vector_const_view UtW_col=
+ gsl_matrix_const_column (UtW, a-1);
+ gsl_vector_memcpy (u_a, &UtW_col.vector);
}
for (size_t b=a; b>=1; --b) {
if (b==n_cvt+1) {continue;}
index_ab=GetabIndex (a, b, n_cvt);
- gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
+ gsl_vector_view Uab_col=
+ gsl_matrix_column (Uab, index_ab);
- if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
+ if (b==n_cvt+2) {
+ gsl_vector_memcpy (&Uab_col.vector, Uty);
+ }
else {
- gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1);
- gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
+ gsl_vector_const_view UtW_col=
+ gsl_matrix_const_column (UtW, b-1);
+ gsl_vector_memcpy (&Uab_col.vector,
+ &UtW_col.vector);
}
gsl_vector_mul(&Uab_col.vector, u_a);
@@ -982,9 +1052,8 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab)
return;
}
-
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Utx, gsl_matrix *Uab)
-{
+void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_vector *Utx, gsl_matrix *Uab) {
size_t index_ab;
size_t n_cvt=UtW->size2;
@@ -993,10 +1062,13 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Ut
gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
- else if (b==n_cvt+1) {gsl_vector_memcpy (&Uab_col.vector, Utx);}
+ else if (b==n_cvt+1) {
+ gsl_vector_memcpy (&Uab_col.vector, Utx);
+ }
else {
- gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1);
- gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
+ gsl_vector_const_view UtW_col=
+ gsl_matrix_const_column (UtW, b-1);
+ gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
}
gsl_vector_mul(&Uab_col.vector, Utx);
@@ -1005,10 +1077,7 @@ void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Ut
return;
}
-
-
-void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
-{
+void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
size_t index_ab;
size_t n_cvt=W->size2;
@@ -1019,10 +1088,12 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
for (size_t a=1; a<=n_cvt+2; ++a) {
if (a==n_cvt+1) {continue;}
- if (a==n_cvt+2) {gsl_vector_memcpy (v_a, y);}
+ if (a==n_cvt+2) {
+ gsl_vector_memcpy (v_a, y);
+ }
else {
- gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1);
- gsl_vector_memcpy (v_a, &W_col.vector);
+ gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1);
+ gsl_vector_memcpy (v_a, &W_col.vector);
}
for (size_t b=a; b>=1; --b) {
@@ -1030,10 +1101,13 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
index_ab=GetabIndex (a, b, n_cvt);
- if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
+ if (b==n_cvt+2) {
+ gsl_vector_memcpy (v_b, y);
+ }
else {
- gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
- gsl_vector_memcpy (v_b, &W_col.vector);
+ gsl_vector_const_view W_col=
+ gsl_matrix_const_column (W, b-1);
+ gsl_vector_memcpy (v_b, &W_col.vector);
}
gsl_blas_ddot (v_a, v_b, &d);
@@ -1046,9 +1120,8 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
return;
}
-
-void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_vector *ab)
-{
+void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
+ gsl_vector *ab) {
size_t index_ab;
size_t n_cvt=W->size2;
@@ -1061,8 +1134,8 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_
if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);}
else {
- gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
- gsl_vector_memcpy (v_b, &W_col.vector);
+ gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
+ gsl_vector_memcpy (v_b, &W_col.vector);
}
gsl_blas_ddot (x, v_b, &d);
@@ -1070,31 +1143,31 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_
}
gsl_vector_free (v_b);
-
return;
}
-
-
-
-
-void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x)
-{
+void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Utx,
+ const gsl_matrix *W, const gsl_vector *x) {
igzstream infile (file_gene.c_str(), igzstream::in);
- if (!infile) {cout<<"error reading gene expression file:"<<file_gene<<endl; return;}
+ if (!infile) {
+ cout<<"error reading gene expression file:"<<file_gene<<endl;
+ return;
+ }
clock_t time_start=clock();
string line;
char *ch_ptr;
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
+ double p_lrt=0, p_score=0;
double logl_H1=0.0, logl_H0=0.0, l_H0;
int c_phen;
- string rs; //gene id
+ string rs; // Gene id.
double d;
- //Calculate basic quantities
+ // Calculate basic quantities.
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
gsl_vector *y=gsl_vector_alloc (U->size1);
@@ -1102,12 +1175,14 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
gsl_vector *ab=gsl_vector_alloc (n_index);
- //header
+ // Header.
getline(infile, line);
for (size_t t=0; t<ng_total; t++) {
!safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==ng_total-1) {ProgressBar ("Performing Analysis ", t, ng_total-1);}
+ if (t%d_pace==0 || t==ng_total-1) {
+ ProgressBar ("Performing Analysis ", t, ng_total-1);
+ }
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
@@ -1126,7 +1201,7 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //calculate null
+ // Calculate null.
time_start=clock();
gsl_matrix_set_zero (Uab);
@@ -1135,32 +1210,36 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
FUNC_PARAM param0={false, ni_test, n_cvt, eval, Uab, ab, 0};
if (a_mode==2 || a_mode==3 || a_mode==4) {
- CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0);
+ CalcLambda('L', param0, l_min, l_max, n_region,
+ l_H0, logl_H0);
}
- //calculate alternative
+ // Calculate alternative.
CalcUab(UtW, Uty, Utx, Uab);
FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
- //3 is before 1
+ //3 is before 1.
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_H0, param1, beta, se, p_score);
}
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ CalcLambda ('R', param1, l_min, l_max, n_region,
+ lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ CalcLambda ('L', param1, l_min, l_max, n_region,
+ lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
}
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //store summary data
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ // Store summary data.
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
+ p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
cout<<endl;
@@ -1176,27 +1255,27 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
return;
}
-
-
-
-
-void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
-{
+void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y) {
igzstream infile (file_geno.c_str(), igzstream::in);
-// ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return;
+ }
clock_t time_start=clock();
string line;
char *ch_ptr;
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
+ double p_lrt=0, p_score=0;
double logl_H1=0.0;
int n_miss, c_phen;
double geno, x_mean;
- //Calculate basic quantities
+ // Calculate basic quantities.
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
gsl_vector *x=gsl_vector_alloc (U->size1);
@@ -1205,7 +1284,7 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
gsl_vector *ab=gsl_vector_alloc (n_index);
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
@@ -1213,10 +1292,6 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
-// if (e_mode!=0) {
-// gsl_vector_set_zero (ab);
-// Calcab (W, y, ab);
-// }
//start reading genotypes and analyze
size_t c=0, t_last=0;
@@ -1225,9 +1300,10 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
t_last++;
}
for (size_t t=0; t<indicator_snp.size(); ++t) {
-// if (t>1) {break;}
!safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (t%d_pace==0 || t==(ns_total-1)) {
+ ProgressBar ("Reading SNPs ", t, ns_total-1);
+ }
if (indicator_snp[t]==0) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -1240,7 +1316,9 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;
+ }
else {
geno=atof(ch_ptr);
@@ -1254,20 +1332,11 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
x_mean/=(double)(ni_test-n_miss);
for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
- //geno=gsl_vector_get(x, i);
- //if (x_mean>1) {
- // gsl_vector_set(x, i, 2-geno);
- //}
+ if (gsl_vector_get (x_miss, i)==0) {
+ gsl_vector_set(x, i, x_mean);
+ }
}
- /*
- //calculate statistics
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- */
-
gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
gsl_vector_memcpy (&Xlarge_col.vector, x);
c++;
@@ -1276,49 +1345,52 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
size_t l=0;
if (c%msize==0) {l=msize;} else {l=c%msize;}
- gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+ gsl_matrix_view Xlarge_sub=
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub=
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+ eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
+ 0.0, &UtXlarge_sub.matrix);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
gsl_matrix_set_zero (Xlarge);
for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+ gsl_vector_view UtXlarge_col=
+ gsl_matrix_column (UtXlarge, i);
gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
CalcUab(UtW, Uty, Utx, Uab);
- // if (e_mode!=0) {
- // Calcab (W, y, x, ab);
- // }
time_start=clock();
- FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+ FUNC_PARAM param1=
+ {false, ni_test, n_cvt, eval, Uab, ab, 0};
- //3 is before 1
+ // 3 is before 1.
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region,
+ lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ CalcLambda ('L', param1, l_min, l_max, n_region,
+ lambda_mle, logl_H1);
p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
}
- //if (p_wald<0) {cout<<t<<"\t"<<i<<"\t"<<l<<endl;}
- //if (x_mean>1) {beta*=-1;}
+ time_opt+=(clock()-time_start)/
+ (double(CLOCKS_PER_SEC)*60.0);
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //store summary data
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ // Store summary data.
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
+ p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
@@ -1341,14 +1413,9 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
return;
}
-
-
-
-
-
-
-void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
-{
+void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y) {
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
@@ -1358,12 +1425,13 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
char ch[1];
bitset<8> b;
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
+ double p_lrt=0, p_score=0;
double logl_H1=0.0;
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
- //Calculate basic quantities
+ // Calculate basic quantities.
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
gsl_vector *x=gsl_vector_alloc (U->size1);
@@ -1371,7 +1439,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
gsl_vector *ab=gsl_vector_alloc (n_index);
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
@@ -1379,16 +1447,12 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
-// if (e_mode!=0) {
-// gsl_vector_set_zero (ab);
-// Calcab (W, y, ab);
-// }
- //calculate n_bit and c, the number of bit for each snp
+ // Calculate n_bit and c, the number of bit for each SNP.
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
- //print the first three majic numbers
+ // Print the first three magic numbers.
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
@@ -1400,31 +1464,44 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
t_last++;
}
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
+ if (t%d_pace==0 || t==snpInfo.size()-1) {
+ ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
+ }
if (indicator_snp[t]==0) {continue;}
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
- //read genotypes
+ // Read genotypes.
x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j=0; j<4; ++j) {
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total]==0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean+=2.0;
+ }
+ else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+ }
+
+ ci_total++;
+ ci_test++;
}
}
@@ -1432,19 +1509,12 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
for (size_t i=0; i<ni_test; ++i) {
geno=gsl_vector_get(x,i);
- if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
- //if (x_mean>1) {
- //gsl_vector_set(x, i, 2-geno);
- //}
+ if (geno==-9) {
+ gsl_vector_set(x, i, x_mean);
+ geno=x_mean;
+ }
}
- /*
- //calculate statistics
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- */
-
gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
gsl_vector_memcpy (&Xlarge_col.vector, x);
c++;
@@ -1453,52 +1523,56 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
size_t l=0;
if (c%msize==0) {l=msize;} else {l=c%msize;}
- gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+ gsl_matrix_view Xlarge_sub=
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub=
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+ eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
+ 0.0, &UtXlarge_sub.matrix);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
gsl_matrix_set_zero (Xlarge);
for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+ gsl_vector_view UtXlarge_col=
+ gsl_matrix_column (UtXlarge, i);
gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
CalcUab(UtW, Uty, Utx, Uab);
- // if (e_mode!=0) {
- // Calcab (W, y, x, ab);
- // }
time_start=clock();
- FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+ FUNC_PARAM param1={false, ni_test, n_cvt, eval,
+ Uab, ab, 0};
- //3 is before 1, for beta
+ // 3 is before 1, for beta.
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region,
+ lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ CalcLambda ('L', param1, l_min, l_max, n_region,
+ lambda_mle, logl_H1);
p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
}
- //if (x_mean>1) {beta*=-1;}
+ time_opt+=(clock()-time_start)/
+ (double(CLOCKS_PER_SEC)*60.0);
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //store summary data
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ // Store summary data.
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
+ p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
}
- }
+ }
cout<<endl;
gsl_vector_free (x);
@@ -1515,25 +1589,25 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
return;
}
-
-// WJA added
-#include <assert.h>
-void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
-{
+// WJA added.
+void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y) {
string file_bgen=file_oxford+".bgen";
ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
-
+ if (!infile) {
+ cout<<"error reading bgen file:"<<file_bgen<<endl;
+ return;
+ }
clock_t time_start=clock();
-
-
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
+ double p_lrt=0, p_score=0;
double logl_H1=0.0;
int n_miss, c_phen;
double geno, x_mean;
- //Calculate basic quantities
+ // Calculate basic quantities.
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
gsl_vector *x=gsl_vector_alloc (U->size1);
@@ -1542,7 +1616,7 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
gsl_vector *ab=gsl_vector_alloc (n_index);
- //create a large matrix
+ // Create a large matrix.
size_t msize=10000;
gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
@@ -1550,12 +1624,8 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
-// if (e_mode!=0) {
-// gsl_vector_set_zero (ab);
-// Calcab (W, y, ab);
-// }
- // read in header
+ // Read in header.
uint32_t bgen_snp_block_offset;
uint32_t bgen_header_length;
uint32_t bgen_nsamples;
@@ -1573,11 +1643,11 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
infile.read(reinterpret_cast<char*>(&bgen_flags),4);
bgen_snp_block_offset-=4;
bool CompressedSNPBlocks=bgen_flags&0x1;
-// bool LongIds=bgen_flags&0x4;
infile.ignore(bgen_snp_block_offset);
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+ double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
+ double bgen_geno_prob_non_miss;
uint32_t bgen_N;
uint16_t bgen_LS;
@@ -1593,11 +1663,10 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
string id;
string rs;
string chr;
- std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl;
+ std::cout << "Warning: WJA hard coded SNP missingness " <<
+ "threshold of 10%"<<std::endl;
-
-
- //start reading genotypes and analyze
+ // Start reading genotypes and analyze.
size_t c=0, t_last=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
if (indicator_snp[t]==0) {continue;}
@@ -1605,11 +1674,12 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
}
for (size_t t=0; t<indicator_snp.size(); ++t)
{
-
-// if (t>1) {break;}
- if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (t%d_pace==0 || t==(ns_total-1)) {
+ ProgressBar ("Reading SNPs ", t, ns_total-1);
+ }
if (indicator_snp[t]==0) {continue;}
- // read SNP header
+
+ // Read SNP header.
id.clear();
rs.clear();
chr.clear();
@@ -1641,89 +1711,86 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
bgen_B_allele.resize(bgen_LB);
infile.read(&bgen_B_allele[0], bgen_LB);
-
-
-
uint16_t unzipped_data[3*bgen_N];
if (indicator_snp[t]==0) {
if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
else
- bgen_P=6*bgen_N;
+ bgen_P=6*bgen_N;
infile.ignore(static_cast<size_t>(bgen_P));
continue;
}
-
- if(CompressedSNPBlocks)
- {
-
-
+ if(CompressedSNPBlocks) {
infile.read(reinterpret_cast<char*>(&bgen_P),4);
uint8_t zipped_data[bgen_P];
unzipped_data_size=6*bgen_N;
- infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
+ infile.read(reinterpret_cast<char*>(zipped_data),
+ bgen_P);
- int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+ int result=
+ uncompress(reinterpret_cast<Bytef*>(unzipped_data),
+ reinterpret_cast<uLongf*>(&unzipped_data_size),
+ reinterpret_cast<Bytef*>(zipped_data),
+ static_cast<uLong> (bgen_P));
assert(result == Z_OK);
}
else
{
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+ bgen_P=6*bgen_N;
+ infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
}
x_mean=0.0; c_phen=0; n_miss=0;
gsl_vector_set_zero(x_miss);
for (size_t i=0; i<bgen_N; ++i) {
- if (indicator_idv[i]==0) {continue;}
-
-
- bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
- // WJA
- bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
- else {
-
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
- geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
+ if (indicator_idv[i]==0) {continue;}
+
+ bgen_geno_prob_AA=
+ static_cast<double>(unzipped_data[i*3])/32768.0;
+ bgen_geno_prob_AB=
+ static_cast<double>(unzipped_data[i*3+1])/32768.0;
+ bgen_geno_prob_BB=
+ static_cast<double>(unzipped_data[i*3+2])/32768.0;
+
+ // WJA.
+ bgen_geno_prob_non_miss = bgen_geno_prob_AA +
+ bgen_geno_prob_AB+bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss<0.9) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ }
+ else {
+
+ bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+ geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
}
x_mean/=static_cast<double>(ni_test-n_miss);
for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ if (gsl_vector_get (x_miss, i)==0) {
+ gsl_vector_set(x, i, x_mean);
+ }
geno=gsl_vector_get(x, i);
- //if (x_mean>1) {
- //gsl_vector_set(x, i, 2-geno);
- //}
}
- /*
- //calculate statistics
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- */
-
gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
gsl_vector_memcpy (&Xlarge_col.vector, x);
c++;
@@ -1732,48 +1799,51 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
size_t l=0;
if (c%msize==0) {l=msize;} else {l=c%msize;}
- gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+ gsl_matrix_view Xlarge_sub=
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub=
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+ eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
+ 0.0, &UtXlarge_sub.matrix);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
gsl_matrix_set_zero (Xlarge);
for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+ gsl_vector_view UtXlarge_col=
+ gsl_matrix_column (UtXlarge, i);
gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
CalcUab(UtW, Uty, Utx, Uab);
- // if (e_mode!=0) {
- // Calcab (W, y, x, ab);
- // }
time_start=clock();
- FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+ FUNC_PARAM param1={false,ni_test,n_cvt,eval,Uab,ab,0};
- //3 is before 1
+ // 3 is before 1.
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region,
+ lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ CalcLambda ('L', param1, l_min, l_max, n_region,
+ lambda_mle, logl_H1);
p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
}
- //if (x_mean>1) {beta*=-1;}
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ time_opt+=(clock()-time_start)/
+ (double(CLOCKS_PER_SEC)*60.0);
- //store summary data
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ // Store summary data.
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
+ p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
}
@@ -1793,13 +1863,13 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
infile.clear();
return;
-
}
-
-
-void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr)
-{
+void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ const double l_min, const double l_max,
+ const size_t n_region,
+ vector<pair<size_t, double> > &pos_loglr) {
double logl_H0, logl_H1, log_lr, lambda0, lambda1;
gsl_vector *w=gsl_vector_alloc (Uty->size);
@@ -1812,7 +1882,7 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector
gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0);
gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
- CalcUab (Utw, Uty, Uab) ;
+ CalcUab (Utw, Uty, Uab);
FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0};
CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
@@ -1822,7 +1892,8 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector
CalcUab(Utw, Uty, &UtX_col.vector, Uab);
FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0};
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda1, logl_H1);
+ CalcLambda ('L', param1, l_min, l_max, n_region, lambda1,
+ logl_H1);
log_lr=logl_H1-logl_H0;
pos_loglr.push_back(make_pair(i,log_lr) );
@@ -1836,17 +1907,21 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector
return;
}
-
-
-
-void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logf)
-{
- if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;}
+void CalcLambda (const char func_name, FUNC_PARAM &params,
+ const double l_min, const double l_max,
+ const size_t n_region, double &lambda, double &logf) {
+ if (func_name!='R' && func_name!='L' && func_name!='r' &&
+ func_name!='l') {
+ cout << "func_name only takes 'R' or 'L': 'R' for " <<
+ "log-restricted likelihood, 'L' for log-likelihood." << endl;
+ return;
+ }
vector<pair<double, double> > lambda_lh;
- //evaluate first order derivates in different intervals
- double lambda_l, lambda_h, lambda_interval=log(l_max/l_min)/(double)n_region;
+ // Evaluate first-order derivates in different intervals.
+ double lambda_l, lambda_h, lambda_interval=
+ log(l_max/l_min)/(double)n_region;
double dev1_l, dev1_h, logf_l, logf_h;
for (size_t i=0; i<n_region; ++i) {
@@ -1867,7 +1942,7 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
}
}
- //if derivates do not change signs in any interval
+ // If derivates do not change signs in any interval.
if (lambda_lh.empty()) {
if (func_name=='R' || func_name=='r') {
logf_l=LogRL_f (l_min, &params);
@@ -1878,10 +1953,17 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
logf_h=LogL_f (l_max, &params);
}
- if (logf_l>=logf_h) {lambda=l_min; logf=logf_l;} else {lambda=l_max; logf=logf_h;}
+ if (logf_l>=logf_h) {
+ lambda=l_min;
+ logf=logf_l;
+ } else {
+ lambda=l_max;
+ logf=logf_h;
+ }
}
else {
- //if derivates change signs
+
+ // If derivates change signs.
int status;
int iter=0, max_iter=100;
double l, l_temp;
@@ -1916,41 +1998,46 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
s_fdf=gsl_root_fdfsolver_alloc(T_fdf);
for (vector<double>::size_type i=0; i<lambda_lh.size(); ++i) {
- lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second;
-
- gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h);
-
- do {
- iter++;
- status=gsl_root_fsolver_iterate (s_f);
- l=gsl_root_fsolver_root (s_f);
- lambda_l=gsl_root_fsolver_x_lower (s_f);
- lambda_h=gsl_root_fsolver_x_upper (s_f);
- status=gsl_root_test_interval (lambda_l, lambda_h, 0, 1e-1);
- }
- while (status==GSL_CONTINUE && iter<max_iter);
-
- iter=0;
-
- gsl_root_fdfsolver_set (s_fdf, &FDF, l);
-
- do {
- iter++;
- status=gsl_root_fdfsolver_iterate (s_fdf);
- l_temp=l;
- l=gsl_root_fdfsolver_root (s_fdf);
- status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
- }
- while (status==GSL_CONTINUE && iter<max_iter && l>l_min && l<l_max);
-
- l=l_temp;
- if (l<l_min) {l=l_min;}
- if (l>l_max) {l=l_max;}
- if (func_name=='R' || func_name=='r') {logf_l=LogRL_f (l, &params);} else {logf_l=LogL_f (l, &params);}
-
- if (i==0) {logf=logf_l; lambda=l;}
- else if (logf<logf_l) {logf=logf_l; lambda=l;}
- else {}
+ lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second;
+ gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h);
+
+ do {
+ iter++;
+ status=gsl_root_fsolver_iterate (s_f);
+ l=gsl_root_fsolver_root (s_f);
+ lambda_l=gsl_root_fsolver_x_lower (s_f);
+ lambda_h=gsl_root_fsolver_x_upper (s_f);
+ status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1);
+ }
+ while (status==GSL_CONTINUE && iter<max_iter);
+
+ iter=0;
+
+ gsl_root_fdfsolver_set (s_fdf, &FDF, l);
+
+ do {
+ iter++;
+ status=gsl_root_fdfsolver_iterate (s_fdf);
+ l_temp=l;
+ l=gsl_root_fdfsolver_root (s_fdf);
+ status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
+ }
+ while (status==GSL_CONTINUE &&
+ iter<max_iter &&
+ l>l_min && l<l_max);
+
+ l=l_temp;
+ if (l<l_min) {l=l_min;}
+ if (l>l_max) {l=l_max;}
+ if (func_name=='R' || func_name=='r') {
+ logf_l=LogRL_f (l, &params);
+ } else {
+ logf_l=LogL_f (l, &params);
+ }
+
+ if (i==0) {logf=logf_l; lambda=l;}
+ else if (logf<logf_l) {logf=logf_l; lambda=l;}
+ else {}
}
gsl_root_fsolver_free (s_f);
gsl_root_fdfsolver_free (s_fdf);
@@ -1971,14 +2058,17 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
return;
}
-
-
-
-
-//calculate lambda in the null model
-void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logl_H0)
-{
- if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;}
+// Calculate lambda in the null model.
+void CalcLambda (const char func_name, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const double l_min, const double l_max,
+ const size_t n_region, double &lambda, double &logl_H0) {
+ if (func_name!='R' && func_name!='L' && func_name!='r' &&
+ func_name!='l') {
+ cout<<"func_name only takes 'R' or 'L': 'R' for " <<
+ "log-restricted likelihood, 'L' for log-likelihood." << endl;
+ return;
+ }
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
@@ -1988,10 +2078,6 @@ void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
-// if (e_mode!=0) {
-// gsl_vector_set_zero (ab);
-// Calcab (W, y, ab);
-// }
FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
@@ -2003,10 +2089,10 @@ void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix
return;
}
-
-//obtain REMLE estimate for PVE using lambda_remle
-void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, const double trace_G, double &pve, double &pve_se)
-{
+// Obtain REMLE estimate for PVE using lambda_remle.
+void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_vector *Uty, const double lambda,
+ const double trace_G, double &pve, double &pve_se) {
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
@@ -2015,10 +2101,6 @@ void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *U
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
- // if (e_mode!=0) {
- // gsl_vector_set_zero (ab);
- // Calcab (W, y, ab);
- // }
FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
@@ -2032,11 +2114,13 @@ void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *U
return;
}
-//obtain REML estimate for Vg and Ve using lambda_remle
-//obtain beta and se(beta) for coefficients
-//ab is not used when e_mode==0
-void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, double &vg, double &ve, gsl_vector *beta, gsl_vector *se_beta)
-{
+// Obtain REML estimate for Vg and Ve using lambda_remle.
+// Obtain beta and se(beta) for coefficients.
+// ab is not used when e_mode==0.
+void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_vector *Uty, const double lambda,
+ double &vg, double &ve, gsl_vector *beta,
+ gsl_vector *se_beta) {
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
@@ -2059,7 +2143,7 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
- //calculate beta
+ // Calculate beta.
gsl_matrix_memcpy (HiW, UtW);
for (size_t i=0; i<UtW->size2; i++) {
gsl_vector_view HiW_col=gsl_matrix_column(HiW, i);
@@ -2074,7 +2158,7 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
LUSolve (WHiW, pmt, WHiy, beta);
LUInvert (WHiW, pmt, Vbeta);
- //calculate vg and ve
+ // Calculate vg and ve.
CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab);
size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
@@ -2083,12 +2167,12 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
ve=P_yy/(double)(ni_test-n_cvt);
vg=ve*lambda;
- //with ve, calculate se(beta)
+ // With ve, calculate se(beta).
gsl_matrix_scale(Vbeta, ve);
- //obtain se_beta
+ // Obtain se_beta.
for (size_t i=0; i<Vbeta->size1; i++) {
- gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i) ) );
+ gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta,i,i)));
}
gsl_matrix_free(Uab);
@@ -2105,29 +2189,28 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
return;
}
-
-
-
-
-
-
-void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env)
-{
+void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const gsl_vector *env) {
igzstream infile (file_geno.c_str(), igzstream::in);
-// ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+ if (!infile) {
+ cout<<"error reading genotype file:"<<file_geno<<endl;
+ return;
+ }
clock_t time_start=clock();
string line;
char *ch_ptr;
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
+ double p_lrt=0, p_score=0;
double logl_H1=0.0, logl_H0=0.0;
int n_miss, c_phen;
double geno, x_mean;
- //Calculate basic quantities
+ // Calculate basic quantities.
size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
gsl_vector *x=gsl_vector_alloc (U->size1);
@@ -2137,24 +2220,21 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g
gsl_vector *ab=gsl_vector_alloc (n_index);
gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
- gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_view UtW_expand_mat=
+ gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
- gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_vector_view UtW_expand_env=
+ gsl_matrix_column(UtW_expand, UtW->size2);
gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
- gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1);
-
- //gsl_matrix_set_zero (Uab);
- // CalcUab (UtW, Uty, Uab);
-// if (e_mode!=0) {
-// gsl_vector_set_zero (ab);
-// Calcab (W, y, ab);
-// }
-
- //start reading genotypes and analyze
+ gsl_vector_view UtW_expand_x=
+ gsl_matrix_column(UtW_expand, UtW->size2+1);
+
+ // Start reading genotypes and analyze.
for (size_t t=0; t<indicator_snp.size(); ++t) {
-// if (t>1) {break;}
!safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (t%d_pace==0 || t==(ns_total-1)) {
+ ProgressBar ("Reading SNPs ", t, ns_total-1);
+ }
if (indicator_snp[t]==0) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -2167,7 +2247,10 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ }
else {
geno=atof(ch_ptr);
@@ -2181,17 +2264,19 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g
x_mean/=(double)(ni_test-n_miss);
for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ if (gsl_vector_get (x_miss, i)==0) {
+ gsl_vector_set(x, i, x_mean);
+ }
geno=gsl_vector_get(x, i);
if (x_mean>1) {
gsl_vector_set(x, i, 2-geno);
}
}
-
- //calculate statistics
+ // Calculate statistics.
time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0,
+ &UtW_expand_x.vector);
gsl_vector_mul (x, env);
gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
@@ -2201,29 +2286,29 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g
if (a_mode==2 || a_mode==4) {
FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
- CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ CalcLambda ('L', param0, l_min, l_max, n_region,
+ lambda_mle, logl_H0);
}
CalcUab(UtW_expand, Uty, Utx, Uab);
-// if (e_mode!=0) {
-// Calcab (W, y, x, ab);
-// }
time_start=clock();
FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
- //3 is before 1
+ // 3 is before 1.
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region,
+ lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ CalcLambda ('L', param1, l_min, l_max, n_region,
+ lambda_mle, logl_H1);
p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
}
@@ -2231,10 +2316,11 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //store summary data
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ // Store summary data.
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
+ p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
gsl_vector_free (x);
@@ -2251,14 +2337,10 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const g
return;
}
-
-
-
-
-
-
-void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env)
-{
+void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const gsl_vector *env) {
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
@@ -2268,12 +2350,13 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs
char ch[1];
bitset<8> b;
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
+ double p_lrt=0, p_score=0;
double logl_H1=0.0, logl_H0=0.0;
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
- //Calculate basic quantities
+ // Calculate basic quantities.
size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
gsl_vector *x=gsl_vector_alloc (U->size1);
@@ -2282,56 +2365,64 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs
gsl_vector *ab=gsl_vector_alloc (n_index);
gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
- gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_view UtW_expand_mat=
+ gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
- gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_vector_view UtW_expand_env=
+ gsl_matrix_column(UtW_expand, UtW->size2);
gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
- gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1);
-
- //gsl_matrix_set_zero (Uab);
- //CalcUab (UtW, Uty, Uab);
-// if (e_mode!=0) {
-// gsl_vector_set_zero (ab);
-// Calcab (W, y, ab);
-// }
+ gsl_vector_view UtW_expand_x=
+ gsl_matrix_column(UtW_expand, UtW->size2+1);
- //calculate n_bit and c, the number of bit for each snp
+ // Calculate n_bit and c, the number of bit for each SNP.
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
- //print the first three majic numbers
+ // Print the first three magic numbers.
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
}
-
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
- if (indicator_snp[t]==0) {continue;}
+ if (t%d_pace==0 || t==snpInfo.size()-1) {
+ ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
+ }
+ if (indicator_snp[t]==0) {continue;}
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+ // n_bit, and 3 is the number of magic numbers
+ infile.seekg(t*n_bit+3);
- //read genotypes
+ // Read genotypes.
x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
- ci_total++;
- ci_test++;
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j=0; j<4; ++j) {
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total]==0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean+=2.0;
+ }
+ else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+ }
+
+ ci_total++;
+ ci_test++;
}
}
@@ -2339,15 +2430,19 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs
for (size_t i=0; i<ni_test; ++i) {
geno=gsl_vector_get(x,i);
- if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
+ if (geno==-9) {
+ gsl_vector_set(x, i, x_mean);
+ geno=x_mean;
+ }
if (x_mean>1) {
- gsl_vector_set(x, i, 2-geno);
+ gsl_vector_set(x, i, 2-geno);
}
}
- //calculate statistics
+ // Calculate statistics.
time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0,
+ &UtW_expand_x.vector);
gsl_vector_mul (x, env);
gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
@@ -2357,39 +2452,39 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gs
if (a_mode==2 || a_mode==4) {
FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
- CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ CalcLambda ('L', param0, l_min, l_max, n_region,
+ lambda_mle, logl_H0);
}
CalcUab(UtW_expand, Uty, Utx, Uab);
-// if (e_mode!=0) {
-// Calcab (W, y, x, ab);
-// }
-
time_start=clock();
FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
- //3 is before 1, for beta
+ // 3 is before 1, for beta.
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ CalcLambda ('R', param1, l_min, l_max, n_region,
+ lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ CalcLambda ('L', param1, l_min, l_max, n_region,
+ lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
}
if (x_mean>1) {beta*=-1;}
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //store summary data
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ // Store summary data.
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald,
+ p_lrt, p_score};
sumStat.push_back(SNPs);
}
cout<<endl;
diff --git a/src/param.cpp b/src/param.cpp
index 0a1d721..a7be178 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -208,7 +208,6 @@ void PARAM::ReadFiles (void) {
ns_total=indicator_snp.size();
}
-
// Read genotype and phenotype file for PLINK format.
if (!file_bfile.empty()) {
file_str=file_bfile+".bim";
@@ -348,8 +347,6 @@ void PARAM::ReadFiles (void) {
infile.clear();
}
-
-
// Read genotype and phenotype file for multiple BIMBAM files.
if (!file_mgeno.empty()) {
@@ -403,8 +400,6 @@ void PARAM::ReadFiles (void) {
infile.clear();
}
-
-
if (!file_gene.empty()) {
if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
p_column)==false) {error=true;}
@@ -433,7 +428,6 @@ void PARAM::ReadFiles (void) {
}
}
-
// Read is after gene file.
if (!file_read.empty() ) {
if (ReadFile_column (file_read, indicator_read,
@@ -471,8 +465,7 @@ void PARAM::ReadFiles (void) {
return;
}
-void PARAM::CheckParam (void)
-{
+void PARAM::CheckParam (void) {
struct stat fileInfo;
string str;
@@ -482,15 +475,49 @@ void PARAM::CheckParam (void)
k_mode<<endl;
error=true;
}
- if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 && a_mode!=71)
- {cout<<"error! unknown analysis mode: "<<a_mode<<". make sure -gk or -eigen or -lmm or -bslmm -predict or -calccov is sepcified correctly."<<endl; error=true;}
- if (miss_level>1) {cout<<"error! missing level needs to be between 0 and 1. current value = "<<miss_level<<endl; error=true;}
- if (maf_level>0.5) {cout<<"error! maf level needs to be between 0 and 0.5. current value = "<<maf_level<<endl; error=true;}
- if (hwe_level>1) {cout<<"error! hwe level needs to be between 0 and 1. current value = "<<hwe_level<<endl; error=true;}
- if (r2_level>1) {cout<<"error! r2 level needs to be between 0 and 1. current value = "<<r2_level<<endl; error=true;}
+ if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5
+ && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 &&
+ a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 &&
+ a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 &&
+ a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 &&
+ a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 &&
+ a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 &&
+ a_mode!=71) {
+ cout<<"error! unknown analysis mode: "<<a_mode<<
+ ". make sure -gk or -eigen or -lmm or -bslmm -predict or " <<
+ "-calccov is sepcified correctly."<<endl;
+ error=true;
+ }
+ if (miss_level>1) {
+ cout<<"error! missing level needs to be between 0 and 1. " <<
+ "current value = "<<miss_level<<endl;
+ error=true;
+ }
+ if (maf_level>0.5) {
+ cout<<"error! maf level needs to be between 0 and 0.5. " <<
+ "current value = "<<maf_level<<endl;
+ error=true;
+ }
+ if (hwe_level>1) {
+ cout<<"error! hwe level needs to be between 0 and 1. " <<
+ "current value = "<<hwe_level<<endl;
+ error=true;
+ }
+ if (r2_level>1) {
+ cout<<"error! r2 level needs to be between 0 and 1. " <<
+ "current value = "<<r2_level<<endl;
+ error=true;
+ }
- if (l_max<l_min) {cout<<"error! maximum lambda value must be larger than the minimal value. current values = "<<l_max<<" and "<<l_min<<endl; error=true;}
- if (h_max<h_min) {cout<<"error! maximum h value must be larger than the minimal value. current values = "<<h_max<<" and "<<h_min<<endl; error=true;}
+ if (l_max<l_min) {
+ cout<<"error! maximum lambda value must be larger than the " <<
+ "minimal value. current values = "<<l_max<<" and "<<l_min<<endl;
+ error=true;
+ }
+ if (h_max<h_min) {
+ cout<<"error! maximum h value must be larger than the minimal value. current values = "<<h_max<<" and "<<h_min<<endl;
+ error=true;
+ }
if (s_max<s_min) {cout<<"error! maximum s value must be larger than the minimal value. current values = "<<s_max<<" and "<<s_min<<endl; error=true;}
if (rho_max<rho_min) {cout<<"error! maximum rho value must be larger than the minimal value. current values = "<<rho_max<<" and "<<rho_min<<endl; error=true;}
if (logp_max<logp_min) {cout<<"error! maximum logp value must be larger than the minimal value. current values = "<<logp_max/log(10)<<" and "<<logp_min/log(10)<<endl; error=true;}