/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> #include <fstream> #include <string> #include <cstring> #include <sys/stat.h> #include <ctime> #include <cmath> #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_eigen.h" #include "gsl/gsl_cdf.h" #include "lapack.h" //for functions EigenDecomp #ifdef FORCE_FLOAT #include "io_float.h" //for function ReadFile_kin #include "gemma_float.h" #include "vc_float.h" #include "lm_float.h" //for LM class #include "bslmm_float.h" //for BSLMM class #include "bslmmdap_float.h" //for BSLMMDAP class #include "ldr_float.h" //for LDR class #include "lmm_float.h" //for LMM class, and functions CalcLambda, CalcPve, CalcVgVe #include "mvlmm_float.h" //for MVLMM class #include "prdt_float.h" //for PRDT class #include "varcov_float.h" //for MVLMM class #include "mathfunc_float.h" //for a few functions #else #include "io.h" #include "gemma.h" #include "vc.h" #include "lm.h" #include "bslmm.h" #include "bslmmdap.h" #include "ldr.h" #include "lmm.h" #include "mvlmm.h" #include "prdt.h" #include "varcov.h" #include "mathfunc.h" #endif using namespace std; GEMMA::GEMMA(void): version("0.96"), date("05/17/2017"), year("2017") {} void GEMMA::PrintHeader (void) { cout<<endl; cout<<"*********************************************************"<<endl; cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl; cout<<" Version "<<version<<", "<<date<<" "<<endl; cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl; cout<<" (C) "<<year<<" Xiang Zhou "<<endl; cout<<" GNU General Public License "<<endl; cout<<" For Help, Type ./gemma -h "<<endl; cout<<"*********************************************************"<<endl; cout<<endl; return; } void GEMMA::PrintLicense (void) { cout<<endl; cout<<"The Software Is Distributed Under GNU General Public License, But May Also Require The Following Notifications."<<endl; cout<<endl; cout<<"Including Lapack Routines In The Software May Require The Following Notification:"<<endl; cout<<"Copyright (c) 1992-2010 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved."<<endl; cout<<"Copyright (c) 2000-2010 The University of California Berkeley. All rights reserved."<<endl; cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. All rights reserved."<<endl; cout<<endl; cout<<"$COPYRIGHT$"<<endl; cout<<"Additional copyrights may follow"<<endl; cout<<"$HEADER$"<<endl; cout<<"Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:"<<endl; cout<<"- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer."<<endl; cout<<"- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution."<<endl; cout<<"- Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission."<<endl; cout<<"The copyright holders provide no reassurances that the source code provided does not infringe any patent, copyright, or any other " <<"intellectual property rights of third parties. The copyright holders disclaim any liability to any recipient for claims brought against " <<"recipient by any third party for infringement of that parties intellectual property rights. "<<endl; cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT " <<"LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT " <<"OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT " <<"LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY " <<"THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE " <<"OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."<<endl; cout<<endl; return; } void GEMMA::PrintHelp(size_t option) { if (option==0) { cout<<endl; cout<<" GEMMA version "<<version<<", released on "<<date<<endl; cout<<" implemented by Xiang Zhou"<<endl; cout<<endl; cout<<" type ./gemma -h [num] for detailed helps"<<endl; cout<<" options: " << endl; cout<<" 1: quick guide"<<endl; cout<<" 2: file I/O related"<<endl; cout<<" 3: SNP QC"<<endl; cout<<" 4: calculate relatedness matrix"<<endl; cout<<" 5: perform eigen decomposition"<<endl; cout<<" 6: perform variance component estimation"<<endl; cout<<" 7: fit a linear model"<<endl; cout<<" 8: fit a linear mixed model"<<endl; cout<<" 9: fit a multivariate linear mixed model"<<endl; cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl; cout<<" 11: obtain predicted values"<<endl; cout<<" 12: calculate snp variance covariance"<<endl; cout<<" 13: note"<<endl; cout<<endl; } if (option==1) { cout<<" QUICK GUIDE" << endl; cout<<" to generate a relatedness matrix: "<<endl; cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl; cout<<" to generate the S matrix: "<<endl; cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl; cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl; cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl; cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl; cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl; cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl; cout<<" to generate the q vector: "<<endl; cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl; cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl; cout<<" to generate the ldsc weigthts: "<<endl; cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl; cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl; cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl; cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl; cout<<" to estimate variance components: "<<endl; cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl; cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl; cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl; cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl; cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl; cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl; cout<<" options for the above two commands: -crt -windowbp [num]"<<endl; cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl; cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl; cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl; cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl; cout<<" to fit a linear mixed model: "<<endl; cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; cout<<" to fit a linear mixed model to test g by e effects: "<<endl; cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl; cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; cout<<" to fit a multivariate linear mixed model: "<<endl; cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; cout<<" to fit a Bayesian sparse linear mixed model: "<<endl; cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl; cout<<" to obtain predicted values: "<<endl; cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; cout<<" to calculate correlations between SNPs: "<<endl; cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl; cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl; cout<<endl; } if (option==2) { cout<<" FILE I/O RELATED OPTIONS" << endl; cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl; cout<<" requires: *.fam, *.bim and *.bed files"<<endl; cout<<" missing value: -9"<<endl; cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl; cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; cout<<" ..."<<endl; cout<<" missing value: NA"<<endl; cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl; cout<<" format: phenotype for individual 1"<<endl; cout<<" phenotype for individual 2"<<endl; cout<<" ..."<<endl; cout<<" missing value: NA"<<endl; cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl; cout<<" format: rs#1, base_position, chr_number"<<endl; cout<<" rs#2, base_position, chr_number"<<endl; cout<<" ..."<<endl; // WJA added cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl; cout<<" requires: *.bgen, *.sample files"<<endl; cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl; cout<<" format: variable for individual 1"<<endl; cout<<" variable for individual 2"<<endl; cout<<" ..."<<endl; cout<<" missing value: NA"<<endl; cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl; cout<<" format: variable for individual 1"<<endl; cout<<" variable for individual 2"<<endl; cout<<" ..."<<endl; cout<<" missing value: NA"<<endl; cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl; cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl; cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl; cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl; cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl; cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl; cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl; cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl; cout<<" missing value: NA"<<endl; cout<<" note: the intercept (a column of 1s) may need to be included"<<endl; cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl; cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl; cout<<" options: n1: rs column number"<<endl; cout<<" n2: estimated alpha column number (0 to ignore)"<<endl; cout<<" n3: estimated beta column number (0 to ignore)"<<endl; cout<<" n4: estimated gamma column number (0 to ignore)"<<endl; cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl; cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl; cout<<" format: value for individual 1"<<endl; cout<<" value for individual 2"<<endl; cout<<" ..."<<endl; cout<<" missing value: NA"<<endl; cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl; cout<<" -mu [num] "<<" specify input estimated mean value"<<endl; cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl; cout<<" format: header"<<endl; cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl; cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl; cout<<" ..."<<endl; cout<<" missing value: not allowed"<<endl; cout<<" -r [filename] "<<" specify input total read count file name"<<endl; cout<<" format: total read count for individual 1"<<endl; cout<<" total read count for individual 2"<<endl; cout<<" ..."<<endl; cout<<" missing value: NA"<<endl; cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl; cout<<" format: rs#1"<<endl; cout<<" rs#2"<<endl; cout<<" ..."<<endl; cout<<" missing value: NA"<<endl; cout<<" -silence "<<" silent terminal display"<<endl; cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl; cout<<" options: 1: \"n by n matrix\" format"<<endl; cout<<" 2: \"id id value\" format"<<endl; cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl; cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl; cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl; cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl; cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl; cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl; cout<<endl; } if (option==3) { cout<<" SNP QC OPTIONS" << endl; cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl; cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl; cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl; cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl; cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl; cout<<endl; } if (option==4) { cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl; cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl; cout<<" options: 1: centered XX^T/p"<<endl; cout<<" 2: standardized XX^T/p"<<endl; cout<<" note: non-polymorphic SNPs are excluded "<<endl; cout<<endl; } if (option==5) { cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl; cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl; cout<<endl; } if (option==6) { cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl; cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl; cout<<" options (with kinship file): 1: HE regression (default)"<<endl; cout<<" 2: REML"<<endl; cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl; cout<<" 2: Standardized genotypes"<<endl; cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl; cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl; cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl; cout<<endl; } if (option==7) { cout<<" LINEAR MODEL OPTIONS" << endl; cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl; cout<<" options: 1: Wald test"<<endl; cout<<" 2: Likelihood ratio test"<<endl; cout<<" 3: Score test"<<endl; cout<<" 4: 1-3"<<endl; cout<<endl; } if (option==8) { cout<<" LINEAR MIXED MODEL OPTIONS" << endl; cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl; cout<<" options: 1: Wald test"<<endl; cout<<" 2: Likelihood ratio test"<<endl; cout<<" 3: Score test"<<endl; cout<<" 4: 1-3"<<endl; cout<<" 5: Parameter estimation in the null model only"<<endl; cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl; cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl; cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl; cout<<endl; } if (option==9) { cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl; cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl; cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl; cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl; cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl; cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl; cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl; cout<<endl; } if (option==10) { cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl; cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl; cout<<" options: 1: BSLMM"<<endl; cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl; cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl; cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl; cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl; cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl; cout<<" options: 1: LDR"<<endl; cout<<" MCMC OPTIONS" << endl; cout<<" Prior" << endl; cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl; cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl; cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl; cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl; cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl; cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl; cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl; cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl; cout<<" Proposal" << endl; cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl; cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl; cout<<" Others" << endl; cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl; cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl; cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl; cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl; cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl; cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl; cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl; cout<<endl; } if (option==11) { cout<<" PREDICTION OPTIONS" << endl; cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl; cout<<" options: 1: predict for individuals with missing phenotypes"<<endl; cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl; cout<<endl; } if (option==12) { cout<<" CALC CORRELATION OPTIONS" << endl; cout<<" -calccor "<<endl; cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl; cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl; cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl; cout<<endl; } if (option==13) { cout<<" NOTE"<<endl; cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl; cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl; cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl; cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl; cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl; cout<<endl; } return; } //options //gk: 21-22 //gs: 25-26 //gq: 27-28 //eigen: 31-32 //lmm: 1-5 //bslmm: 11-15 //predict: 41-43 //lm: 51 //vc: 61 //ci: 66-67 //calccor: 71 //gw: 72 void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) { string str; for(int i = 1; i < argc; i++) { if (strcmp(argv[i], "-bfile")==0 || strcmp(argv[i], "--bfile")==0 || strcmp(argv[i], "-b")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_bfile=str; } else if (strcmp(argv[i], "-mbfile")==0 || strcmp(argv[i], "--mbfile")==0 || strcmp(argv[i], "-mb")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_mbfile=str; } else if (strcmp(argv[i], "-silence")==0) { cPar.mode_silence=true; } else if (strcmp(argv[i], "-g")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_geno=str; } else if (strcmp(argv[i], "-mg")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_mgeno=str; } else if (strcmp(argv[i], "-p")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_pheno=str; } else if (strcmp(argv[i], "-a")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_anno=str; } // WJA added else if (strcmp(argv[i], "-oxford")==0 || strcmp(argv[i], "--oxford")==0 || strcmp(argv[i], "-x")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_oxford=str; } else if (strcmp(argv[i], "-gxe")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_gxe=str; } else if (strcmp(argv[i], "-widv")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_weight=str; } else if (strcmp(argv[i], "-wsnp")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_wsnp=str; } else if (strcmp(argv[i], "-wcat")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_wcat=str; } else if (strcmp(argv[i], "-k")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_kin=str; } else if (strcmp(argv[i], "-mk")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_mk=str; } else if (strcmp(argv[i], "-u")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_ku=str; } else if (strcmp(argv[i], "-d")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_kd=str; } else if (strcmp(argv[i], "-c")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_cvt=str; } else if (strcmp(argv[i], "-cat")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_cat=str; } else if (strcmp(argv[i], "-mcat")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_mcat=str; } else if (strcmp(argv[i], "-catc")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_catc=str; } else if (strcmp(argv[i], "-mcatc")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_mcatc=str; } else if (strcmp(argv[i], "-beta")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_beta=str; } else if (strcmp(argv[i], "-bf")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_bf=str; } else if (strcmp(argv[i], "-hyp")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_hyp=str; } else if (strcmp(argv[i], "-cor")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_cor=str; } else if (strcmp(argv[i], "-study")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_study=str; } else if (strcmp(argv[i], "-ref")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_ref=str; } else if (strcmp(argv[i], "-mstudy")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_mstudy=str; } else if (strcmp(argv[i], "-mref")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_mref=str; } else if (strcmp(argv[i], "-epm")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_epm=str; } else if (strcmp(argv[i], "-en")==0) { while (argv[i+1] != NULL && argv[i+1][0] != '-') { ++i; str.clear(); str.assign(argv[i]); cPar.est_column.push_back(atoi(str.c_str())); } } else if (strcmp(argv[i], "-ebv")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_ebv=str; } else if (strcmp(argv[i], "-emu")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_log=str; } else if (strcmp(argv[i], "-mu")==0) { if(argv[i+1] == NULL) {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.pheno_mean=atof(str.c_str()); } else if (strcmp(argv[i], "-gene")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_gene=str; } else if (strcmp(argv[i], "-r")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_read=str; } else if (strcmp(argv[i], "-snps")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_snps=str; } else if (strcmp(argv[i], "-km")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.k_mode=atoi(str.c_str()); } else if (strcmp(argv[i], "-n")==0) { (cPar.p_column).clear(); while (argv[i+1] != NULL && argv[i+1][0] != '-') { ++i; str.clear(); str.assign(argv[i]); (cPar.p_column).push_back(atoi(str.c_str())); } } else if (strcmp(argv[i], "-pace")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.d_pace=atoi(str.c_str()); } else if (strcmp(argv[i], "-outdir")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.path_out=str; } else if (strcmp(argv[i], "-o")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.file_out=str; } else if (strcmp(argv[i], "-miss")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.miss_level=atof(str.c_str()); } else if (strcmp(argv[i], "-maf")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); if (cPar.maf_level!=-1) {cPar.maf_level=atof(str.c_str());} } else if (strcmp(argv[i], "-hwe")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.hwe_level=atof(str.c_str()); } else if (strcmp(argv[i], "-r2")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.r2_level=atof(str.c_str()); } else if (strcmp(argv[i], "-notsnp")==0) { cPar.maf_level=-1; } else if (strcmp(argv[i], "-gk")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=21; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=20+atoi(str.c_str()); } else if (strcmp(argv[i], "-gs")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=25; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=24+atoi(str.c_str()); } else if (strcmp(argv[i], "-gq")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=27; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=26+atoi(str.c_str()); } else if (strcmp(argv[i], "-gw")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=72; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=71+atoi(str.c_str()); } else if (strcmp(argv[i], "-sample")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.ni_subsample=atoi(str.c_str()); } else if (strcmp(argv[i], "-eigen")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=31; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=30+atoi(str.c_str()); } else if (strcmp(argv[i], "-calccor")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=71; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=70+atoi(str.c_str()); } else if (strcmp(argv[i], "-vc")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=61; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=60+atoi(str.c_str()); } else if (strcmp(argv[i], "-ci")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=66; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=65+atoi(str.c_str()); } else if (strcmp(argv[i], "-pve")==0) { double s=0; while (argv[i+1] != NULL && (argv[i+1][0] != '-' || !isalpha(argv[i+1][1]) ) ) { ++i; str.clear(); str.assign(argv[i]); cPar.v_pve.push_back(atof(str.c_str())); s+=atof(str.c_str()); } if (s==1) { cout<<"summation of pve equals one."<<endl; } } else if (strcmp(argv[i], "-blocks")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.n_block=atoi(str.c_str()); } else if (strcmp(argv[i], "-noconstrain")==0) { cPar.noconstrain=true; } else if (strcmp(argv[i], "-lm")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=51; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=50+atoi(str.c_str()); } else if (strcmp(argv[i], "-fa")==0 || strcmp(argv[i], "-lmm")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=1; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=atoi(str.c_str()); } else if (strcmp(argv[i], "-lmin")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.l_min=atof(str.c_str()); } else if (strcmp(argv[i], "-lmax")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.l_max=atof(str.c_str()); } else if (strcmp(argv[i], "-region")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.n_region=atoi(str.c_str()); } else if (strcmp(argv[i], "-pnr")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.p_nr=atof(str.c_str()); } else if (strcmp(argv[i], "-emi")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.em_iter=atoi(str.c_str()); } else if (strcmp(argv[i], "-nri")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.nr_iter=atoi(str.c_str()); } else if (strcmp(argv[i], "-emp")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.em_prec=atof(str.c_str()); } else if (strcmp(argv[i], "-nrp")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.nr_prec=atof(str.c_str()); } else if (strcmp(argv[i], "-crt")==0) { cPar.crt=1; } else if (strcmp(argv[i], "-bslmm")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=11; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=10+atoi(str.c_str()); } /* else if (strcmp(argv[i], "-ldr")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=14; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=13+atoi(str.c_str()); } */ else if (strcmp(argv[i], "-hmin")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.h_min=atof(str.c_str()); } else if (strcmp(argv[i], "-hmax")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.h_max=atof(str.c_str()); } else if (strcmp(argv[i], "-rmin")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.rho_min=atof(str.c_str()); } else if (strcmp(argv[i], "-rmax")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.rho_max=atof(str.c_str()); } else if (strcmp(argv[i], "-pmin")==0) { if(argv[i+1] == NULL) {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.logp_min=atof(str.c_str())*log(10.0); } else if (strcmp(argv[i], "-pmax")==0) { if(argv[i+1] == NULL) {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.logp_max=atof(str.c_str())*log(10.0); } else if (strcmp(argv[i], "-smin")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.s_min=atoi(str.c_str()); } else if (strcmp(argv[i], "-smax")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.s_max=atoi(str.c_str()); } else if (strcmp(argv[i], "-gmean")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.geo_mean=atof(str.c_str()); } else if (strcmp(argv[i], "-hscale")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.h_scale=atof(str.c_str()); } else if (strcmp(argv[i], "-rscale")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.rho_scale=atof(str.c_str()); } else if (strcmp(argv[i], "-pscale")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.logp_scale=atof(str.c_str())*log(10.0); } else if (strcmp(argv[i], "-w")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.w_step=atoi(str.c_str()); } else if (strcmp(argv[i], "-s")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.s_step=atoi(str.c_str()); } else if (strcmp(argv[i], "-rpace")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.r_pace=atoi(str.c_str()); } else if (strcmp(argv[i], "-wpace")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.w_pace=atoi(str.c_str()); } else if (strcmp(argv[i], "-seed")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.randseed=atol(str.c_str()); } else if (strcmp(argv[i], "-mh")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.n_mh=atoi(str.c_str()); } else if (strcmp(argv[i], "-predict")==0) { if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=41; continue;} ++i; str.clear(); str.assign(argv[i]); cPar.a_mode=40+atoi(str.c_str()); } else if (strcmp(argv[i], "-windowcm")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.window_cm=atof(str.c_str()); } else if (strcmp(argv[i], "-windowbp")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.window_bp=atoi(str.c_str()); } else if (strcmp(argv[i], "-windowns")==0) { if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} ++i; str.clear(); str.assign(argv[i]); cPar.window_ns=atoi(str.c_str()); } else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;} } //change prediction mode to 43, if the epm file is not provided if (cPar.a_mode==41 && cPar.file_epm.empty()) {cPar.a_mode=43;} return; } void GEMMA::BatchRun (PARAM &cPar) { clock_t time_begin, time_start; time_begin=clock(); //Read Files cout<<"Reading Files ... "<<endl; cPar.ReadFiles(); if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;} cPar.CheckData(); if (cPar.error==true) {cout<<"error! fail to check data. "<<endl; return;} //Prediction for bslmm if (cPar.a_mode==41 || cPar.a_mode==42) { gsl_vector *y_prdt; y_prdt=gsl_vector_alloc (cPar.ni_total-cPar.ni_test); //set to zero gsl_vector_set_zero (y_prdt); PRDT cPRDT; cPRDT.CopyFromParam(cPar); //add breeding value if needed if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) { cout<<"Adding Breeding Values ... "<<endl; gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total); gsl_vector *u_hat=gsl_vector_alloc (cPar.ni_test); //read kinship matrix and set u_hat vector<int> indicator_all; size_t c_bv=0; for (size_t i=0; i<cPar.indicator_idv.size(); i++) { indicator_all.push_back(1); if (cPar.indicator_bv[i]==1) {gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); c_bv++;} } ReadFile_kin (cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} //read u cPRDT.AddBV(G, u_hat, y_prdt); gsl_matrix_free(G); gsl_vector_free(u_hat); } //add beta if (!cPar.file_bfile.empty()) { cPRDT.AnalyzePlink (y_prdt); } else { cPRDT.AnalyzeBimbam (y_prdt); } //add mu gsl_vector_add_constant(y_prdt, cPar.pheno_mean); //convert y to probability if needed if (cPar.a_mode==42) { double d; for (size_t i=0; i<y_prdt->size; i++) { d=gsl_vector_get(y_prdt, i); d=gsl_cdf_gaussian_P(d, 1.0); gsl_vector_set(y_prdt, i, d); } } cPRDT.CopyToParam(cPar); cPRDT.WriteFiles(y_prdt); gsl_vector_free(y_prdt); } //Prediction with kinship matrix only; for one or more phenotypes if (cPar.a_mode==43) { //first, use individuals with full phenotypes to obtain estimates of Vg and Ve gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1); gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2); gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2); gsl_vector *eval=gsl_vector_alloc (Y->size1); gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph); gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt); //set covariates matrix W and phenotype matrix Y //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); cPar.CopyCvtPhen (W_full, Y_full, 1); gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph); gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1); gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2); //read relatedness matrix G, and matrix G_full ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} ReadFile_kin (cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} //center matrix G CenterMatrix (G); CenterMatrix (G_full); //eigen-decomposition and calculate trace_G cout<<"Start Eigen-Decomposition..."<<endl; time_start=clock(); cPar.trace_G=EigenDecomp (G, U, eval, 0); cPar.trace_G=0.0; for (size_t i=0; i<eval->size; i++) { if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} cPar.trace_G+=gsl_vector_get (eval, i); } cPar.trace_G/=(double)eval->size; cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, Y, UtY); //calculate variance component and beta estimates //and then obtain predicted values if (cPar.n_ph==1) { gsl_vector *beta=gsl_vector_alloc (W->size2); gsl_vector *se_beta=gsl_vector_alloc (W->size2); double lambda, logl, vg, ve; gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); //obtain estimates CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, lambda, logl); CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, lambda, vg, ve, beta, se_beta); cout<<"REMLE estimate for vg in the null model = "<<vg<<endl; cout<<"REMLE estimate for ve in the null model = "<<ve<<endl; cPar.vg_remle_null=vg; cPar.ve_remle_null=ve; //obtain Y_hat from fixed effects gsl_vector_view Yhat_col=gsl_matrix_column (Y_hat, 0); gsl_blas_dgemv (CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector); //obtain H gsl_matrix_set_identity (H_full); gsl_matrix_scale (H_full, ve); gsl_matrix_scale (G_full, vg); gsl_matrix_add (H_full, G_full); //free matrices gsl_vector_free(beta); gsl_vector_free(se_beta); } else { gsl_matrix *Vg=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph); gsl_matrix *Ve=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph); gsl_matrix *B=gsl_matrix_alloc (cPar.n_ph, W->size2); gsl_matrix *se_B=gsl_matrix_alloc (cPar.n_ph, W->size2); //obtain estimates CalcMvLmmVgVeBeta (eval, UtW, UtY, cPar.em_iter, cPar.nr_iter, cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max, cPar.n_region, Vg, Ve, B, se_B); cout<<"REMLE estimate for Vg in the null model: "<<endl; for (size_t i=0; i<Vg->size1; i++) { for (size_t j=0; j<=i; j++) { cout<<gsl_matrix_get(Vg, i, j)<<"\t"; } cout<<endl; } cout<<"REMLE estimate for Ve in the null model: "<<endl; for (size_t i=0; i<Ve->size1; i++) { for (size_t j=0; j<=i; j++) { cout<<gsl_matrix_get(Ve, i, j)<<"\t"; } cout<<endl; } cPar.Vg_remle_null.clear(); cPar.Ve_remle_null.clear(); for (size_t i=0; i<Vg->size1; i++) { for (size_t j=i; j<Vg->size2; j++) { cPar.Vg_remle_null.push_back(gsl_matrix_get (Vg, i, j) ); cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) ); } } //obtain Y_hat from fixed effects gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat); //obtain H KroneckerSym(G_full, Vg, H_full); for (size_t i=0; i<G_full->size1; i++) { gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2); gsl_matrix_add (&H_sub.matrix, Ve); } //free matrices gsl_matrix_free (Vg); gsl_matrix_free (Ve); gsl_matrix_free (B); gsl_matrix_free (se_B); } PRDT cPRDT; cPRDT.CopyFromParam(cPar); cout<<"Predicting Missing Phentypes ... "<<endl; time_start=clock(); cPRDT.MvnormPrdt(Y_hat, H_full, Y_full); cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cPRDT.WriteFiles(Y_full); gsl_matrix_free(Y); gsl_matrix_free(W); gsl_matrix_free(G); gsl_matrix_free(U); gsl_matrix_free(UtW); gsl_matrix_free(UtY); gsl_vector_free(eval); gsl_matrix_free(Y_full); gsl_matrix_free(Y_hat); gsl_matrix_free(W_full); gsl_matrix_free(G_full); gsl_matrix_free(H_full); } //Generate Kinship matrix if (cPar.a_mode==21 || cPar.a_mode==22) { cout<<"Calculating Relatedness Matrix ... "<<endl; gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total); time_start=clock(); cPar.CalcKin (G); cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); if (cPar.error==true) {cout<<"error! fail to calculate relatedness matrix. "<<endl; return;} if (cPar.a_mode==21) { cPar.WriteMatrix (G, "cXX"); } else { cPar.WriteMatrix (G, "sXX"); } gsl_matrix_free (G); } //Compute the LDSC weights (not implemented yet) if (cPar.a_mode==72) { cout<<"Calculating Weights ... "<<endl; VARCOV cVarcov; cVarcov.CopyFromParam(cPar); if (!cPar.file_bfile.empty()) { cVarcov.AnalyzePlink (); } else { cVarcov.AnalyzeBimbam (); } cVarcov.CopyToParam(cPar); } //Compute the S matrix (and its variance), that is used for variance component estimation using summary statistics if (cPar.a_mode==25 || cPar.a_mode==26) { cout<<"Calculating the S Matrix ... "<<endl; gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc); gsl_vector *ns=gsl_vector_alloc (cPar.n_vc+1); gsl_matrix_set_zero(S); gsl_vector_set_zero(ns); gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); gsl_vector_view ns_vec=gsl_vector_subvector(ns, 0, cPar.n_vc); gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); gsl_matrix_set_zero (K); gsl_matrix_set_zero (A); gsl_vector *y=gsl_vector_alloc (cPar.ni_test); gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt); cPar.CopyCvtPhen (W, y, 0); set<string> setSnps_beta; map <string, double> mapRS2wA, mapRS2wK; cPar.ObtainWeight(setSnps_beta, mapRS2wK); time_start=clock(); cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector); cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;} gsl_vector_set (ns, cPar.n_vc, cPar.ni_test); cPar.WriteMatrix (S, "S"); cPar.WriteVector (ns, "size"); cPar.WriteVar ("snps"); /* cout<<scientific; for (size_t i=0; i<cPar.n_vc; i++) { for (size_t j=0; j<cPar.n_vc; j++) { cout<<gsl_matrix_get(S, i, j)<<" "; } cout<<endl; } for (size_t i=cPar.n_vc; i<cPar.n_vc*2; i++) { for (size_t j=0; j<cPar.n_vc; j++) { cout<<gsl_matrix_get(S, i, j)<<" "; } cout<<endl; } */ gsl_matrix_free (S); gsl_vector_free (ns); gsl_matrix_free (A); gsl_matrix_free (K); gsl_vector_free (y); gsl_matrix_free (K); } //Compute the q vector, that is used for variance component estimation using summary statistics if (cPar.a_mode==27 || cPar.a_mode==28) { gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); gsl_vector *q=gsl_vector_alloc (cPar.n_vc); gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1); gsl_vector_set_zero (q); gsl_vector_set_zero (s); gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc); vector<size_t> vec_cat, vec_ni; vector<double> vec_weight, vec_z2; map<string, double> mapRS2weight; mapRS2weight.clear(); time_start=clock(); ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test); cout<<"## number of total individuals = "<<cPar.ni_total<<endl; cout<<"## number of total SNPs = "<<cPar.ns_total<<endl; cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; cout<<"## number of variance components = "<<cPar.n_vc<<endl; cout<<"Calculating the q vector ... "<<endl; Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector); cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); if (cPar.error==true) {cout<<"error! fail to calculate the q vector. "<<endl; return;} gsl_vector_set (s, cPar.n_vc, cPar.ni_total); cPar.WriteMatrix (Vq, "Vq"); cPar.WriteVector (q, "q"); cPar.WriteVector (s, "size"); /* for (size_t i=0; i<cPar.n_vc; i++) { cout<<gsl_vector_get(q, i)<<endl; } */ gsl_matrix_free (Vq); gsl_vector_free (q); gsl_vector_free (s); } //Calculate SNP covariance if (cPar.a_mode==71) { VARCOV cVarcov; cVarcov.CopyFromParam(cPar); if (!cPar.file_bfile.empty()) { cVarcov.AnalyzePlink (); } else { cVarcov.AnalyzeBimbam (); } cVarcov.CopyToParam(cPar); } //LM if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); //set covariates matrix W and phenotype matrix Y //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); //Fit LM or mvLM if (cPar.n_ph==1) { LM cLm; cLm.CopyFromParam(cPar); gsl_vector_view Y_col=gsl_matrix_column (Y, 0); if (!cPar.file_gene.empty()) { cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype } else if (!cPar.file_bfile.empty()) { cLm.AnalyzePlink (W, &Y_col.vector); } else if (!cPar.file_oxford.empty()) { cLm.Analyzebgen (W, &Y_col.vector); } else { cLm.AnalyzeBimbam (W, &Y_col.vector); } cLm.WriteFiles(); cLm.CopyToParam(cPar); } /* else { MVLM cMvlm; cMvlm.CopyFromParam(cPar); if (!cPar.file_bfile.empty()) { cMvlm.AnalyzePlink (W, Y); } else { cMvlm.AnalyzeBimbam (W, Y); } cMvlm.WriteFiles(); cMvlm.CopyToParam(cPar); } */ //release all matrices and vectors gsl_matrix_free (Y); gsl_matrix_free (W); } //VC estimation with one or multiple kinship matrices //REML approach only //if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp //for one phenotype only; if (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) { if (!cPar.file_beta.empty() ) { //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1 //update indicator_snps, so that the numbers are in accordance with mapRS2wK set<string> setSnps_beta; ReadFile_snps_header (cPar.file_beta, setSnps_beta); map <string, double> mapRS2wA, mapRS2wK; cPar.ObtainWeight(setSnps_beta, mapRS2wK); cPar.UpdateSNP (mapRS2wK); //setup matrices and vectors gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc); gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); gsl_vector *q=gsl_vector_alloc (cPar.n_vc); gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1); gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); gsl_vector *y=gsl_vector_alloc (cPar.ni_test); gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt); gsl_matrix_set_zero (K); gsl_matrix_set_zero (A); gsl_matrix_set_zero(S); gsl_matrix_set_zero(Vq); gsl_vector_set_zero (q); gsl_vector_set_zero (s); cPar.CopyCvtPhen (W, y, 0); gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc); vector<size_t> vec_cat, vec_ni; vector<double> vec_weight, vec_z2; //read beta, based on the mapRS2wK ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test); cout<<"Study Panel: "<<endl; cout<<"## number of total individuals = "<<cPar.ni_study<<endl; cout<<"## number of total SNPs = "<<cPar.ns_study<<endl; cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; cout<<"## number of variance components = "<<cPar.n_vc<<endl; //compute q Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector); //compute S time_start=clock(); cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector); cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;} //compute vc estimates CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); //if LDSC weights, then compute the weights and run the above steps again if (cPar.a_mode==62) { //compute the weights and normalize the weights for A cPar.UpdateWeight (1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA); //read beta file again, and update weigths vector ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_total, cPar.ns_test); //compute q Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector); //compute S time_start=clock(); cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector); cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;} //compute vc estimates CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); } gsl_vector_set (s, cPar.n_vc, cPar.ni_test); cPar.WriteMatrix (S, "S"); cPar.WriteMatrix (Vq, "Vq"); cPar.WriteVector (q, "q"); cPar.WriteVector (s, "size"); gsl_matrix_free (S); gsl_matrix_free (Vq); gsl_vector_free (q); gsl_vector_free (s); gsl_matrix_free (A); gsl_matrix_free (K); gsl_vector_free (y); gsl_matrix_free (W); } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) { if (!cPar.file_study.empty()) { string sfile=cPar.file_study+".size.txt"; CountFileLines (sfile, cPar.n_vc); } else { string file_name; igzstream infile (cPar.file_mstudy.c_str(), igzstream::in); if (!infile) {cout<<"error! fail to open mstudy file: "<<cPar.file_study<<endl; return;} safeGetline(infile, file_name); infile.clear(); infile.close(); string sfile=file_name+".size.txt"; CountFileLines (sfile, cPar.n_vc); } cPar.n_vc=cPar.n_vc-1; gsl_matrix *S=gsl_matrix_alloc (2*cPar.n_vc, cPar.n_vc); gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); //gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1, (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) ); //gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1, (n_lines*(n_lines+1))/2*(n_lines+1) ); gsl_vector *q=gsl_vector_alloc (cPar.n_vc); gsl_vector *s_study=gsl_vector_alloc (cPar.n_vc); gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc); gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1); gsl_matrix_set_zero(S); gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); gsl_matrix_set_zero(Vq); //gsl_matrix_set_zero(V); //gsl_matrix_set_zero(Vslope); gsl_vector_set_zero(q); gsl_vector_set_zero(s_study); gsl_vector_set_zero(s_ref); if (!cPar.file_study.empty()) { ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study); } else { ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study); } if (!cPar.file_ref.empty()) { ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref); } else { ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref); } cout<<"## number of variance components = "<<cPar.n_vc<<endl; cout<<"## number of individuals in the sample = "<<cPar.ni_study<<endl; cout<<"## number of individuals in the reference = "<<cPar.ni_ref<<endl; CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); gsl_vector_view s_sub=gsl_vector_subvector (s, 0, cPar.n_vc); gsl_vector_memcpy (&s_sub.vector, s_ref); gsl_vector_set (s, cPar.n_vc, cPar.ni_ref); cPar.WriteMatrix (S, "S"); cPar.WriteMatrix (Vq, "Vq"); cPar.WriteVector (q, "q"); cPar.WriteVector (s, "size"); gsl_matrix_free (S); gsl_matrix_free (Vq); //gsl_matrix_free (V); //gsl_matrix_free (Vslope); gsl_vector_free (q); gsl_vector_free (s_study); gsl_vector_free (s_ref); gsl_vector_free (s); } else { gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc ); //set covariates matrix W and phenotype matrix Y //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); //read kinship matrices if (!(cPar.file_mk).empty()) { ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} //center matrix G, and obtain v_traceG double d=0; (cPar.v_traceG).clear(); for (size_t i=0; i<cPar.n_vc; i++) { gsl_matrix_view G_sub=gsl_matrix_submatrix (G, 0, i*G->size1, G->size1, G->size1); CenterMatrix (&G_sub.matrix); d=0; for (size_t j=0; j<G->size1; j++) { d+=gsl_matrix_get (&G_sub.matrix, j, j); } d/=(double)G->size1; (cPar.v_traceG).push_back(d); } } else if (!(cPar.file_kin).empty()) { ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} //center matrix G CenterMatrix (G); (cPar.v_traceG).clear(); double d=0; for (size_t j=0; j<G->size1; j++) { d+=gsl_matrix_get (G, j, j); } d/=(double)G->size1; (cPar.v_traceG).push_back(d); } /* //eigen-decomposition and calculate trace_G cout<<"Start Eigen-Decomposition..."<<endl; time_start=clock(); if (cPar.a_mode==31) { cPar.trace_G=EigenDecomp (G, U, eval, 1); } else { cPar.trace_G=EigenDecomp (G, U, eval, 0); } cPar.trace_G=0.0; for (size_t i=0; i<eval->size; i++) { if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} cPar.trace_G+=gsl_vector_get (eval, i); } cPar.trace_G/=(double)eval->size; cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); } else { ReadFile_eigenU (cPar.file_ku, cPar.error, U); if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;} ReadFile_eigenD (cPar.file_kd, cPar.error, eval); if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;} cPar.trace_G=0.0; for (size_t i=0; i<eval->size; i++) { if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);} cPar.trace_G+=gsl_vector_get(eval, i); } cPar.trace_G/=(double)eval->size; } */ //fit multiple variance components if (cPar.n_ph==1) { // if (cPar.n_vc==1) { /* //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, Y, UtY); gsl_vector_view beta=gsl_matrix_row (B, 0); gsl_vector_view se_beta=gsl_matrix_row (se_B, 0); gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector); cPar.beta_mle_null.clear(); cPar.se_beta_mle_null.clear(); for (size_t i=0; i<B->size2; i++) { cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) ); cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) ); } CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector); cPar.beta_remle_null.clear(); cPar.se_beta_remle_null.clear(); for (size_t i=0; i<B->size2; i++) { cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) ); cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) ); } CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); cPar.PrintSummary(); //calculate and output residuals if (cPar.a_mode==5) { gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1); gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1); gsl_vector *u_hat=gsl_vector_alloc (Y->size1); gsl_vector *e_hat=gsl_vector_alloc (Y->size1); gsl_vector *y_hat=gsl_vector_alloc (Y->size1); //obtain Utu and Ute gsl_vector_memcpy (y_hat, &UtY_col.vector); gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); double d, u, e; for (size_t i=0; i<eval->size; i++) { d=gsl_vector_get (eval, i); u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); gsl_vector_set (Utu_hat, i, u); gsl_vector_set (Ute_hat, i, e); } //obtain u and e gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); //output residuals cPar.WriteVector(u_hat, "residU"); cPar.WriteVector(e_hat, "residE"); gsl_vector_free(u_hat); gsl_vector_free(e_hat); gsl_vector_free(y_hat); } */ // } else { gsl_vector_view Y_col=gsl_matrix_column (Y, 0); VC cVc; cVc.CopyFromParam(cPar); if (cPar.a_mode==61) { cVc.CalcVChe (G, W, &Y_col.vector); } else if (cPar.a_mode==62) { cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector); } else { cVc.CalcVCacl (G, W, &Y_col.vector); } cVc.CopyToParam(cPar); //obtain pve from sigma2 //obtain se_pve from se_sigma2 //} } } } //compute confidence intervals with additional summary statistics //we do not check the sign of z-scores here, but they have to be matched with the genotypes if (cPar.a_mode==66 || cPar.a_mode==67) { //read reference file first gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc); gsl_matrix_set_zero(S); gsl_matrix_set_zero(Svar); gsl_vector_set_zero(s_ref); if (!cPar.file_ref.empty()) { ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref); } else { ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref); } //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1 set<string> setSnps_beta; ReadFile_snps_header (cPar.file_beta, setSnps_beta); //obtain the weights for wA, which contains the SNP weights for SNPs used in the model map <string, double> mapRS2wK; cPar.ObtainWeight(setSnps_beta, mapRS2wK); //set up matrices and vector gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc); gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc); gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc); gsl_vector *w=gsl_vector_alloc (mapRS2wK.size()); gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size()); gsl_vector *z=gsl_vector_alloc (mapRS2wK.size()); gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc); vector<size_t> vec_cat, vec_size; vector<double> vec_z; map <string, double> mapRS2z, mapRS2wA; map <string, string> mapRS2A1; string file_str; //update s_vec, the number of snps in each category for (size_t i=0; i<cPar.n_vc; i++) { vec_size.push_back(0); } for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { vec_size[cPar.mapRS2cat[it->first]]++; } for (size_t i=0; i<cPar.n_vc; i++) { gsl_vector_set(s_vec, i, vec_size[i]); } //update mapRS2wA using v_pve and s_vec if (cPar.a_mode==66) { for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { mapRS2wA[it->first]=1; } } else { cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA); } //read in z-scores based on allele 0, and save that into a vector ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z); //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat //sign of z is determined by matching alleles cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat); //compute an n by k matrix of X_iWz cout<<"Calculating Xz ... "<<endl; gsl_matrix_set_zero(Xz); gsl_vector_set_all (w1, 1); if (!cPar.file_bfile.empty() ) { file_str=cPar.file_bfile+".bed"; PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz); } else if (!cPar.file_geno.empty()) { BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz); } else if (!cPar.file_mbfile.empty() ){ MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz); } else if (!cPar.file_mgeno.empty()) { MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz); } /* cout<<"Xz: "<<endl; for (size_t i=0; i<5; i++) { for (size_t j=0; j<cPar.n_vc; j++) { cout<<gsl_matrix_get (Xz, i, j)<<" "; } cout<<endl; } */ if (cPar.a_mode==66) { gsl_matrix_memcpy (XWz, Xz); } else if (cPar.a_mode==67) { cout<<"Calculating XWz ... "<<endl; gsl_matrix_set_zero(XWz); if (!cPar.file_bfile.empty() ) { file_str=cPar.file_bfile+".bed"; PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz); } else if (!cPar.file_geno.empty()) { BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz); } else if (!cPar.file_mbfile.empty() ){ MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz); } else if (!cPar.file_mgeno.empty()) { MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz); } } /* cout<<"XWz: "<<endl; for (size_t i=0; i<5; i++) { cout<<gsl_vector_get (w, i)<<endl; for (size_t j=0; j<cPar.n_vc; j++) { cout<<gsl_matrix_get (XWz, i, j)<<" "; } cout<<endl; } */ //compute an p by k matrix of X_j^TWX_iWz cout<<"Calculating XtXWz ... "<<endl; gsl_matrix_set_zero(XtXWz); if (!cPar.file_bfile.empty() ) { file_str=cPar.file_bfile+".bed"; PlinkXtXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz); } else if (!cPar.file_geno.empty()) { BimbamXtXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz); } else if (!cPar.file_mbfile.empty() ){ MFILEXtXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz); } else if (!cPar.file_mgeno.empty()) { MFILEXtXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz); } /* cout<<"XtXWz: "<<endl; for (size_t i=0; i<5; i++) { for (size_t j=0; j<cPar.n_vc; j++) { cout<<gsl_matrix_get (XtXWz, i, j)<<" "; } cout<<endl; } */ //compute confidence intervals CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); //write files //cPar.WriteMatrix (XWz, "XWz"); //cPar.WriteMatrix (XtXWz, "XtXWz"); //cPar.WriteVector (w, "w"); gsl_matrix_free(S); gsl_matrix_free(Svar); gsl_vector_free(s_ref); gsl_matrix_free(Xz); gsl_matrix_free(XWz); gsl_matrix_free(XtXWz); gsl_vector_free(w); gsl_vector_free(w1); gsl_vector_free(z); gsl_vector_free(s_vec); } //LMM or mvLMM or Eigen-Decomposition if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==31) { //Fit LMM or mvLMM or eigen gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); gsl_matrix *B=gsl_matrix_alloc (Y->size2, W->size2); //B is a d by c matrix gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2); gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1); gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2); gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2); gsl_vector *eval=gsl_vector_alloc (Y->size1); gsl_vector *env=gsl_vector_alloc (Y->size1); gsl_vector *weight=gsl_vector_alloc (Y->size1); //set covariates matrix W and phenotype matrix Y //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);} //read relatedness matrix G if (!(cPar.file_kin).empty()) { ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} //center matrix G CenterMatrix (G); //is residual weights are provided, then if (!cPar.file_weight.empty()) { cPar.CopyWeight (weight); double d, wi, wj; for (size_t i=0; i<G->size1; i++) { wi=gsl_vector_get(weight, i); for (size_t j=i; j<G->size2; j++) { wj=gsl_vector_get(weight, j); d=gsl_matrix_get(G, i, j); if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);} gsl_matrix_set(G, i, j, d); if (j!=i) {gsl_matrix_set(G, j, i, d);} } } } //eigen-decomposition and calculate trace_G cout<<"Start Eigen-Decomposition..."<<endl; time_start=clock(); if (cPar.a_mode==31) { cPar.trace_G=EigenDecomp (G, U, eval, 1); } else { cPar.trace_G=EigenDecomp (G, U, eval, 0); } if (!cPar.file_weight.empty()) { double wi; for (size_t i=0; i<U->size1; i++) { wi=gsl_vector_get(weight, i); if (wi<=0) {wi=0;} else {wi=sqrt(wi);} gsl_vector_view Urow=gsl_matrix_row (U, i); gsl_vector_scale (&Urow.vector, wi); } } cPar.trace_G=0.0; for (size_t i=0; i<eval->size; i++) { if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} cPar.trace_G+=gsl_vector_get (eval, i); } cPar.trace_G/=(double)eval->size; cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); } else { ReadFile_eigenU (cPar.file_ku, cPar.error, U); if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;} ReadFile_eigenD (cPar.file_kd, cPar.error, eval); if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;} cPar.trace_G=0.0; for (size_t i=0; i<eval->size; i++) { if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);} cPar.trace_G+=gsl_vector_get(eval, i); } cPar.trace_G/=(double)eval->size; } if (cPar.a_mode==31) { cPar.WriteMatrix(U, "eigenU"); cPar.WriteVector(eval, "eigenD"); } else if (!cPar.file_gene.empty() ) { //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, Y, UtY); LMM cLmm; cLmm.CopyFromParam(cPar); gsl_vector_view Y_col=gsl_matrix_column (Y, 0); gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype cLmm.WriteFiles(); cLmm.CopyToParam(cPar); } else { //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, Y, UtY); //calculate REMLE/MLE estimate and pve for univariate model if (cPar.n_ph==1) { gsl_vector_view beta=gsl_matrix_row (B, 0); gsl_vector_view se_beta=gsl_matrix_row (se_B, 0); gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector); cPar.beta_mle_null.clear(); cPar.se_beta_mle_null.clear(); for (size_t i=0; i<B->size2; i++) { cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) ); cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) ); } CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector); cPar.beta_remle_null.clear(); cPar.se_beta_remle_null.clear(); for (size_t i=0; i<B->size2; i++) { cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) ); cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) ); } CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); cPar.PrintSummary(); //calculate and output residuals if (cPar.a_mode==5) { gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1); gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1); gsl_vector *u_hat=gsl_vector_alloc (Y->size1); gsl_vector *e_hat=gsl_vector_alloc (Y->size1); gsl_vector *y_hat=gsl_vector_alloc (Y->size1); //obtain Utu and Ute gsl_vector_memcpy (y_hat, &UtY_col.vector); gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); double d, u, e; for (size_t i=0; i<eval->size; i++) { d=gsl_vector_get (eval, i); u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); gsl_vector_set (Utu_hat, i, u); gsl_vector_set (Ute_hat, i, e); } //obtain u and e gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); //output residuals cPar.WriteVector(u_hat, "residU"); cPar.WriteVector(e_hat, "residE"); gsl_vector_free(u_hat); gsl_vector_free(e_hat); gsl_vector_free(y_hat); } } //Fit LMM or mvLMM if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) { if (cPar.n_ph==1) { LMM cLmm; cLmm.CopyFromParam(cPar); gsl_vector_view Y_col=gsl_matrix_column (Y, 0); gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); if (!cPar.file_bfile.empty()) { if (cPar.file_gxe.empty()) { cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); } else { cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env); } } // WJA added else if(!cPar.file_oxford.empty()) { cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); } else { if (cPar.file_gxe.empty()) { cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); } else { cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env); } } cLmm.WriteFiles(); cLmm.CopyToParam(cPar); } else { MVLMM cMvlmm; cMvlmm.CopyFromParam(cPar); if (!cPar.file_bfile.empty()) { if (cPar.file_gxe.empty()) { cMvlmm.AnalyzePlink (U, eval, UtW, UtY); } else { cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env); } } else if(!cPar.file_oxford.empty()) { cMvlmm.Analyzebgen (U, eval, UtW, UtY); } else { if (cPar.file_gxe.empty()) { cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY); } else { cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env); } } cMvlmm.WriteFiles(); cMvlmm.CopyToParam(cPar); } } } //release all matrices and vectors gsl_matrix_free (Y); gsl_matrix_free (W); gsl_matrix_free(B); gsl_matrix_free(se_B); gsl_matrix_free (G); gsl_matrix_free (U); gsl_matrix_free (UtW); gsl_matrix_free (UtY); gsl_vector_free (eval); gsl_vector_free (env); } //BSLMM if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { gsl_vector *y=gsl_vector_alloc (cPar.ni_test); gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (y->size, y->size); gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test); //set covariates matrix W and phenotype vector y //an intercept should be included in W, cPar.CopyCvtPhen (W, y, 0); //center y, even for case/control data cPar.pheno_mean=CenterVector(y); //run bvsr if rho==1 if (cPar.rho_min==1 && cPar.rho_max==1) { //read genotypes X (not UtX) cPar.ReadGenotypes (UtX, G, false); //perform BSLMM analysis BSLMM cBslmm; cBslmm.CopyFromParam(cPar); time_start=clock(); cBslmm.MCMC(UtX, y); cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cBslmm.CopyToParam(cPar); //else, if rho!=1 } else { gsl_matrix *U=gsl_matrix_alloc (y->size, y->size); gsl_vector *eval=gsl_vector_alloc (y->size); gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2); gsl_vector *Uty=gsl_vector_alloc (y->size); //read relatedness matrix G if (!(cPar.file_kin).empty()) { cPar.ReadGenotypes (UtX, G, false); //read relatedness matrix G ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} //center matrix G CenterMatrix (G); } else { cPar.ReadGenotypes (UtX, G, true); } //eigen-decomposition and calculate trace_G cout<<"Start Eigen-Decomposition..."<<endl; time_start=clock(); cPar.trace_G=EigenDecomp (G, U, eval, 0); cPar.trace_G=0.0; for (size_t i=0; i<eval->size; i++) { if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} cPar.trace_G+=gsl_vector_get (eval, i); } cPar.trace_G/=(double)eval->size; cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, y, Uty); //calculate REMLE/MLE estimate and pve CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); cPar.PrintSummary(); //Creat and calcualte UtX, use a large memory cout<<"Calculating UtX..."<<endl; time_start=clock(); CalcUtX (U, UtX); cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //perform BSLMM or BSLMMDAP analysis if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { BSLMM cBslmm; cBslmm.CopyFromParam(cPar); time_start=clock(); if (cPar.a_mode==12) { //ridge regression cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null); } else { //Run MCMC cBslmm.MCMC(U, UtX, Uty, eval, y); } cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cBslmm.CopyToParam(cPar); } else { } //release all matrices and vectors gsl_matrix_free (G); gsl_matrix_free (U); gsl_matrix_free (UtW); gsl_vector_free (eval); gsl_vector_free (Uty); } gsl_matrix_free (W); gsl_vector_free (y); gsl_matrix_free (UtX); } //BSLMM-DAP if (cPar.a_mode==14 || cPar.a_mode==15 || cPar.a_mode==16) { if (cPar.a_mode==14) { gsl_vector *y=gsl_vector_alloc (cPar.ni_test); gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (y->size, y->size); gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test); //set covariates matrix W and phenotype vector y //an intercept should be included in W, cPar.CopyCvtPhen (W, y, 0); //center y, even for case/control data cPar.pheno_mean=CenterVector(y); //run bvsr if rho==1 if (cPar.rho_min==1 && cPar.rho_max==1) { //read genotypes X (not UtX) cPar.ReadGenotypes (UtX, G, false); //perform BSLMM analysis BSLMM cBslmm; cBslmm.CopyFromParam(cPar); time_start=clock(); cBslmm.MCMC(UtX, y); cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cBslmm.CopyToParam(cPar); //else, if rho!=1 } else { gsl_matrix *U=gsl_matrix_alloc (y->size, y->size); gsl_vector *eval=gsl_vector_alloc (y->size); gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2); gsl_vector *Uty=gsl_vector_alloc (y->size); //read relatedness matrix G if (!(cPar.file_kin).empty()) { cPar.ReadGenotypes (UtX, G, false); //read relatedness matrix G ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} //center matrix G CenterMatrix (G); } else { cPar.ReadGenotypes (UtX, G, true); } //eigen-decomposition and calculate trace_G cout<<"Start Eigen-Decomposition..."<<endl; time_start=clock(); cPar.trace_G=EigenDecomp (G, U, eval, 0); cPar.trace_G=0.0; for (size_t i=0; i<eval->size; i++) { if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} cPar.trace_G+=gsl_vector_get (eval, i); } cPar.trace_G/=(double)eval->size; cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, y, Uty); //calculate REMLE/MLE estimate and pve CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); cPar.PrintSummary(); //Creat and calcualte UtX, use a large memory cout<<"Calculating UtX..."<<endl; time_start=clock(); CalcUtX (U, UtX); cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //perform analysis; assume X and y are already centered BSLMMDAP cBslmmDap; cBslmmDap.CopyFromParam(cPar); time_start=clock(); cBslmmDap.DAP_CalcBF (U, UtX, Uty, eval, y); cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cBslmmDap.CopyToParam(cPar); //release all matrices and vectors gsl_matrix_free (G); gsl_matrix_free (U); gsl_matrix_free (UtW); gsl_vector_free (eval); gsl_vector_free (Uty); } gsl_matrix_free (W); gsl_vector_free (y); gsl_matrix_free (UtX); } else if (cPar.a_mode==15) { //perform EM algorithm and estimate parameters vector<string> vec_rs; vector<double> vec_sa2, vec_sb2, wab; vector<vector<vector<double> > > BF; //read hyp and bf files (functions defined in BSLMMDAP) ReadFile_hyb (cPar.file_hyp, vec_sa2, vec_sb2, wab); ReadFile_bf (cPar.file_bf, vec_rs, BF); cPar.ns_test=vec_rs.size(); if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"<<endl;} //load annotations gsl_matrix *Ac; gsl_matrix_int *Ad; gsl_vector_int *dlevel; size_t kc, kd; if (!cPar.file_cat.empty()) { ReadFile_cat (cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd); } else { kc=0; kd=0; } cout<<"## number of blocks = "<<BF.size()<<endl; cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl; cout<<"## grid size for hyperparameters = "<<wab.size()<<endl; cout<<"## number of continuous annotations = "<<kc<<endl; cout<<"## number of discrete annotations = "<<kd<<endl; //DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel); //perform analysis BSLMMDAP cBslmmDap; cBslmmDap.CopyFromParam(cPar); time_start=clock(); cBslmmDap.DAP_EstimateHyper (kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac, Ad, dlevel); cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cBslmmDap.CopyToParam(cPar); gsl_matrix_free(Ac); gsl_matrix_int_free(Ad); gsl_vector_int_free(dlevel); } else { // } } /* //LDR (change 14 to 16?) if (cPar.a_mode==14) { gsl_vector *y=gsl_vector_alloc (cPar.ni_test); gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (1, 1); vector<vector<unsigned char> > Xt; //set covariates matrix W and phenotype vector y //an intercept is included in W cPar.CopyCvtPhen (W, y, 0); //read in genotype matrix X cPar.ReadGenotypes (Xt, G, false); LDR cLdr; cLdr.CopyFromParam(cPar); time_start=clock(); cLdr.VB(Xt, W, y); cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cLdr.CopyToParam(cPar); gsl_vector_free (y); gsl_matrix_free (W); gsl_matrix_free (G); } */ cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0); return; } void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar) { string file_str; file_str=cPar.path_out+"/"+cPar.file_out; file_str+=".log.txt"; ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) {cout<<"error writing log file: "<<file_str.c_str()<<endl; return;} outfile<<"##"<<endl; outfile<<"## GEMMA Version = "<<version<<endl; outfile<<"##"<<endl; outfile<<"## Command Line Input = "; for(int i = 0; i < argc; i++) { outfile<<argv[i]<<" "; } outfile<<endl; outfile<<"##"<<endl; time_t rawtime; time(&rawtime); tm *ptm = localtime (&rawtime); outfile<<"## Date = "<<asctime(ptm); //ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl; outfile<<"##"<<endl; outfile<<"## Summary Statistics:"<<endl; if (!cPar.file_cor.empty() || !cPar.file_study.empty() || !cPar.file_mstudy.empty() ) { outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl; outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl; //outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl; //outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_ref<<endl; //outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; //outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl; outfile<<"## number of variance components = "<<cPar.n_vc<<endl; outfile<<"## pve estimates = "; for (size_t i=0; i<cPar.v_pve.size(); i++) { outfile<<" "<<cPar.v_pve[i]; } outfile<<endl; outfile<<"## se(pve) = "; for (size_t i=0; i<cPar.v_se_pve.size(); i++) { outfile<<" "<<cPar.v_se_pve[i]; } outfile<<endl; if (cPar.n_vc>1) { outfile<<"## total pve = "<<cPar.pve_total<<endl; outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl; } outfile<<"## sigma2 per snp = "; for (size_t i=0; i<cPar.v_sigma2.size(); i++) { outfile<<" "<<cPar.v_sigma2[i]; } outfile<<endl; outfile<<"## se(sigma2 per snp) = "; for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) { outfile<<" "<<cPar.v_se_sigma2[i]; } outfile<<endl; outfile<<"## enrichment = "; for (size_t i=0; i<cPar.v_enrich.size(); i++) { outfile<<" "<<cPar.v_enrich[i]; } outfile<<endl; outfile<<"## se(enrichment) = "; for (size_t i=0; i<cPar.v_se_enrich.size(); i++) { outfile<<" "<<cPar.v_se_enrich[i]; } outfile<<endl; } else if (!cPar.file_beta.empty() && (cPar.a_mode==61 || cPar.a_mode==62) ) { outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl; outfile<<"## number of total individuals in the reference = "<<cPar.ni_total<<endl; outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl; outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_total<<endl; outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; outfile<<"## number of variance components = "<<cPar.n_vc<<endl; } else if (!cPar.file_beta.empty() && (cPar.a_mode==66 || cPar.a_mode==67) ) { outfile<<"## number of total individuals in the sample = "<<cPar.ni_total<<endl; outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl; outfile<<"## number of total SNPs in the sample = "<<cPar.ns_total<<endl; outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; outfile<<"## number of variance components = "<<cPar.n_vc<<endl; outfile<<"## pve estimates = "; for (size_t i=0; i<cPar.v_pve.size(); i++) { outfile<<" "<<cPar.v_pve[i]; } outfile<<endl; outfile<<"## se(pve) = "; for (size_t i=0; i<cPar.v_se_pve.size(); i++) { outfile<<" "<<cPar.v_se_pve[i]; } outfile<<endl; if (cPar.n_vc>1) { outfile<<"## total pve = "<<cPar.pve_total<<endl; outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl; } outfile<<"## sigma2 per snp = "; for (size_t i=0; i<cPar.v_sigma2.size(); i++) { outfile<<" "<<cPar.v_sigma2[i]; } outfile<<endl; outfile<<"## se(sigma2 per snp) = "; for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) { outfile<<" "<<cPar.v_se_sigma2[i]; } outfile<<endl; outfile<<"## enrichment = "; for (size_t i=0; i<cPar.v_enrich.size(); i++) { outfile<<" "<<cPar.v_enrich[i]; } outfile<<endl; outfile<<"## se(enrichment) = "; for (size_t i=0; i<cPar.v_se_enrich.size(); i++) { outfile<<" "<<cPar.v_se_enrich[i]; } outfile<<endl; } else { outfile<<"## number of total individuals = "<<cPar.ni_total<<endl; if (cPar.a_mode==43) { outfile<<"## number of analyzed individuals = "<<cPar.ni_cvt<<endl; outfile<<"## number of individuals with full phenotypes = "<<cPar.ni_test<<endl; } else if (cPar.a_mode!=27 && cPar.a_mode!=28) { outfile<<"## number of analyzed individuals = "<<cPar.ni_test<<endl; } outfile<<"## number of covariates = "<<cPar.n_cvt<<endl; outfile<<"## number of phenotypes = "<<cPar.n_ph<<endl; if (cPar.a_mode==43) { outfile<<"## number of observed data = "<<cPar.np_obs<<endl; outfile<<"## number of missing data = "<<cPar.np_miss<<endl; } if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63 || cPar.a_mode==66 || cPar.a_mode==67) { outfile<<"## number of variance components = "<<cPar.n_vc<<endl; } if (!(cPar.file_gene).empty()) { outfile<<"## number of total genes = "<<cPar.ng_total<<endl; outfile<<"## number of analyzed genes = "<<cPar.ng_test<<endl; } else if (cPar.file_epm.empty()) { outfile<<"## number of total SNPs = "<<cPar.ns_total<<endl; outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; } else { outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; } if (cPar.a_mode==13) { outfile<<"## number of cases = "<<cPar.ni_case<<endl; outfile<<"## number of controls = "<<cPar.ni_control<<endl; } } if ( (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) { // outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl; if (cPar.n_ph==1) { outfile<<"## pve estimates = "; for (size_t i=0; i<cPar.v_pve.size(); i++) { outfile<<" "<<cPar.v_pve[i]; } outfile<<endl; outfile<<"## se(pve) = "; for (size_t i=0; i<cPar.v_se_pve.size(); i++) { outfile<<" "<<cPar.v_se_pve[i]; } outfile<<endl; if (cPar.n_vc>1) { outfile<<"## total pve = "<<cPar.pve_total<<endl; outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl; } outfile<<"## sigma2 estimates = "; for (size_t i=0; i<cPar.v_sigma2.size(); i++) { outfile<<" "<<cPar.v_sigma2[i]; } outfile<<endl; outfile<<"## se(sigma2) = "; for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) { outfile<<" "<<cPar.v_se_sigma2[i]; } outfile<<endl; if (!cPar.file_beta.empty() ) { outfile<<"## enrichment = "; for (size_t i=0; i<cPar.v_enrich.size(); i++) { outfile<<" "<<cPar.v_enrich[i]; } outfile<<endl; outfile<<"## se(enrichment) = "; for (size_t i=0; i<cPar.v_se_enrich.size(); i++) { outfile<<" "<<cPar.v_se_enrich[i]; } outfile<<endl; } /* outfile<<"## beta estimate in the null model = "; for (size_t i=0; i<cPar.beta_remle_null.size(); i++) { outfile<<" "<<cPar.beta_remle_null[i]; } outfile<<endl; outfile<<"## se(beta) = "; for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) { outfile<<" "<<cPar.se_beta_remle_null[i]; } outfile<<endl; */ } } if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl; outfile<<"## MLE log-likelihood in the null model = "<<cPar.logl_mle_H0<<endl; if (cPar.n_ph==1) { //outfile<<"## lambda REMLE estimate in the null (linear mixed) model = "<<cPar.l_remle_null<<endl; //outfile<<"## lambda MLE estimate in the null (linear mixed) model = "<<cPar.l_mle_null<<endl; outfile<<"## pve estimate in the null model = "<<cPar.pve_null<<endl; outfile<<"## se(pve) in the null model = "<<cPar.pve_se_null<<endl; outfile<<"## vg estimate in the null model = "<<cPar.vg_remle_null<<endl; outfile<<"## ve estimate in the null model = "<<cPar.ve_remle_null<<endl; outfile<<"## beta estimate in the null model = "; for (size_t i=0; i<cPar.beta_remle_null.size(); i++) { outfile<<" "<<cPar.beta_remle_null[i]; } outfile<<endl; outfile<<"## se(beta) = "; for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) { outfile<<" "<<cPar.se_beta_remle_null[i]; } outfile<<endl; } else { size_t c; outfile<<"## REMLE estimate for Vg in the null model: "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<cPar.Vg_remle_null[c]<<"\t"; } outfile<<endl; } outfile<<"## se(Vg): "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<sqrt(cPar.VVg_remle_null[c])<<"\t"; } outfile<<endl; } outfile<<"## REMLE estimate for Ve in the null model: "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<cPar.Ve_remle_null[c]<<"\t"; } outfile<<endl; } outfile<<"## se(Ve): "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<sqrt(cPar.VVe_remle_null[c])<<"\t"; } outfile<<endl; } outfile<<"## MLE estimate for Vg in the null model: "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<cPar.n_ph; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<cPar.Vg_mle_null[c]<<"\t"; } outfile<<endl; } outfile<<"## se(Vg): "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<sqrt(cPar.VVg_mle_null[c])<<"\t"; } outfile<<endl; } outfile<<"## MLE estimate for Ve in the null model: "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<cPar.n_ph; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<cPar.Ve_mle_null[c]<<"\t"; } outfile<<endl; } outfile<<"## se(Ve): "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<sqrt(cPar.VVe_mle_null[c])<<"\t"; } outfile<<endl; } outfile<<"## estimate for B (d by c) in the null model (columns correspond to the covariates provided in the file): "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<cPar.n_cvt; j++) { c=i*cPar.n_cvt+j; outfile<<cPar.beta_remle_null[c]<<"\t"; } outfile<<endl; } outfile<<"## se(B): "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<cPar.n_cvt; j++) { c=i*cPar.n_cvt+j; outfile<<cPar.se_beta_remle_null[c]<<"\t"; } outfile<<endl; } } } /* if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { if (cPar.n_ph==1) { outfile<<"## REMLE vg estimate in the null model = "<<cPar.vg_remle_null<<endl; outfile<<"## REMLE ve estimate in the null model = "<<cPar.ve_remle_null<<endl; } else { size_t c; outfile<<"## REMLE estimate for Vg in the null model: "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<cPar.Vg_remle_null[c]<<"\t"; } outfile<<endl; } outfile<<"## REMLE estimate for Ve in the null model: "<<endl; for (size_t i=0; i<cPar.n_ph; i++) { for (size_t j=0; j<=i; j++) { c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); outfile<<cPar.Ve_remle_null[c]<<"\t"; } outfile<<endl; } } } */ if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) { outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl; } if (cPar.a_mode==11 || cPar.a_mode==13) { outfile<<"##"<<endl; outfile<<"## MCMC related:"<<endl; outfile<<"## initial value of h = "<<cPar.cHyp_initial.h<<endl; outfile<<"## initial value of rho = "<<cPar.cHyp_initial.rho<<endl; outfile<<"## initial value of pi = "<<exp(cPar.cHyp_initial.logp)<<endl; outfile<<"## initial value of |gamma| = "<<cPar.cHyp_initial.n_gamma<<endl; outfile<<"## random seed = "<<cPar.randseed<<endl; outfile<<"## acceptance ratio = "<<(double)cPar.n_accept/(double)((cPar.w_step+cPar.s_step)*cPar.n_mh)<<endl; } outfile<<"##"<<endl; outfile<<"## Computation Time:"<<endl; outfile<<"## total computation time = "<<cPar.time_total<<" min "<<endl; outfile<<"## computation time break down: "<<endl; if (cPar.a_mode==21 || cPar.a_mode==22 || cPar.a_mode==11 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) { outfile<<"## time on calculating relatedness matrix = "<<cPar.time_G<<" min "<<endl; } if (cPar.a_mode==31) { outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; } if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) { outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; outfile<<"## time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl; } if ((cPar.a_mode>=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) { outfile<<"## time on optimization = "<<cPar.time_opt<<" min "<<endl; } if (cPar.a_mode==11 || cPar.a_mode==13) { outfile<<"## time on proposal = "<<cPar.time_Proposal<<" min "<<endl; outfile<<"## time on mcmc = "<<cPar.time_opt<<" min "<<endl; outfile<<"## time on Omega = "<<cPar.time_Omega<<" min "<<endl; } if (cPar.a_mode==41 || cPar.a_mode==42) { outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; } if (cPar.a_mode==43) { outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; outfile<<"## time on predicting phenotypes = "<<cPar.time_opt<<" min "<<endl; } outfile<<"##"<<endl; outfile.close(); outfile.clear(); return; }