aboutsummaryrefslogtreecommitdiff
path: root/src/gemma.cpp
diff options
context:
space:
mode:
authorxiangzhou2016-05-23 17:05:35 -0400
committerxiangzhou2016-05-23 17:05:35 -0400
commit943e970c9cbc184dcca679fbe455f48c32242cdc (patch)
tree70369d969dee3432e09e345c8106a541ac0d5a76 /src/gemma.cpp
parent3ab77854a52ac016b9e2b824858f5f0c695d4170 (diff)
downloadpangemma-943e970c9cbc184dcca679fbe455f48c32242cdc.tar.gz
version 0.95alpha
Diffstat (limited to 'src/gemma.cpp')
-rw-r--r--src/gemma.cpp1840
1 files changed, 1416 insertions, 424 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp
index b8693a8..3b9fe29 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -39,9 +39,11 @@
#include "vc_float.h"
#include "lm_float.h" //for LM class
#include "bslmm_float.h" //for BSLMM class
+#include "ldr_float.h" //for LDR class
#include "lmm_float.h" //for LMM class, and functions CalcLambda, CalcPve, CalcVgVe
#include "mvlmm_float.h" //for MVLMM class
#include "prdt_float.h" //for PRDT class
+#include "varcov_float.h" //for MVLMM class
#include "mathfunc_float.h" //for a few functions
#else
#include "io.h"
@@ -49,9 +51,11 @@
#include "vc.h"
#include "lm.h"
#include "bslmm.h"
+#include "ldr.h"
#include "lmm.h"
#include "mvlmm.h"
#include "prdt.h"
+#include "varcov.h"
#include "mathfunc.h"
#endif
@@ -60,26 +64,23 @@ using namespace std;
-GEMMA::GEMMA(void):
-version("0.95alpha"), date("08/08/2014"), year("2011")
+GEMMA::GEMMA(void):
+version("0.95alpha"), date("07/11/2015"), year("2011")
{}
void GEMMA::PrintHeader (void)
{
cout<<endl;
cout<<"*********************************************************"<<endl;
- cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl;
+ cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl;
cout<<" Version "<<version<<", "<<date<<" "<<endl;
- cout<<" Visit "<<endl;
- cout<<" http://stephenslab.uchicago.edu/software.html "<<endl;
- cout<<" http://home.uchicago.edu/~xz7/software.html "<<endl;
- cout<<" For Possible Updates "<<endl;
+ cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl;
cout<<" (C) "<<year<<" Xiang Zhou "<<endl;
- cout<<" GNU General Public License "<<endl;
- cout<<" For Help, Type ./gemma -h "<<endl;
+ cout<<" GNU General Public License "<<endl;
+ cout<<" For Help, Type ./gemma -h "<<endl;
cout<<"*********************************************************"<<endl;
cout<<endl;
-
+
return;
}
@@ -89,13 +90,13 @@ void GEMMA::PrintLicense (void)
cout<<endl;
cout<<"The Software Is Distributed Under GNU General Public License, But May Also Require The Following Notifications."<<endl;
cout<<endl;
-
+
cout<<"Including Lapack Routines In The Software May Require The Following Notification:"<<endl;
cout<<"Copyright (c) 1992-2010 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved."<<endl;
cout<<"Copyright (c) 2000-2010 The University of California Berkeley. All rights reserved."<<endl;
- cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. All rights reserved."<<endl;
+ cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. All rights reserved."<<endl;
cout<<endl;
-
+
cout<<"$COPYRIGHT$"<<endl;
cout<<"Additional copyrights may follow"<<endl;
cout<<"$HEADER$"<<endl;
@@ -113,9 +114,9 @@ void GEMMA::PrintLicense (void)
<<"THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE "
<<"OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."<<endl;
cout<<endl;
-
-
-
+
+
+
return;
}
@@ -124,9 +125,9 @@ void GEMMA::PrintLicense (void)
void GEMMA::PrintHelp(size_t option)
{
if (option==0) {
- cout<<endl;
+ cout<<endl;
cout<<" GEMMA version "<<version<<", released on "<<date<<endl;
- cout<<" implemented by Xiang Zhou"<<endl;
+ cout<<" implemented by Xiang Zhou"<<endl;
cout<<endl;
cout<<" type ./gemma -h [num] for detailed helps"<<endl;
cout<<" options: " << endl;
@@ -135,72 +136,116 @@ void GEMMA::PrintHelp(size_t option)
cout<<" 3: SNP QC"<<endl;
cout<<" 4: calculate relatedness matrix"<<endl;
cout<<" 5: perform eigen decomposition"<<endl;
- cout<<" 6: perform variance component estiamtion"<<endl;
+ cout<<" 6: perform variance component estimation"<<endl;
cout<<" 7: fit a linear model"<<endl;
cout<<" 8: fit a linear mixed model"<<endl;
cout<<" 9: fit a multivariate linear mixed model"<<endl;
cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl;
cout<<" 11: obtain predicted values"<<endl;
- cout<<" 12: note"<<endl;
+ cout<<" 12: calculate snp variance covariance"<<endl;
+ cout<<" 13: note"<<endl;
cout<<endl;
- }
-
+ }
+
if (option==1) {
cout<<" QUICK GUIDE" << endl;
cout<<" to generate a relatedness matrix: "<<endl;
cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl;
cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl;
+ cout<<" to generate the S matrix: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl;
+ cout<<" to generate the q vector: "<<endl;
+ cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl;
+ cout<<" to generate the ldsc weigthts: "<<endl;
+ cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl;
cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl;
cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl;
cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl;
cout<<" to estimate variance components: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -vc -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -k [filename] -vc -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -mk [filename] -vc -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -mk [filename] -vc -o [prefix]"<<endl;
+ cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" options for the above two commands: -crt -windowbp [num]"<<endl;
+ cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl;
+ cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl;
cout<<" to fit a linear mixed model: "<<endl;
cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" to fit a linear mixed model to test g by e effects: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
cout<<" to fit a multivariate linear mixed model: "<<endl;
cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
cout<<" to fit a Bayesian sparse linear mixed model: "<<endl;
cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl;
cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl;
cout<<" to obtain predicted values: "<<endl;
cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
+ cout<<" to calculate correlations between SNPs: "<<endl;
+ cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl;
+ cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl;
cout<<endl;
}
-
+
if (option==2) {
cout<<" FILE I/O RELATED OPTIONS" << endl;
- cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl;
- cout<<" requires: *.fam, *.bim and *.bed files"<<endl;
+ cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl;
+ cout<<" requires: *.fam, *.bim and *.bed files"<<endl;
cout<<" missing value: -9"<<endl;
cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl;
- cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
+ cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
+ cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl;
- cout<<" format: phenotype for individual 1"<<endl;
- cout<<" phenotype for individual 2"<<endl;
+ cout<<" format: phenotype for individual 1"<<endl;
+ cout<<" phenotype for individual 2"<<endl;
+ cout<<" ..."<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl;
+ cout<<" format: rs#1, base_position, chr_number"<<endl;
+ cout<<" rs#2, base_position, chr_number"<<endl;
cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl;
- cout<<" format: rs#1, base_position, chr_number"<<endl;
- cout<<" rs#2, base_position, chr_number"<<endl;
+ // WJA added
+ cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl;
+ cout<<" requires: *.bgen, *.sample files"<<endl;
+
+ cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl;
+ cout<<" format: variable for individual 1"<<endl;
+ cout<<" variable for individual 2"<<endl;
cout<<" ..."<<endl;
- cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl;
- cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl;
- cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl;
- cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl;
- cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl;
- cout<<" format: covariate 1 for individual 1, ... , covariate c for individual 1"<<endl;
- cout<<" covariate 1 for individual 2, ... , covariate c for individual 2"<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl;
+ cout<<" format: variable for individual 1"<<endl;
+ cout<<" variable for individual 2"<<endl;
cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
+ cout<<" missing value: NA"<<endl;
+ cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl;
+ cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl;
+ cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl;
+ cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl;
+ cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl;
+ cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl;
+ cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl;
+ cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl;
+ cout<<" missing value: NA"<<endl;
cout<<" note: the intercept (a column of 1s) may need to be included"<<endl;
cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl;
cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl;
@@ -210,74 +255,81 @@ void GEMMA::PrintHelp(size_t option)
cout<<" n4: estimated gamma column number (0 to ignore)"<<endl;
cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl;
cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl;
- cout<<" format: value for individual 1"<<endl;
- cout<<" value for individual 2"<<endl;
+ cout<<" format: value for individual 1"<<endl;
+ cout<<" value for individual 2"<<endl;
cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
+ cout<<" missing value: NA"<<endl;
cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl;
cout<<" -mu [num] "<<" specify input estimated mean value"<<endl;
cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl;
- cout<<" format: header"<<endl;
- cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl;
- cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl;
+ cout<<" format: header"<<endl;
+ cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl;
+ cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl;
cout<<" ..."<<endl;
- cout<<" missing value: not allowed"<<endl;
+ cout<<" missing value: not allowed"<<endl;
cout<<" -r [filename] "<<" specify input total read count file name"<<endl;
- cout<<" format: total read count for individual 1"<<endl;
- cout<<" total read count for individual 2"<<endl;
+ cout<<" format: total read count for individual 1"<<endl;
+ cout<<" total read count for individual 2"<<endl;
cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
+ cout<<" missing value: NA"<<endl;
cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl;
- cout<<" format: rs#1"<<endl;
- cout<<" rs#2"<<endl;
+ cout<<" format: rs#1"<<endl;
+ cout<<" rs#2"<<endl;
cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
+ cout<<" missing value: NA"<<endl;
cout<<" -silence "<<" silent terminal display"<<endl;
cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl;
cout<<" options: 1: \"n by n matrix\" format"<<endl;
cout<<" 2: \"id id value\" format"<<endl;
- cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl;
+ cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl;
cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl;
- cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl;
- cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl;
- cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl;
- cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl;
+ cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl;
+ cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl;
+ cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl;
+ cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl;
cout<<endl;
}
-
+
if (option==3) {
cout<<" SNP QC OPTIONS" << endl;
- cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl;
- cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl;
- cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl;
- cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl;
- cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl;
+ cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl;
+ cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl;
+ cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl;
+ cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl;
+ cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl;
cout<<endl;
}
-
+
if (option==4) {
cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
- cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
+ cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
cout<<" options: 1: centered XX^T/p"<<endl;
cout<<" 2: standardized XX^T/p"<<endl;
cout<<" note: non-polymorphic SNPs are excluded "<<endl;
cout<<endl;
}
-
+
if (option==5) {
cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl;
- cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
+ cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
cout<<endl;
}
if (option==6) {
cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
- cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
+ cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
+ cout<<" options (with kinship file): 1: HE regression (default)"<<endl;
+ cout<<" 2: REML"<<endl;
+ cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl;
+ cout<<" 2: Standardized genotypes"<<endl;
+ cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl;
+ cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl;
+ cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl;
cout<<endl;
}
-
+
if (option==7) {
- cout<<" LINEAR MODEL OPTIONS" << endl;
+ cout<<" LINEAR MODEL OPTIONS" << endl;
cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl;
cout<<" options: 1: Wald test"<<endl;
cout<<" 2: Likelihood ratio test"<<endl;
@@ -285,21 +337,21 @@ void GEMMA::PrintHelp(size_t option)
cout<<" 4: 1-3"<<endl;
cout<<endl;
}
-
+
if (option==8) {
- cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
+ cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: Wald test"<<endl;
+ cout<<" options: 1: Wald test"<<endl;
cout<<" 2: Likelihood ratio test"<<endl;
cout<<" 3: Score test"<<endl;
cout<<" 4: 1-3"<<endl;
cout<<" 5: Parameter estimation in the null model only"<<endl;
- cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl;
- cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl;
- cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
+ cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl;
+ cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl;
+ cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
cout<<endl;
}
-
+
if (option==9) {
cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl;
@@ -310,51 +362,63 @@ void GEMMA::PrintHelp(size_t option)
cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl;
cout<<endl;
}
-
+
if (option==10) {
cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl;
cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: BSLMM"<<endl;
- cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl;
- cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
-
+ cout<<" options: 1: BSLMM"<<endl;
+ cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl;
+ cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
+
+ cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl;
+ cout<<" options: 1: LDR"<<endl;
+
cout<<" MCMC OPTIONS" << endl;
- cout<<" Prior" << endl;
- cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl;
- cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl;
- cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl;
- cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl;
- cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl;
- cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
- cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl;
- cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl;
-
+ cout<<" Prior" << endl;
+ cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl;
+ cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl;
+ cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl;
+ cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl;
+ cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl;
+ cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
+ cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl;
+ cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl;
+
cout<<" Proposal" << endl;
- cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
- cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
-
+ cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
+ cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
+ cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
+ cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
+
cout<<" Others" << endl;
- cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl;
- cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl;
- cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl;
- cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl;
- cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl;
- cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl;
- cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
+ cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl;
+ cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl;
+ cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl;
+ cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl;
+ cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl;
+ cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl;
+ cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
cout<<endl;
}
-
+
if (option==11) {
cout<<" PREDICTION OPTIONS" << endl;
cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl;
- cout<<" options: 1: predict for individuals with missing phenotypes"<<endl;
- cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
+ cout<<" options: 1: predict for individuals with missing phenotypes"<<endl;
+ cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
cout<<endl;
}
-
+
if (option==12) {
+ cout<<" CALC CORRELATION OPTIONS" << endl;
+ cout<<" -calccor "<<endl;
+ cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl;
+ cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl;
+ cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl;
+ cout<<endl;
+ }
+
+ if (option==13) {
cout<<" NOTE"<<endl;
cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl;
cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl;
@@ -363,17 +427,29 @@ void GEMMA::PrintHelp(size_t option)
cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl;
cout<<endl;
}
-
+
return;
}
-
+//options
+//gk: 21-22
+//gs: 25-26
+//gq: 27-28
+//eigen: 31-32
+//lmm: 1-5
+//bslmm: 11-13
+//predict: 41-43
+//lm: 51
+//vc: 61
+//ci: 66-67
+//calccor: 71
+//gw: 72
void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
{
string str;
-
- for(int i = 1; i < argc; i++) {
+
+ for(int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-bfile")==0 || strcmp(argv[i], "--bfile")==0 || strcmp(argv[i], "-b")==0) {
if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
++i;
@@ -381,6 +457,13 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.assign(argv[i]);
cPar.file_bfile=str;
}
+ else if (strcmp(argv[i], "-mbfile")==0 || strcmp(argv[i], "--mbfile")==0 || strcmp(argv[i], "-mb")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mbfile=str;
+ }
else if (strcmp(argv[i], "-silence")==0) {
cPar.mode_silence=true;
}
@@ -391,6 +474,13 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.assign(argv[i]);
cPar.file_geno=str;
}
+ else if (strcmp(argv[i], "-mg")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mgeno=str;
+ }
else if (strcmp(argv[i], "-p")==0) {
if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
++i;
@@ -405,6 +495,42 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.assign(argv[i]);
cPar.file_anno=str;
}
+ // WJA added
+ else if (strcmp(argv[i], "-oxford")==0 || strcmp(argv[i], "--oxford")==0 || strcmp(argv[i], "-x")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_oxford=str;
+ }
+ else if (strcmp(argv[i], "-gxe")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_gxe=str;
+ }
+ else if (strcmp(argv[i], "-widv")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_weight=str;
+ }
+ else if (strcmp(argv[i], "-wsnp")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_wsnp=str;
+ }
+ else if (strcmp(argv[i], "-wcat")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_wcat=str;
+ }
else if (strcmp(argv[i], "-k")==0) {
if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
++i;
@@ -440,6 +566,62 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.assign(argv[i]);
cPar.file_cvt=str;
}
+ else if (strcmp(argv[i], "-cat")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_cat=str;
+ }
+ else if (strcmp(argv[i], "-mcat")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mcat=str;
+ }
+ else if (strcmp(argv[i], "-beta")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_beta=str;
+ }
+ else if (strcmp(argv[i], "-cor")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_cor=str;
+ }
+ else if (strcmp(argv[i], "-study")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_study=str;
+ }
+ else if (strcmp(argv[i], "-ref")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_ref=str;
+ }
+ else if (strcmp(argv[i], "-mstudy")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mstudy=str;
+ }
+ else if (strcmp(argv[i], "-mref")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mref=str;
+ }
else if (strcmp(argv[i], "-epm")==0) {
if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
++i;
@@ -447,7 +629,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.assign(argv[i]);
cPar.file_epm=str;
}
- else if (strcmp(argv[i], "-en")==0) {
+ else if (strcmp(argv[i], "-en")==0) {
while (argv[i+1] != NULL && argv[i+1][0] != '-') {
++i;
str.clear();
@@ -503,7 +685,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.clear();
str.assign(argv[i]);
cPar.k_mode=atoi(str.c_str());
- }
+ }
else if (strcmp(argv[i], "-n")==0) {
(cPar.p_column).clear();
while (argv[i+1] != NULL && argv[i+1][0] != '-') {
@@ -533,7 +715,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
str.clear();
str.assign(argv[i]);
cPar.file_out=str;
- }
+ }
else if (strcmp(argv[i], "-miss")==0) {
if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
++i;
@@ -566,31 +748,101 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
cPar.maf_level=-1;
}
else if (strcmp(argv[i], "-gk")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -eigen -vc -lm -lmm -bslmm -predict options is allowed."<<endl; break;}
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=21; continue;}
++i;
str.clear();
str.assign(argv[i]);
cPar.a_mode=20+atoi(str.c_str());
- }
+ }
+ else if (strcmp(argv[i], "-gs")==0) {
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=25; continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode=24+atoi(str.c_str());
+ }
+ else if (strcmp(argv[i], "-gq")==0) {
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=27; continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode=26+atoi(str.c_str());
+ }
+ else if (strcmp(argv[i], "-gw")==0) {
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=72; continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode=71+atoi(str.c_str());
+ }
+ else if (strcmp(argv[i], "-sample")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.ni_subsample=atoi(str.c_str());
+ }
else if (strcmp(argv[i], "-eigen")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -eigen -vc -lm -lmm -bslmm -predict options is allowed."<<endl; break;}
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=31; continue;}
++i;
str.clear();
str.assign(argv[i]);
cPar.a_mode=30+atoi(str.c_str());
- }
+ }
+ else if (strcmp(argv[i], "-calccor")==0) {
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=71; continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode=70+atoi(str.c_str());
+ }
else if (strcmp(argv[i], "-vc")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -eigen -vc -lm -lmm -bslmm -predict options is allowed."<<endl; break;}
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=61; continue;}
++i;
str.clear();
str.assign(argv[i]);
cPar.a_mode=60+atoi(str.c_str());
- }
+ }
+ else if (strcmp(argv[i], "-ci")==0) {
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=66; continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode=65+atoi(str.c_str());
+ }
+ else if (strcmp(argv[i], "-pve")==0) {
+ double s=0;
+ while (argv[i+1] != NULL && (argv[i+1][0] != '-' || !isalpha(argv[i+1][1]) ) ) {
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.v_pve.push_back(atof(str.c_str()));
+ s+=atof(str.c_str());
+ }
+ if (s==1) {
+ cout<<"summation of pve equals one."<<endl;
+ }
+ }
+ else if (strcmp(argv[i], "-blocks")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.n_block=atoi(str.c_str());
+ }
+ else if (strcmp(argv[i], "-noconstrain")==0) {
+ cPar.noconstrain=true;
+ }
else if (strcmp(argv[i], "-lm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -eigen -vc -lm -lmm -bslmm -predict options is allowed."<<endl; break;}
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=51; continue;}
++i;
str.clear();
@@ -598,7 +850,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
cPar.a_mode=50+atoi(str.c_str());
}
else if (strcmp(argv[i], "-fa")==0 || strcmp(argv[i], "-lmm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -eigen -vc -lm -lmm -bslmm -predict options is allowed."<<endl; break;}
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=1; continue;}
++i;
str.clear();
@@ -665,13 +917,21 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
cPar.crt=1;
}
else if (strcmp(argv[i], "-bslmm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -eigen -vc -lm -lmm -bslmm -predict options is allowed."<<endl; break;}
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=11; continue;}
++i;
str.clear();
str.assign(argv[i]);
cPar.a_mode=10+atoi(str.c_str());
}
+ else if (strcmp(argv[i], "-ldr")==0) {
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=14; continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode=13+atoi(str.c_str());
+ }
else if (strcmp(argv[i], "-hmin")==0) {
if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
++i;
@@ -799,25 +1059,46 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
cPar.n_mh=atoi(str.c_str());
}
else if (strcmp(argv[i], "-predict")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -eigen -vc -lm -lmm -bslmm -predict options is allowed."<<endl; break;}
+ if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=41; continue;}
++i;
str.clear();
str.assign(argv[i]);
cPar.a_mode=40+atoi(str.c_str());
}
+ else if (strcmp(argv[i], "-windowcm")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.window_cm=atof(str.c_str());
+ }
+ else if (strcmp(argv[i], "-windowbp")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.window_bp=atoi(str.c_str());
+ }
+ else if (strcmp(argv[i], "-windowns")==0) {
+ if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.window_ns=atoi(str.c_str());
+ }
else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;}
}
-
+
//change prediction mode to 43, if the epm file is not provided
if (cPar.a_mode==41 && cPar.file_epm.empty()) {cPar.a_mode=43;}
-
+
return;
}
-void GEMMA::BatchRun (PARAM &cPar)
+void GEMMA::BatchRun (PARAM &cPar)
{
clock_t time_begin, time_start;
time_begin=clock();
@@ -828,25 +1109,26 @@ void GEMMA::BatchRun (PARAM &cPar)
if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;}
cPar.CheckData();
if (cPar.error==true) {cout<<"error! fail to check data. "<<endl; return;}
- //Prediction for bslmm
+
+ //Prediction for bslmm
if (cPar.a_mode==41 || cPar.a_mode==42) {
gsl_vector *y_prdt;
-
+
y_prdt=gsl_vector_alloc (cPar.ni_total-cPar.ni_test);
//set to zero
gsl_vector_set_zero (y_prdt);
-
+
PRDT cPRDT;
cPRDT.CopyFromParam(cPar);
-
+
//add breeding value if needed
if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
cout<<"Adding Breeding Values ... "<<endl;
-
+
gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
gsl_vector *u_hat=gsl_vector_alloc (cPar.ni_test);
-
+
//read kinship matrix and set u_hat
vector<int> indicator_all;
size_t c_bv=0;
@@ -854,13 +1136,13 @@ void GEMMA::BatchRun (PARAM &cPar)
indicator_all.push_back(1);
if (cPar.indicator_bv[i]==1) {gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); c_bv++;}
}
-
+
ReadFile_kin (cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, cPar.error, G);
if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //read u
- cPRDT.AddBV(G, u_hat, y_prdt);
-
+
+ //read u
+ cPRDT.AddBV(G, u_hat, y_prdt);
+
gsl_matrix_free(G);
gsl_vector_free(u_hat);
}
@@ -872,10 +1154,10 @@ void GEMMA::BatchRun (PARAM &cPar)
else {
cPRDT.AnalyzeBimbam (y_prdt);
}
-
+
//add mu
gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
-
+
//convert y to probability if needed
if (cPar.a_mode==42) {
double d;
@@ -885,51 +1167,51 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_vector_set(y_prdt, i, d);
}
}
-
-
+
+
cPRDT.CopyToParam(cPar);
-
+
cPRDT.WriteFiles(y_prdt);
-
+
gsl_vector_free(y_prdt);
}
-
-
+
+
//Prediction with kinship matrix only; for one or more phenotypes
if (cPar.a_mode==43) {
- //first, use individuals with full phenotypes to obtain estimates of Vg and Ve
+ //first, use individuals with full phenotypes to obtain estimates of Vg and Ve
gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
+ gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
+ gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
gsl_vector *eval=gsl_vector_alloc (Y->size1);
-
+
gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph);
gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt);
//set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
+ //an intercept should be included in W,
cPar.CopyCvtPhen (W, Y, 0);
cPar.CopyCvtPhen (W_full, Y_full, 1);
-
- gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph);
- gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1);
+
+ gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph);
+ gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1);
gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2);
-
+
//read relatedness matrix G, and matrix G_full
ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
ReadFile_kin (cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full);
if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
+
//center matrix G
CenterMatrix (G);
CenterMatrix (G_full);
-
+
//eigen-decomposition and calculate trace_G
cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
+ time_start=clock();
cPar.trace_G=EigenDecomp (G, U, eval, 0);
cPar.trace_G=0.0;
for (size_t i=0; i<eval->size; i++) {
@@ -937,8 +1219,8 @@ void GEMMA::BatchRun (PARAM &cPar)
cPar.trace_G+=gsl_vector_get (eval, i);
}
cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+ cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
//calculate UtW and Uty
CalcUtX (U, W, UtW);
CalcUtX (U, Y, UtY);
@@ -948,7 +1230,7 @@ void GEMMA::BatchRun (PARAM &cPar)
if (cPar.n_ph==1) {
gsl_vector *beta=gsl_vector_alloc (W->size2);
gsl_vector *se_beta=gsl_vector_alloc (W->size2);
-
+
double lambda, logl, vg, ve;
gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
@@ -959,29 +1241,29 @@ void GEMMA::BatchRun (PARAM &cPar)
cout<<"REMLE estimate for vg in the null model = "<<vg<<endl;
cout<<"REMLE estimate for ve in the null model = "<<ve<<endl;
cPar.vg_remle_null=vg; cPar.ve_remle_null=ve;
-
+
//obtain Y_hat from fixed effects
- gsl_vector_view Yhat_col=gsl_matrix_column (Y_hat, 0);
+ gsl_vector_view Yhat_col=gsl_matrix_column (Y_hat, 0);
gsl_blas_dgemv (CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
-
+
//obtain H
gsl_matrix_set_identity (H_full);
gsl_matrix_scale (H_full, ve);
gsl_matrix_scale (G_full, vg);
gsl_matrix_add (H_full, G_full);
-
- //free matrices
+
+ //free matrices
gsl_vector_free(beta);
gsl_vector_free(se_beta);
- } else {
+ } else {
gsl_matrix *Vg=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
gsl_matrix *Ve=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
gsl_matrix *B=gsl_matrix_alloc (cPar.n_ph, W->size2);
gsl_matrix *se_B=gsl_matrix_alloc (cPar.n_ph, W->size2);
-
+
//obtain estimates
CalcMvLmmVgVeBeta (eval, UtW, UtY, cPar.em_iter, cPar.nr_iter, cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max, cPar.n_region, Vg, Ve, B, se_B);
-
+
cout<<"REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<Vg->size1; i++) {
for (size_t j=0; j<=i; j++) {
@@ -1004,110 +1286,250 @@ void GEMMA::BatchRun (PARAM &cPar)
cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) );
}
}
-
+
//obtain Y_hat from fixed effects
gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
-
+
//obtain H
KroneckerSym(G_full, Vg, H_full);
for (size_t i=0; i<G_full->size1; i++) {
gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2);
gsl_matrix_add (&H_sub.matrix, Ve);
}
-
- //free matrices
+
+ //free matrices
gsl_matrix_free (Vg);
gsl_matrix_free (Ve);
gsl_matrix_free (B);
gsl_matrix_free (se_B);
}
-
+
PRDT cPRDT;
-
+
cPRDT.CopyFromParam(cPar);
-
+
cout<<"Predicting Missing Phentypes ... "<<endl;
- time_start=clock();
+ time_start=clock();
cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
cPRDT.WriteFiles(Y_full);
-
+
gsl_matrix_free(Y);
- gsl_matrix_free(W);
+ gsl_matrix_free(W);
gsl_matrix_free(G);
- gsl_matrix_free(U);
+ gsl_matrix_free(U);
gsl_matrix_free(UtW);
gsl_matrix_free(UtY);
gsl_vector_free(eval);
-
+
gsl_matrix_free(Y_full);
gsl_matrix_free(Y_hat);
gsl_matrix_free(W_full);
- gsl_matrix_free(G_full);
+ gsl_matrix_free(G_full);
gsl_matrix_free(H_full);
}
-
-
+
+
//Generate Kinship matrix
- if (cPar.a_mode==21 || cPar.a_mode==22) {
+ if (cPar.a_mode==21 || cPar.a_mode==22) {
cout<<"Calculating Relatedness Matrix ... "<<endl;
-
+
gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
-
+
time_start=clock();
cPar.CalcKin (G);
cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
if (cPar.error==true) {cout<<"error! fail to calculate relatedness matrix. "<<endl; return;}
-
+
if (cPar.a_mode==21) {
cPar.WriteMatrix (G, "cXX");
} else {
cPar.WriteMatrix (G, "sXX");
}
-
+
gsl_matrix_free (G);
}
-
-
+
+ //Compute the LDSC weights (not implemented yet)
+ if (cPar.a_mode==72) {
+ cout<<"Calculating Weights ... "<<endl;
+
+ VARCOV cVarcov;
+ cVarcov.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cVarcov.AnalyzePlink ();
+ } else {
+ cVarcov.AnalyzeBimbam ();
+ }
+
+ cVarcov.CopyToParam(cPar);
+ }
+
+
+ //Compute the S matrix (and its variance), that is used for variance component estimation using summary statistics
+ if (cPar.a_mode==25 || cPar.a_mode==26) {
+ cout<<"Calculating the S Matrix ... "<<endl;
+
+ gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
+ gsl_vector *ns=gsl_vector_alloc (cPar.n_vc+1);
+ gsl_matrix_set_zero(S);
+ gsl_vector_set_zero(ns);
+
+ gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+ gsl_vector_view ns_vec=gsl_vector_subvector(ns, 0, cPar.n_vc);
+
+ gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
+ gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
+ gsl_matrix_set_zero (K);
+ gsl_matrix_set_zero (A);
+
+ gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+ gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
+
+ cPar.CopyCvtPhen (W, y, 0);
+
+ set<string> setSnps_beta;
+ map <string, double> mapRS2wA, mapRS2wK;
+
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ time_start=clock();
+ cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector);
+ cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
+
+ gsl_vector_set (ns, cPar.n_vc, cPar.ni_test);
+
+ cPar.WriteMatrix (S, "S");
+ cPar.WriteVector (ns, "size");
+ cPar.WriteVar ("snps");
+ /*
+ cout<<scientific;
+ for (size_t i=0; i<cPar.n_vc; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get(S, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+
+ for (size_t i=cPar.n_vc; i<cPar.n_vc*2; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get(S, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ gsl_matrix_free (S);
+ gsl_vector_free (ns);
+
+ gsl_matrix_free (A);
+ gsl_matrix_free (K);
+
+ gsl_vector_free (y);
+ gsl_matrix_free (K);
+ }
+
+ //Compute the q vector, that is used for variance component estimation using summary statistics
+ if (cPar.a_mode==27 || cPar.a_mode==28) {
+ gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
+ gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
+ gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
+ gsl_vector_set_zero (q);
+ gsl_vector_set_zero (s);
+
+ gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_ni;
+ vector<double> vec_weight, vec_z2;
+ map<string, double> mapRS2weight;
+ mapRS2weight.clear();
+
+ time_start=clock();
+ ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test);
+ cout<<"## number of total individuals = "<<cPar.ni_total<<endl;
+ cout<<"## number of total SNPs = "<<cPar.ns_total<<endl;
+ cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ cout<<"## number of variance components = "<<cPar.n_vc<<endl;
+ cout<<"Calculating the q vector ... "<<endl;
+ Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
+ cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ if (cPar.error==true) {cout<<"error! fail to calculate the q vector. "<<endl; return;}
+
+ gsl_vector_set (s, cPar.n_vc, cPar.ni_total);
+
+ cPar.WriteMatrix (Vq, "Vq");
+ cPar.WriteVector (q, "q");
+ cPar.WriteVector (s, "size");
+ /*
+ for (size_t i=0; i<cPar.n_vc; i++) {
+ cout<<gsl_vector_get(q, i)<<endl;
+ }
+ */
+ gsl_matrix_free (Vq);
+ gsl_vector_free (q);
+ gsl_vector_free (s);
+ }
+
+
+ //Calculate SNP covariance
+ if (cPar.a_mode==71) {
+ VARCOV cVarcov;
+ cVarcov.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cVarcov.AnalyzePlink ();
+ } else {
+ cVarcov.AnalyzeBimbam ();
+ }
+
+ cVarcov.CopyToParam(cPar);
+ }
+
+
//LM
if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM
gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
+ gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
+
+ //set covariates matrix W and phenotype matrix Y
+ //an intercept should be included in W,
cPar.CopyCvtPhen (W, Y, 0);
-
+
//Fit LM or mvLM
- if (cPar.n_ph==1) {
+ if (cPar.n_ph==1) {
LM cLm;
cLm.CopyFromParam(cPar);
-
+
gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-
- if (!cPar.file_gene.empty()) {
+
+ if (!cPar.file_gene.empty()) {
cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype
} else if (!cPar.file_bfile.empty()) {
cLm.AnalyzePlink (W, &Y_col.vector);
+ } else if (!cPar.file_oxford.empty()) {
+ cLm.Analyzebgen (W, &Y_col.vector);
} else {
cLm.AnalyzeBimbam (W, &Y_col.vector);
}
-
+
cLm.WriteFiles();
cLm.CopyToParam(cPar);
}
/*
- else {
+ else {
MVLM cMvlm;
- cMvlm.CopyFromParam(cPar);
-
+ cMvlm.CopyFromParam(cPar);
+
if (!cPar.file_bfile.empty()) {
cMvlm.AnalyzePlink (W, Y);
} else {
cMvlm.AnalyzeBimbam (W, Y);
}
-
+
cMvlm.WriteFiles();
cMvlm.CopyToParam(cPar);
}
@@ -1115,27 +1537,202 @@ void GEMMA::BatchRun (PARAM &cPar)
//release all matrices and vectors
gsl_matrix_free (Y);
gsl_matrix_free (W);
- }
+ }
//VC estimation with one or multiple kinship matrices
//REML approach only
//if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp
- //for one phenotype only;
- if (cPar.a_mode==61) {
+ //for one phenotype only;
+ if (cPar.a_mode==61 || cPar.a_mode==62) {
+ if (!cPar.file_beta.empty() ) {
+ //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
+ //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
+ //update indicator_snps, so that the numbers are in accordance with mapRS2wK
+ set<string> setSnps_beta;
+ ReadFile_snps_header (cPar.file_beta, setSnps_beta);
+
+ map <string, double> mapRS2wA, mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ cPar.UpdateSNP (mapRS2wK);
+
+ //setup matrices and vectors
+ gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
+ gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
+ gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
+ gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
+
+ gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
+ gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
+
+ gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+ gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
+
+ gsl_matrix_set_zero (K);
+ gsl_matrix_set_zero (A);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Vq);
+ gsl_vector_set_zero (q);
+ gsl_vector_set_zero (s);
+
+ cPar.CopyCvtPhen (W, y, 0);
+
+ gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+ gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_ni;
+ vector<double> vec_weight, vec_z2;
+
+ //read beta, based on the mapRS2wK
+ ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test);
+
+ cout<<"Study Panel: "<<endl;
+ cout<<"## number of total individuals = "<<cPar.ni_study<<endl;
+ cout<<"## number of total SNPs = "<<cPar.ns_study<<endl;
+ cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ cout<<"## number of variance components = "<<cPar.n_vc<<endl;
+
+ //compute q
+ Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
+
+ //compute S
+ time_start=clock();
+ cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
+ cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
+
+ //compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+
+ //if LDSC weights, then compute the weights and run the above steps again
+ if (cPar.a_mode==62) {
+ //compute the weights and normalize the weights for A
+ cPar.UpdateWeight (1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
+
+ //read beta file again, and update weigths vector
+ ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_total, cPar.ns_test);
+
+ //compute q
+ Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
+
+ //compute S
+ time_start=clock();
+ cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
+ cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
+
+ //compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+ }
+
+ gsl_vector_set (s, cPar.n_vc, cPar.ni_test);
+
+ cPar.WriteMatrix (S, "S");
+ cPar.WriteMatrix (Vq, "Vq");
+ cPar.WriteVector (q, "q");
+ cPar.WriteVector (s, "size");
+
+ gsl_matrix_free (S);
+ gsl_matrix_free (Vq);
+ gsl_vector_free (q);
+ gsl_vector_free (s);
+
+ gsl_matrix_free (A);
+ gsl_matrix_free (K);
+ gsl_vector_free (y);
+ gsl_matrix_free (W);
+ } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
+ if (!cPar.file_study.empty()) {
+ string sfile=cPar.file_study+".size.txt";
+ CountFileLines (sfile, cPar.n_vc);
+ } else {
+ string file_name;
+ igzstream infile (cPar.file_mstudy.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open mstudy file: "<<cPar.file_study<<endl; return;}
+
+ safeGetline(infile, file_name);
+
+ infile.clear();
+ infile.close();
+
+ string sfile=file_name+".size.txt";
+ CountFileLines (sfile, cPar.n_vc);
+ }
+
+ cPar.n_vc=cPar.n_vc-1;
+
+ gsl_matrix *S=gsl_matrix_alloc (2*cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
+ //gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1, (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
+ //gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1, (n_lines*(n_lines+1))/2*(n_lines+1) );
+ gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
+ gsl_vector *s_study=gsl_vector_alloc (cPar.n_vc);
+ gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
+ gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+
+ gsl_matrix_set_zero(Vq);
+ //gsl_matrix_set_zero(V);
+ //gsl_matrix_set_zero(Vslope);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s_study);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_study.empty()) {
+ ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
+ } else {
+ ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
+ }
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
+ }
+
+ cout<<"## number of variance components = "<<cPar.n_vc<<endl;
+ cout<<"## number of individuals in the sample = "<<cPar.ni_study<<endl;
+ cout<<"## number of individuals in the reference = "<<cPar.ni_ref<<endl;
+
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+
+ gsl_vector_view s_sub=gsl_vector_subvector (s, 0, cPar.n_vc);
+ gsl_vector_memcpy (&s_sub.vector, s_ref);
+ gsl_vector_set (s, cPar.n_vc, cPar.ni_ref);
+
+ cPar.WriteMatrix (S, "S");
+ cPar.WriteMatrix (Vq, "Vq");
+ cPar.WriteVector (q, "q");
+ cPar.WriteVector (s, "size");
+
+ gsl_matrix_free (S);
+ gsl_matrix_free (Vq);
+ //gsl_matrix_free (V);
+ //gsl_matrix_free (Vslope);
+ gsl_vector_free (q);
+ gsl_vector_free (s_study);
+ gsl_vector_free (s_ref);
+ gsl_vector_free (s);
+ } else {
gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc );
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
+ //set covariates matrix W and phenotype matrix Y
+ //an intercept should be included in W,
cPar.CopyCvtPhen (W, Y, 0);
//read kinship matrices
if (!(cPar.file_mk).empty()) {
ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
+
//center matrix G, and obtain v_traceG
double d=0;
(cPar.v_traceG).clear();
@@ -1152,7 +1749,7 @@ void GEMMA::BatchRun (PARAM &cPar)
} else if (!(cPar.file_kin).empty()) {
ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
+
//center matrix G
CenterMatrix (G);
@@ -1167,8 +1764,8 @@ void GEMMA::BatchRun (PARAM &cPar)
/*
//eigen-decomposition and calculate trace_G
cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
-
+ time_start=clock();
+
if (cPar.a_mode==31) {
cPar.trace_G=EigenDecomp (G, U, eval, 1);
} else {
@@ -1182,14 +1779,14 @@ void GEMMA::BatchRun (PARAM &cPar)
}
cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
} else {
ReadFile_eigenU (cPar.file_ku, cPar.error, U);
if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
- ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
+
+ ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
+
cPar.trace_G=0.0;
for (size_t i=0; i<eval->size; i++) {
if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
@@ -1202,7 +1799,7 @@ void GEMMA::BatchRun (PARAM &cPar)
if (cPar.n_ph==1) {
// if (cPar.n_vc==1) {
/*
- //calculate UtW and Uty
+ //calculate UtW and Uty
CalcUtX (U, W, UtW);
CalcUtX (U, Y, UtY);
@@ -1228,10 +1825,10 @@ void GEMMA::BatchRun (PARAM &cPar)
cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
}
-
+
CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
cPar.PrintSummary();
-
+
//calculate and output residuals
if (cPar.a_mode==5) {
gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
@@ -1239,11 +1836,11 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
+
//obtain Utu and Ute
gsl_vector_memcpy (y_hat, &UtY_col.vector);
gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
+
double d, u, e;
for (size_t i=0; i<eval->size; i++) {
d=gsl_vector_get (eval, i);
@@ -1252,37 +1849,210 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_vector_set (Utu_hat, i, u);
gsl_vector_set (Ute_hat, i, e);
}
-
+
//obtain u and e
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
+
+ //output residuals
cPar.WriteVector(u_hat, "residU");
cPar.WriteVector(e_hat, "residE");
-
+
gsl_vector_free(u_hat);
gsl_vector_free(e_hat);
gsl_vector_free(y_hat);
- }
-*/
+ }
+*/
// } else {
gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
VC cVc;
- cVc.CopyFromParam(cPar);
- cVc.CalcVCreml (G, W, &Y_col.vector);
+ cVc.CopyFromParam(cPar);
+ if (cPar.a_mode==61) {
+ cVc.CalcVChe (G, W, &Y_col.vector);
+ } else {
+ cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector);
+ }
cVc.CopyToParam(cPar);
-
//obtain pve from sigma2
//obtain se_pve from se_sigma2
-
+
//}
- }
+ }
+ }
+
+ }
+
-
+ //compute confidence intervals with additional summary statistics
+ //we do not check the sign of z-scores here, but they have to be matched with the genotypes
+ if (cPar.a_mode==66 || cPar.a_mode==67) {
+ //read reference file first
+ gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
+ gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Svar);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
+ }
+
+ //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
+ //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
+ set<string> setSnps_beta;
+ ReadFile_snps_header (cPar.file_beta, setSnps_beta);
+
+ //obtain the weights for wA, which contains the SNP weights for SNPs used in the model
+ map <string, double> mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ //set up matrices and vector
+ gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc);
+ gsl_vector *w=gsl_vector_alloc (mapRS2wK.size());
+ gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size());
+ gsl_vector *z=gsl_vector_alloc (mapRS2wK.size());
+ gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_size;
+ vector<double> vec_z;
+
+ map <string, double> mapRS2z, mapRS2wA;
+ map <string, string> mapRS2A1;
+ string file_str;
+
+ //update s_vec, the number of snps in each category
+ for (size_t i=0; i<cPar.n_vc; i++) {
+ vec_size.push_back(0);
+ }
+
+ for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
+ vec_size[cPar.mapRS2cat[it->first]]++;
+ }
+
+ for (size_t i=0; i<cPar.n_vc; i++) {
+ gsl_vector_set(s_vec, i, vec_size[i]);
+ }
+
+ //update mapRS2wA using v_pve and s_vec
+ if (cPar.a_mode==66) {
+ for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
+ mapRS2wA[it->first]=1;
+ }
+ } else {
+ cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
+ }
+
+ //read in z-scores based on allele 0, and save that into a vector
+ ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
+
+ //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat
+ //sign of z is determined by matching alleles
+ cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
+
+ //compute an n by k matrix of X_iWz
+ cout<<"Calculating Xz ... "<<endl;
+
+ gsl_matrix_set_zero(Xz);
+ gsl_vector_set_all (w1, 1);
+
+ if (!cPar.file_bfile.empty() ) {
+ file_str=cPar.file_bfile+".bed";
+ PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_mbfile.empty() ){
+ MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ }
+ /*
+ cout<<"Xz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (Xz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ if (cPar.a_mode==66) {
+ gsl_matrix_memcpy (XWz, Xz);
+ } else if (cPar.a_mode==67) {
+ cout<<"Calculating XWz ... "<<endl;
+
+ gsl_matrix_set_zero(XWz);
+
+ if (!cPar.file_bfile.empty() ) {
+ file_str=cPar.file_bfile+".bed";
+ PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
+ } else if (!cPar.file_mbfile.empty() ){
+ MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
+ }
+ }
+ /*
+ cout<<"XWz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ cout<<gsl_vector_get (w, i)<<endl;
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (XWz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ //compute an p by k matrix of X_j^TWX_iWz
+ cout<<"Calculating XtXWz ... "<<endl;
+ gsl_matrix_set_zero(XtXWz);
+
+ if (!cPar.file_bfile.empty() ) {
+ file_str=cPar.file_bfile+".bed";
+ PlinkXtXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXtXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
+ } else if (!cPar.file_mbfile.empty() ){
+ MFILEXtXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXtXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
+ }
+ /*
+ cout<<"XtXWz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (XtXWz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ //compute confidence intervals
+ CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+
+ //write files
+ //cPar.WriteMatrix (XWz, "XWz");
+ //cPar.WriteMatrix (XtXWz, "XtXWz");
+ //cPar.WriteVector (w, "w");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Svar);
+ gsl_vector_free(s_ref);
+
+ gsl_matrix_free(Xz);
+ gsl_matrix_free(XWz);
+ gsl_matrix_free(XtXWz);
+ gsl_vector_free(w);
+ gsl_vector_free(w1);
+ gsl_vector_free(z);
+ gsl_vector_free(s_vec);
}
-
-
+
+
//LMM or mvLMM or Eigen-Decomposition
if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==31) { //Fit LMM or mvLMM or eigen
gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
@@ -1290,33 +2060,62 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_matrix *B=gsl_matrix_alloc (Y->size2, W->size2); //B is a d by c matrix
gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2);
gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
+ gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
gsl_vector *eval=gsl_vector_alloc (Y->size1);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
+ gsl_vector *env=gsl_vector_alloc (Y->size1);
+ gsl_vector *weight=gsl_vector_alloc (Y->size1);
+
+ //set covariates matrix W and phenotype matrix Y
+ //an intercept should be included in W,
cPar.CopyCvtPhen (W, Y, 0);
-
- //read relatedness matrix G
+ if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);}
+
+ //read relatedness matrix G
if (!(cPar.file_kin).empty()) {
ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
+
//center matrix G
CenterMatrix (G);
-
+
+ //is residual weights are provided, then
+ if (!cPar.file_weight.empty()) {
+ cPar.CopyWeight (weight);
+ double d, wi, wj;
+ for (size_t i=0; i<G->size1; i++) {
+ wi=gsl_vector_get(weight, i);
+ for (size_t j=i; j<G->size2; j++) {
+ wj=gsl_vector_get(weight, j);
+ d=gsl_matrix_get(G, i, j);
+ if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);}
+ gsl_matrix_set(G, i, j, d);
+ if (j!=i) {gsl_matrix_set(G, j, i, d);}
+ }
+ }
+ }
+
//eigen-decomposition and calculate trace_G
cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
-
+ time_start=clock();
+
if (cPar.a_mode==31) {
cPar.trace_G=EigenDecomp (G, U, eval, 1);
} else {
cPar.trace_G=EigenDecomp (G, U, eval, 0);
}
+ if (!cPar.file_weight.empty()) {
+ double wi;
+ for (size_t i=0; i<U->size1; i++) {
+ wi=gsl_vector_get(weight, i);
+ if (wi<=0) {wi=0;} else {wi=sqrt(wi);}
+ gsl_vector_view Urow=gsl_matrix_row (U, i);
+ gsl_vector_scale (&Urow.vector, wi);
+ }
+ }
+
cPar.trace_G=0.0;
for (size_t i=0; i<eval->size; i++) {
if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
@@ -1324,14 +2123,14 @@ void GEMMA::BatchRun (PARAM &cPar)
}
cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
} else {
ReadFile_eigenU (cPar.file_ku, cPar.error, U);
if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
- ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
+
+ ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
+
cPar.trace_G=0.0;
for (size_t i=0; i<eval->size; i++) {
if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
@@ -1339,14 +2138,29 @@ void GEMMA::BatchRun (PARAM &cPar)
}
cPar.trace_G/=(double)eval->size;
}
-
+
if (cPar.a_mode==31) {
cPar.WriteMatrix(U, "eigenU");
cPar.WriteVector(eval, "eigenD");
- } else {
- //calculate UtW and Uty
+ } else if (!cPar.file_gene.empty() ) {
+ //calculate UtW and Uty
CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
+ CalcUtX (U, Y, UtY);
+
+ LMM cLmm;
+ cLmm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
+ gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
+
+ cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype
+
+ cLmm.WriteFiles();
+ cLmm.CopyToParam(cPar);
+ } else {
+ //calculate UtW and Uty
+ CalcUtX (U, W, UtW);
+ CalcUtX (U, Y, UtY);
//calculate REMLE/MLE estimate and pve for univariate model
if (cPar.n_ph==1) {
@@ -1372,10 +2186,10 @@ void GEMMA::BatchRun (PARAM &cPar)
cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
}
-
+
CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
cPar.PrintSummary();
-
+
//calculate and output residuals
if (cPar.a_mode==5) {
gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
@@ -1383,11 +2197,11 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
+
//obtain Utu and Ute
gsl_vector_memcpy (y_hat, &UtY_col.vector);
gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
+
double d, u, e;
for (size_t i=0; i<eval->size; i++) {
d=gsl_vector_get (eval, i);
@@ -1396,81 +2210,106 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_vector_set (Utu_hat, i, u);
gsl_vector_set (Ute_hat, i, e);
}
-
+
//obtain u and e
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
+
+ //output residuals
cPar.WriteVector(u_hat, "residU");
cPar.WriteVector(e_hat, "residE");
-
+
gsl_vector_free(u_hat);
gsl_vector_free(e_hat);
gsl_vector_free(y_hat);
- }
- }
-
+ }
+ }
+
//Fit LMM or mvLMM
if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) {
- if (cPar.n_ph==1) {
+ if (cPar.n_ph==1) {
LMM cLmm;
cLmm.CopyFromParam(cPar);
-
+
gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- if (!cPar.file_gene.empty()) {
- cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype
- } else if (!cPar.file_bfile.empty()) {
- cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
- cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- }
-
+
+ if (!cPar.file_bfile.empty()) {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
+ } else {
+ cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
+ }
+ }
+ // WJA added
+ else if(!cPar.file_oxford.empty()) {
+ cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
+ }
+ else {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
+ } else {
+ cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
+ }
+ }
+
cLmm.WriteFiles();
cLmm.CopyToParam(cPar);
- } else {
+ } else {
MVLMM cMvlmm;
- cMvlmm.CopyFromParam(cPar);
-
+ cMvlmm.CopyFromParam(cPar);
+
if (!cPar.file_bfile.empty()) {
- cMvlmm.AnalyzePlink (U, eval, UtW, UtY);
- } else {
- cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY);
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzePlink (U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env);
+ }
+ }
+ else if(!cPar.file_oxford.empty())
+ {
+ cMvlmm.Analyzebgen (U, eval, UtW, UtY);
+ }
+ else {
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env);
+ }
}
-
+
cMvlmm.WriteFiles();
cMvlmm.CopyToParam(cPar);
}
}
}
-
-
+
+
//release all matrices and vectors
gsl_matrix_free (Y);
gsl_matrix_free (W);
gsl_matrix_free(B);
gsl_matrix_free(se_B);
- gsl_matrix_free (G);
+ gsl_matrix_free (G);
gsl_matrix_free (U);
gsl_matrix_free (UtW);
gsl_matrix_free (UtY);
gsl_vector_free (eval);
- }
-
-
+ gsl_vector_free (env);
+ }
+
+
//BSLMM
if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
+ gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
- gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
- //set covariates matrix W and phenotype vector y
- //an intercept should be included in W,
+ gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
+
+ //set covariates matrix W and phenotype vector y
+ //an intercept should be included in W,
cPar.CopyCvtPhen (W, y, 0);
-
+
//center y, even for case/control data
cPar.pheno_mean=CenterVector(y);
@@ -1482,32 +2321,32 @@ void GEMMA::BatchRun (PARAM &cPar)
//perform BSLMM analysis
BSLMM cBslmm;
cBslmm.CopyFromParam(cPar);
- time_start=clock();
+ time_start=clock();
cBslmm.MCMC(UtX, y);
cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
cBslmm.CopyToParam(cPar);
//else, if rho!=1
} else {
- gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
+ gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
gsl_vector *eval=gsl_vector_alloc (y->size);
gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
gsl_vector *Uty=gsl_vector_alloc (y->size);
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
+
+ //read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
cPar.ReadGenotypes (UtX, G, false);
-
+
//read relatedness matrix G
ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
+
//center matrix G
CenterMatrix (G);
} else {
cPar.ReadGenotypes (UtX, G, true);
}
-
+
//eigen-decomposition and calculate trace_G
cout<<"Start Eigen-Decomposition..."<<endl;
time_start=clock();
@@ -1518,39 +2357,39 @@ void GEMMA::BatchRun (PARAM &cPar)
cPar.trace_G+=gsl_vector_get (eval, i);
}
cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
+ cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //calculate UtW and Uty
CalcUtX (U, W, UtW);
CalcUtX (U, y, Uty);
-
+
//calculate REMLE/MLE estimate and pve
CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
+
cPar.PrintSummary();
-
+
//Creat and calcualte UtX, use a large memory
cout<<"Calculating UtX..."<<endl;
- time_start=clock();
+ time_start=clock();
CalcUtX (U, UtX);
cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//perform BSLMM analysis
BSLMM cBslmm;
cBslmm.CopyFromParam(cPar);
- time_start=clock();
- if (cPar.a_mode==12) { //ridge regression
+ time_start=clock();
+ if (cPar.a_mode==12) { //ridge regression
cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
} else { //Run MCMC
cBslmm.MCMC(U, UtX, Uty, eval, y);
}
cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
cBslmm.CopyToParam(cPar);
-
+
//release all matrices and vectors
- gsl_matrix_free (G);
+ gsl_matrix_free (G);
gsl_matrix_free (U);
gsl_matrix_free (UtW);
gsl_vector_free (eval);
@@ -1560,106 +2399,259 @@ void GEMMA::BatchRun (PARAM &cPar)
gsl_matrix_free (W);
gsl_vector_free (y);
gsl_matrix_free (UtX);
- }
-
-
-
+ }
+
+
+
+ //LDR
+ if (cPar.a_mode==14) {
+ gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+ gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
+ gsl_matrix *G=gsl_matrix_alloc (1, 1);
+ vector<vector<unsigned char> > Xt;
+
+ //set covariates matrix W and phenotype vector y
+ //an intercept is included in W
+ cPar.CopyCvtPhen (W, y, 0);
+
+ //read in genotype matrix X
+ cPar.ReadGenotypes (Xt, G, false);
+
+ LDR cLdr;
+ cLdr.CopyFromParam(cPar);
+ time_start=clock();
+
+ cLdr.VB(Xt, W, y);
+
+ cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ cLdr.CopyToParam(cPar);
+
+ gsl_vector_free (y);
+ gsl_matrix_free (W);
+ gsl_matrix_free (G);
+ }
+
cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0);
-
+
return;
}
-void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
+void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
{
string file_str;
file_str=cPar.path_out+"/"+cPar.file_out;
file_str+=".log.txt";
-
+
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {cout<<"error writing log file: "<<file_str.c_str()<<endl; return;}
-
+
outfile<<"##"<<endl;
outfile<<"## GEMMA Version = "<<version<<endl;
-
+
outfile<<"##"<<endl;
outfile<<"## Command Line Input = ";
- for(int i = 1; i < argc; i++) {
+ for(int i = 0; i < argc; i++) {
outfile<<argv[i]<<" ";
}
outfile<<endl;
outfile<<"##"<<endl;
- time_t rawtime;
+ time_t rawtime;
time(&rawtime);
tm *ptm = localtime (&rawtime);
- outfile<<"## Date = "<<asctime(ptm)<<endl;
+ outfile<<"## Date = "<<asctime(ptm);
//ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
-
+
outfile<<"##"<<endl;
outfile<<"## Summary Statistics:"<<endl;
- outfile<<"## number of total individuals = "<<cPar.ni_total<<endl;
- if (cPar.a_mode==43) {
- outfile<<"## number of analyzed individuals = "<<cPar.ni_cvt<<endl;
- outfile<<"## number of individuals with full phenotypes = "<<cPar.ni_test<<endl;
- } else {
- outfile<<"## number of analyzed individuals = "<<cPar.ni_test<<endl;
- }
- outfile<<"## number of covariates = "<<cPar.n_cvt<<endl;
- outfile<<"## number of phenotypes = "<<cPar.n_ph<<endl;
- if (cPar.a_mode==43) {
- outfile<<"## number of observed data = "<<cPar.np_obs<<endl;
- outfile<<"## number of missing data = "<<cPar.np_miss<<endl;
- }
- if (cPar.a_mode==61) {
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
- }
-
- if (!(cPar.file_gene).empty()) {
- outfile<<"## number of total genes = "<<cPar.ng_total<<endl;
- outfile<<"## number of analyzed genes = "<<cPar.ng_test<<endl;
- } else if (cPar.file_epm.empty()) {
- outfile<<"## number of total SNPs = "<<cPar.ns_total<<endl;
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ if (!cPar.file_cor.empty() || !cPar.file_study.empty() || !cPar.file_mstudy.empty() ) {
+ outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
+ outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
+ //outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
+ //outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_ref<<endl;
+ //outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ //outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
+ outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
+
+ outfile<<"## pve estimates = ";
+ for (size_t i=0; i<cPar.v_pve.size(); i++) {
+ outfile<<" "<<cPar.v_pve[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(pve) = ";
+ for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
+ outfile<<" "<<cPar.v_se_pve[i];
+ }
+ outfile<<endl;
+
+ if (cPar.n_vc>1) {
+ outfile<<"## total pve = "<<cPar.pve_total<<endl;
+ outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
+ }
+
+ outfile<<"## sigma2 per snp = ";
+ for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
+ outfile<<" "<<cPar.v_sigma2[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(sigma2 per snp) = ";
+ for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
+ outfile<<" "<<cPar.v_se_sigma2[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## enrichment = ";
+ for (size_t i=0; i<cPar.v_enrich.size(); i++) {
+ outfile<<" "<<cPar.v_enrich[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(enrichment) = ";
+ for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
+ outfile<<" "<<cPar.v_se_enrich[i];
+ }
+ outfile<<endl;
+ } else if (!cPar.file_beta.empty() && (cPar.a_mode==61 || cPar.a_mode==62) ) {
+ outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
+ outfile<<"## number of total individuals in the reference = "<<cPar.ni_total<<endl;
+ outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
+ outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_total<<endl;
+ outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
+ } else if (!cPar.file_beta.empty() && (cPar.a_mode==66 || cPar.a_mode==67) ) {
+ outfile<<"## number of total individuals in the sample = "<<cPar.ni_total<<endl;
+ outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
+ outfile<<"## number of total SNPs in the sample = "<<cPar.ns_total<<endl;
+ outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
+
+ outfile<<"## pve estimates = ";
+ for (size_t i=0; i<cPar.v_pve.size(); i++) {
+ outfile<<" "<<cPar.v_pve[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(pve) = ";
+ for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
+ outfile<<" "<<cPar.v_se_pve[i];
+ }
+ outfile<<endl;
+
+ if (cPar.n_vc>1) {
+ outfile<<"## total pve = "<<cPar.pve_total<<endl;
+ outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
+ }
+
+ outfile<<"## sigma2 per snp = ";
+ for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
+ outfile<<" "<<cPar.v_sigma2[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(sigma2 per snp) = ";
+ for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
+ outfile<<" "<<cPar.v_se_sigma2[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## enrichment = ";
+ for (size_t i=0; i<cPar.v_enrich.size(); i++) {
+ outfile<<" "<<cPar.v_enrich[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(enrichment) = ";
+ for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
+ outfile<<" "<<cPar.v_se_enrich[i];
+ }
+ outfile<<endl;
} else {
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ outfile<<"## number of total individuals = "<<cPar.ni_total<<endl;
+
+ if (cPar.a_mode==43) {
+ outfile<<"## number of analyzed individuals = "<<cPar.ni_cvt<<endl;
+ outfile<<"## number of individuals with full phenotypes = "<<cPar.ni_test<<endl;
+ } else if (cPar.a_mode!=27 && cPar.a_mode!=28) {
+ outfile<<"## number of analyzed individuals = "<<cPar.ni_test<<endl;
+ }
+
+ outfile<<"## number of covariates = "<<cPar.n_cvt<<endl;
+ outfile<<"## number of phenotypes = "<<cPar.n_ph<<endl;
+ if (cPar.a_mode==43) {
+ outfile<<"## number of observed data = "<<cPar.np_obs<<endl;
+ outfile<<"## number of missing data = "<<cPar.np_miss<<endl;
+ }
+ if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==66 || cPar.a_mode==67) {
+ outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
+ }
+
+ if (!(cPar.file_gene).empty()) {
+ outfile<<"## number of total genes = "<<cPar.ng_total<<endl;
+ outfile<<"## number of analyzed genes = "<<cPar.ng_test<<endl;
+ } else if (cPar.file_epm.empty()) {
+ outfile<<"## number of total SNPs = "<<cPar.ns_total<<endl;
+ outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ } else {
+ outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ }
+
+ if (cPar.a_mode==13) {
+ outfile<<"## number of cases = "<<cPar.ni_case<<endl;
+ outfile<<"## number of controls = "<<cPar.ni_control<<endl;
+ }
}
-
- if (cPar.a_mode==13) {
- outfile<<"## number of cases = "<<cPar.ni_case<<endl;
- outfile<<"## number of controls = "<<cPar.ni_control<<endl;
- }
-
-
- if (cPar.a_mode==61) {
- // outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
- if (cPar.n_ph==1) {
- outfile<<"## pve estimate in the null model = ";
- for (size_t i=0; i<cPar.v_pve.size(); i++) {
- outfile<<" "<<cPar.v_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## se(pve) in the null model = ";
- for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
- outfile<<" "<<cPar.v_se_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## sigma2 estimate in the null model = ";
- for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_sigma2[i];
- }
- outfile<<endl;
- outfile<<"## se(sigma2) in the null model = ";
- for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_se_sigma2[i];
- }
- outfile<<endl;
+ if ( (cPar.a_mode==61 || cPar.a_mode==62) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) {
+ // outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
+ if (cPar.n_ph==1) {
+ outfile<<"## pve estimates = ";
+ for (size_t i=0; i<cPar.v_pve.size(); i++) {
+ outfile<<" "<<cPar.v_pve[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(pve) = ";
+ for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
+ outfile<<" "<<cPar.v_se_pve[i];
+ }
+ outfile<<endl;
+
+ if (cPar.n_vc>1) {
+ outfile<<"## total pve = "<<cPar.pve_total<<endl;
+ outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
+ }
+
+ outfile<<"## sigma2 estimates = ";
+ for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
+ outfile<<" "<<cPar.v_sigma2[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(sigma2) = ";
+ for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
+ outfile<<" "<<cPar.v_se_sigma2[i];
+ }
+ outfile<<endl;
+
+ if (!cPar.file_beta.empty() ) {
+ outfile<<"## enrichment = ";
+ for (size_t i=0; i<cPar.v_enrich.size(); i++) {
+ outfile<<" "<<cPar.v_enrich[i];
+ }
+ outfile<<endl;
+
+ outfile<<"## se(enrichment) = ";
+ for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
+ outfile<<" "<<cPar.v_se_enrich[i];
+ }
+ outfile<<endl;
+ }
/*
outfile<<"## beta estimate in the null model = ";
for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
@@ -1672,19 +2664,19 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
*/
- }
+ }
}
-
+
if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
outfile<<"## MLE log-likelihood in the null model = "<<cPar.logl_mle_H0<<endl;
if (cPar.n_ph==1) {
//outfile<<"## lambda REMLE estimate in the null (linear mixed) model = "<<cPar.l_remle_null<<endl;
- //outfile<<"## lambda MLE estimate in the null (linear mixed) model = "<<cPar.l_mle_null<<endl;
+ //outfile<<"## lambda MLE estimate in the null (linear mixed) model = "<<cPar.l_mle_null<<endl;
outfile<<"## pve estimate in the null model = "<<cPar.pve_null<<endl;
- outfile<<"## se(pve) in the null model = "<<cPar.pve_se_null<<endl;
+ outfile<<"## se(pve) in the null model = "<<cPar.pve_se_null<<endl;
outfile<<"## vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
- outfile<<"## ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
+ outfile<<"## ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
outfile<<"## beta estimate in the null model = ";
for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
outfile<<" "<<cPar.beta_remle_null[i];
@@ -1695,10 +2687,10 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
outfile<<" "<<cPar.se_beta_remle_null[i];
}
outfile<<endl;
-
+
} else {
size_t c;
- outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
+ outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1706,7 +2698,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
- outfile<<"## se(Vg): "<<endl;
+ outfile<<"## se(Vg): "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1714,7 +2706,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
- outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
+ outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1722,7 +2714,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
- outfile<<"## se(Ve): "<<endl;
+ outfile<<"## se(Ve): "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1730,7 +2722,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
-
+
outfile<<"## MLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<cPar.n_ph; j++) {
@@ -1739,7 +2731,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
- outfile<<"## se(Vg): "<<endl;
+ outfile<<"## se(Vg): "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1747,7 +2739,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
- outfile<<"## MLE estimate for Ve in the null model: "<<endl;
+ outfile<<"## MLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<cPar.n_ph; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1755,7 +2747,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
- outfile<<"## se(Ve): "<<endl;
+ outfile<<"## se(Ve): "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1781,15 +2773,15 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
}
}
-
+
/*
if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
if (cPar.n_ph==1) {
outfile<<"## REMLE vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
- outfile<<"## REMLE ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
+ outfile<<"## REMLE ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
} else {
size_t c;
- outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
+ outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1797,7 +2789,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
outfile<<endl;
}
- outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
+ outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<cPar.n_ph; i++) {
for (size_t j=0; j<=i; j++) {
c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
@@ -1808,15 +2800,15 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
}
*/
-
-
+
+
if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl;
+ outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl;
}
-
- if (cPar.a_mode==11 || cPar.a_mode==13) {
+
+ if (cPar.a_mode==11 || cPar.a_mode==13) {
outfile<<"##"<<endl;
- outfile<<"## MCMC related:"<<endl;
+ outfile<<"## MCMC related:"<<endl;
outfile<<"## initial value of h = "<<cPar.cHyp_initial.h<<endl;
outfile<<"## initial value of rho = "<<cPar.cHyp_initial.rho<<endl;
outfile<<"## initial value of pi = "<<exp(cPar.cHyp_initial.logp)<<endl;
@@ -1824,7 +2816,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
outfile<<"## random seed = "<<cPar.randseed<<endl;
outfile<<"## acceptance ratio = "<<(double)cPar.n_accept/(double)((cPar.w_step+cPar.s_step)*cPar.n_mh)<<endl;
}
-
+
outfile<<"##"<<endl;
outfile<<"## Computation Time:"<<endl;
outfile<<"## total computation time = "<<cPar.time_total<<" min "<<endl;
@@ -1837,7 +2829,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
}
if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- outfile<<"## time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl;
+ outfile<<"## time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl;
}
if ((cPar.a_mode>=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) {
outfile<<"## time on optimization = "<<cPar.time_opt<<" min "<<endl;
@@ -1855,7 +2847,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
outfile<<"## time on predicting phenotypes = "<<cPar.time_opt<<" min "<<endl;
}
outfile<<"##"<<endl;
-
+
outfile.close();
outfile.clear();
return;