aboutsummaryrefslogtreecommitdiff
path: root/src/gemma.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gemma.cpp')
-rw-r--r--src/gemma.cpp6549
1 files changed, 3584 insertions, 2965 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 1a9ca9b..c72475b 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -16,427 +16,670 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include <cmath>
+#include <cstring>
+#include <ctime>
#include <fstream>
+#include <iostream>
#include <string>
-#include <cstring>
#include <sys/stat.h>
-#include <ctime>
-#include <cmath>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
-#include "gsl/gsl_eigen.h"
#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
-#include "lapack.h"
-#include "io.h"
-#include "gemma.h"
-#include "vc.h"
-#include "lm.h"
#include "bslmm.h"
#include "bslmmdap.h"
+#include "gemma.h"
+#include "io.h"
+#include "lapack.h"
#include "ldr.h"
+#include "lm.h"
#include "lmm.h"
+#include "mathfunc.h"
#include "mvlmm.h"
#include "prdt.h"
#include "varcov.h"
-#include "mathfunc.h"
+#include "vc.h"
using namespace std;
-GEMMA::GEMMA(void):
-version("0.97"), date("07/27/2017"), year("2017")
-{}
-
-void GEMMA::PrintHeader (void) {
- cout<<endl;
- cout<<"*********************************************************"<<endl;
- cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl;
- cout<<" Version "<<version<<", "<<date<<" "<<
- endl;
- cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl;
- cout<<" (C) "<<year<<" Xiang Zhou "<<endl;
- cout<<" GNU General Public License "<<endl;
- cout<<" For Help, Type ./gemma -h "<<endl;
- cout<<"*********************************************************"<<endl;
- cout<<endl;
+GEMMA::GEMMA(void) : version("0.97"), date("07/27/2017"), year("2017") {}
+
+void GEMMA::PrintHeader(void) {
+ cout << endl;
+ cout << "*********************************************************" << endl;
+ cout << " Genome-wide Efficient Mixed Model Association (GEMMA) " << endl;
+ cout << " Version " << version << ", " << date
+ << " " << endl;
+ cout << " Visit http://www.xzlab.org/software.html For Updates " << endl;
+ cout << " (C) " << year << " Xiang Zhou "
+ << endl;
+ cout << " GNU General Public License " << endl;
+ cout << " For Help, Type ./gemma -h " << endl;
+ cout << "*********************************************************" << endl;
+ cout << endl;
return;
}
-void GEMMA::PrintLicense (void) {
- cout<<endl;
- cout<<"The Software Is Distributed Under GNU General Public "<<
- "License, But May Also Require The Following Notifications."<<endl;
- cout<<endl;
-
- cout<<"Including Lapack Routines In The Software May Require"<<
- " The Following Notification:"<<endl;
- cout<<"Copyright (c) 1992-2010 The University of Tennessee and "<<
- "The University of Tennessee Research Foundation. All rights "<<
- "reserved."<<endl;
- cout<<"Copyright (c) 2000-2010 The University of California "<<
- "Berkeley. All rights reserved."<<endl;
- cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. "<<
- "All rights reserved."<<endl;
- cout<<endl;
-
- cout<<"$COPYRIGHT$"<<endl;
- cout<<"Additional copyrights may follow"<<endl;
- cout<<"$HEADER$"<<endl;
- cout<<"Redistribution and use in source and binary forms, with or "<<
- "without modification, are permitted provided that the following "<<
- " conditions are met:"<<endl;
- cout<<"- Redistributions of source code must retain the above "<<
- "copyright notice, this list of conditions and the following "<<
- "disclaimer."<<endl;
- cout<<"- Redistributions in binary form must reproduce the above "<<
- "copyright notice, this list of conditions and the following "<<
- "disclaimer listed in this license in the documentation and/or "<<
- "other materials provided with the distribution."<<endl;
- cout<<"- Neither the name of the copyright holders nor the names "<<
- "of its contributors may be used to endorse or promote products "<<
- "derived from this software without specific prior written "<<
- "permission."<<endl;
- cout<<"The copyright holders provide no reassurances that the "<<
- "source code provided does not infringe any patent, copyright, "<<
- "or any other "<<
- "intellectual property rights of third parties. "<<
- "The copyright holders disclaim any liability to any recipient "<<
- "for claims brought against "<<
- "recipient by any third party for infringement of that parties "<<
- "intellectual property rights. "<<endl;
- cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "<<
- "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "<<
- "INCLUDING, BUT NOT "<<
- "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "<<
- "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "<<
- "SHALL THE COPYRIGHT "<<
- "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "<<
- "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "<<
- "(INCLUDING, BUT NOT "<<
- "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "<<
- "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "<<
- "HOWEVER CAUSED AND ON ANY "<<
- "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "<<
- "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "<<
- "OUT OF THE USE "<<
- "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "<<
- "SUCH DAMAGE."<<endl;
- cout<<endl;
-
- return;
+void GEMMA::PrintLicense(void) {
+ cout << endl;
+ cout << "The Software Is Distributed Under GNU General Public "
+ << "License, But May Also Require The Following Notifications." << endl;
+ cout << endl;
+
+ cout << "Including Lapack Routines In The Software May Require"
+ << " The Following Notification:" << endl;
+ cout << "Copyright (c) 1992-2010 The University of Tennessee and "
+ << "The University of Tennessee Research Foundation. All rights "
+ << "reserved." << endl;
+ cout << "Copyright (c) 2000-2010 The University of California "
+ << "Berkeley. All rights reserved." << endl;
+ cout << "Copyright (c) 2006-2010 The University of Colorado Denver. "
+ << "All rights reserved." << endl;
+ cout << endl;
+
+ cout << "$COPYRIGHT$" << endl;
+ cout << "Additional copyrights may follow" << endl;
+ cout << "$HEADER$" << endl;
+ cout << "Redistribution and use in source and binary forms, with or "
+ << "without modification, are permitted provided that the following "
+ << " conditions are met:" << endl;
+ cout << "- Redistributions of source code must retain the above "
+ << "copyright notice, this list of conditions and the following "
+ << "disclaimer." << endl;
+ cout << "- Redistributions in binary form must reproduce the above "
+ << "copyright notice, this list of conditions and the following "
+ << "disclaimer listed in this license in the documentation and/or "
+ << "other materials provided with the distribution." << endl;
+ cout << "- Neither the name of the copyright holders nor the names "
+ << "of its contributors may be used to endorse or promote products "
+ << "derived from this software without specific prior written "
+ << "permission." << endl;
+ cout << "The copyright holders provide no reassurances that the "
+ << "source code provided does not infringe any patent, copyright, "
+ << "or any other "
+ << "intellectual property rights of third parties. "
+ << "The copyright holders disclaim any liability to any recipient "
+ << "for claims brought against "
+ << "recipient by any third party for infringement of that parties "
+ << "intellectual property rights. " << endl;
+ cout << "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "
+ << "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "
+ << "INCLUDING, BUT NOT "
+ << "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "
+ << "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "
+ << "SHALL THE COPYRIGHT "
+ << "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "
+ << "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "
+ << "(INCLUDING, BUT NOT "
+ << "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "
+ << "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "
+ << "HOWEVER CAUSED AND ON ANY "
+ << "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "
+ << "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "
+ << "OUT OF THE USE "
+ << "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "
+ << "SUCH DAMAGE." << endl;
+ cout << endl;
+
+ return;
}
void GEMMA::PrintHelp(size_t option) {
- if (option==0) {
- cout<<endl;
- cout<<" GEMMA version "<<version<<", released on "<<date<<endl;
- cout<<" implemented by Xiang Zhou"<<endl;
- cout<<endl;
- cout<<" type ./gemma -h [num] for detailed helps"<<endl;
- cout<<" options: " << endl;
- cout<<" 1: quick guide"<<endl;
- cout<<" 2: file I/O related"<<endl;
- cout<<" 3: SNP QC"<<endl;
- cout<<" 4: calculate relatedness matrix"<<endl;
- cout<<" 5: perform eigen decomposition"<<endl;
- cout<<" 6: perform variance component estimation"<<endl;
- cout<<" 7: fit a linear model"<<endl;
- cout<<" 8: fit a linear mixed model"<<endl;
- cout<<" 9: fit a multivariate linear mixed model"<<endl;
- cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl;
- cout<<" 11: obtain predicted values"<<endl;
- cout<<" 12: calculate snp variance covariance"<<endl;
- cout<<" 13: note"<<endl;
- cout<<endl;
+ if (option == 0) {
+ cout << endl;
+ cout << " GEMMA version " << version << ", released on " << date << endl;
+ cout << " implemented by Xiang Zhou" << endl;
+ cout << endl;
+ cout << " type ./gemma -h [num] for detailed helps" << endl;
+ cout << " options: " << endl;
+ cout << " 1: quick guide" << endl;
+ cout << " 2: file I/O related" << endl;
+ cout << " 3: SNP QC" << endl;
+ cout << " 4: calculate relatedness matrix" << endl;
+ cout << " 5: perform eigen decomposition" << endl;
+ cout << " 6: perform variance component estimation" << endl;
+ cout << " 7: fit a linear model" << endl;
+ cout << " 8: fit a linear mixed model" << endl;
+ cout << " 9: fit a multivariate linear mixed model" << endl;
+ cout << " 10: fit a Bayesian sparse linear mixed model" << endl;
+ cout << " 11: obtain predicted values" << endl;
+ cout << " 12: calculate snp variance covariance" << endl;
+ cout << " 13: note" << endl;
+ cout << endl;
}
- if (option==1) {
- cout<<" QUICK GUIDE" << endl;
- cout<<" to generate a relatedness matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl;
- cout<<" to generate the S matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl;
- cout<<" to generate the q vector: "<<endl;
- cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl;
- cout<<" to generate the ldsc weigthts: "<<endl;
- cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl;
- cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl;
- cout<<" to estimate variance components: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" options for the above two commands: -crt -windowbp [num]"<<endl;
- cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl;
- cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl;
- cout<<" to fit a linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a linear mixed model to test g by e effects: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl;
- cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a multivariate linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
- cout<<" to fit a Bayesian sparse linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl;
- cout<<" to obtain predicted values: "<<endl;
- cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
- cout<<" to calculate correlations between SNPs: "<<endl;
- cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl;
- cout<<endl;
+ if (option == 1) {
+ cout << " QUICK GUIDE" << endl;
+ cout << " to generate a relatedness matrix: " << endl;
+ cout << " ./gemma -bfile [prefix] -gk [num] -o [prefix]" << endl;
+ cout << " ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"
+ << endl;
+ cout << " to generate the S matrix: " << endl;
+ cout << " ./gemma -bfile [prefix] -gs -o [prefix]" << endl;
+ cout << " ./gemma -p [filename] -g [filename] -gs -o [prefix]"
+ << endl;
+ cout << " ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"
+ << endl;
+ cout << " ./gemma -p [filename] -g [filename] -cat [filename] -gs "
+ "-o [prefix]"
+ << endl;
+ cout << " ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"
+ << endl;
+ cout << " ./gemma -p [filename] -g [filename] -sample [num] -gs -o "
+ "[prefix]"
+ << endl;
+ cout << " to generate the q vector: " << endl;
+ cout << " ./gemma -beta [filename] -gq -o [prefix]" << endl;
+ cout << " ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"
+ << endl;
+ cout << " to generate the ldsc weigthts: " << endl;
+ cout << " ./gemma -beta [filename] -gw -o [prefix]" << endl;
+ cout << " ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"
+ << endl;
+ cout << " to perform eigen decomposition of the relatedness matrix: "
+ << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -k [filename] -eigen "
+ "-o [prefix]"
+ << endl;
+ cout << " to estimate variance components: " << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -vc [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -bfile [prefix] -mk [filename] -vc [num] -o "
+ "[prefix]"
+ << endl;
+ cout
+ << " ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -beta [filename] -cor [filename] -vc [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " ./gemma -beta [filename] -cor [filename] -cat [filename] "
+ "-vc [num] -o [prefix]"
+ << endl;
+ cout << " options for the above two commands: -crt -windowbp [num]"
+ << endl;
+ cout << " ./gemma -mq [filename] -ms [filename] -mv [filename] -vc "
+ "[num] -o [prefix]"
+ << endl;
+ cout << " or with summary statistics, replace bfile with mbfile, "
+ "or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"
+ << endl;
+ cout << " ./gemma -beta [filename] -bfile [filename] -cat "
+ "[filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -beta [filename] -bfile [filename] -cat "
+ "[filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"
+ << endl;
+ cout << " to fit a linear mixed model: " << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -k "
+ "[filename] -lmm [num] -o [prefix]"
+ << endl;
+ cout << " to fit a linear mixed model to test g by e effects: " << endl;
+ cout << " ./gemma -bfile [prefix] -gxe [filename] -k [filename] "
+ "-lmm [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -gxe "
+ "[filename] -k [filename] -lmm [num] -o [prefix]"
+ << endl;
+ cout << " to fit a univariate linear mixed model with different residual "
+ "weights for different individuals: "
+ << endl;
+ cout << " ./gemma -bfile [prefix] -weight [filename] -k [filename] "
+ "-lmm [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] "
+ "-weight [filename] -k [filename] -lmm [num] -o [prefix]"
+ << endl;
+ cout << " to fit a multivariate linear mixed model: " << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -n "
+ "[num1] [num2] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -k "
+ "[filename] -lmm [num] -n [num1] [num2] -o [prefix]"
+ << endl;
+ cout << " to fit a Bayesian sparse linear mixed model: " << endl;
+ cout << " ./gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -bslmm "
+ "[num] -o [prefix]"
+ << endl;
+ cout << " to obtain predicted values: " << endl;
+ cout << " ./gemma -bfile [prefix] -epm [filename] -emu [filename] "
+ "-ebv [filename] -k [filename] -predict [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -epm [filename] -emu "
+ "[filename] -ebv [filename] -k [filename] -predict [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " to calculate correlations between SNPs: " << endl;
+ cout << " ./gemma -bfile [prefix] -calccor -o [prefix]" << endl;
+ cout << " ./gemma -g [filename] -p [filename] -calccor -o [prefix]"
+ << endl;
+ cout << endl;
}
- if (option==2) {
- cout<<" FILE I/O RELATED OPTIONS" << endl;
- cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl;
- cout<<" requires: *.fam, *.bim and *.bed files"<<endl;
- cout<<" missing value: -9"<<endl;
- cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl;
- cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl;
- cout<<" format: phenotype for individual 1"<<endl;
- cout<<" phenotype for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl;
- cout<<" format: rs#1, base_position, chr_number"<<endl;
- cout<<" rs#2, base_position, chr_number"<<endl;
- cout<<" ..."<<endl;
+ if (option == 2) {
+ cout << " FILE I/O RELATED OPTIONS" << endl;
+ cout << " -bfile [prefix] "
+ << " specify input PLINK binary ped file prefix." << endl;
+ cout << " requires: *.fam, *.bim and *.bed files" << endl;
+ cout << " missing value: -9" << endl;
+ cout << " -g [filename] "
+ << " specify input BIMBAM mean genotype file name" << endl;
+ cout << " format: rs#1, allele0, allele1, genotype for individual "
+ "1, genotype for individual 2, ..."
+ << endl;
+ cout << " rs#2, allele0, allele1, genotype for individual "
+ "1, genotype for individual 2, ..."
+ << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -p [filename] "
+ << " specify input BIMBAM phenotype file name" << endl;
+ cout << " format: phenotype for individual 1" << endl;
+ cout << " phenotype for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -a [filename] "
+ << " specify input BIMBAM SNP annotation file name (optional)" << endl;
+ cout << " format: rs#1, base_position, chr_number" << endl;
+ cout << " rs#2, base_position, chr_number" << endl;
+ cout << " ..." << endl;
// WJA added.
- cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl;
- cout<<" requires: *.bgen, *.sample files"<<endl;
-
- cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl;
- cout<<" format: variable for individual 1"<<endl;
- cout<<" variable for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl;
- cout<<" format: variable for individual 1"<<endl;
- cout<<" variable for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl;
- cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl;
- cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl;
- cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl;
- cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl;
- cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl;
- cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl;
- cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" note: the intercept (a column of 1s) may need to be included"<<endl;
- cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl;
- cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl;
- cout<<" options: n1: rs column number"<<endl;
- cout<<" n2: estimated alpha column number (0 to ignore)"<<endl;
- cout<<" n3: estimated beta column number (0 to ignore)"<<endl;
- cout<<" n4: estimated gamma column number (0 to ignore)"<<endl;
- cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl;
- cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl;
- cout<<" format: value for individual 1"<<endl;
- cout<<" value for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl;
- cout<<" -mu [num] "<<" specify input estimated mean value"<<endl;
- cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl;
- cout<<" format: header"<<endl;
- cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl;
- cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: not allowed"<<endl;
- cout<<" -r [filename] "<<" specify input total read count file name"<<endl;
- cout<<" format: total read count for individual 1"<<endl;
- cout<<" total read count for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl;
- cout<<" format: rs#1"<<endl;
- cout<<" rs#2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -silence "<<" silent terminal display"<<endl;
- cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl;
- cout<<" options: 1: \"n by n matrix\" format"<<endl;
- cout<<" 2: \"id id value\" format"<<endl;
- cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl;
- cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl;
- cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl;
- cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl;
- cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl;
- cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl;
- cout<<endl;
+ cout << " -oxford [prefix] "
+ << " specify input Oxford genotype bgen file prefix." << endl;
+ cout << " requires: *.bgen, *.sample files" << endl;
+
+ cout << " -gxe [filename] "
+ << " specify input file that contains a column of environmental "
+ "factor for g by e tests"
+ << endl;
+ cout << " format: variable for individual 1" << endl;
+ cout << " variable for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -widv [filename] "
+ << " specify input file that contains a column of residual weights"
+ << endl;
+ cout << " format: variable for individual 1" << endl;
+ cout << " variable for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -k [filename] "
+ << " specify input kinship/relatedness matrix file name" << endl;
+ cout << " -mk [filename] "
+ << " specify input file which contains a list of kinship/relatedness "
+ "matrices"
+ << endl;
+ cout << " -u [filename] "
+ << " specify input file containing the eigen vectors of the "
+ "kinship/relatedness matrix"
+ << endl;
+ cout << " -d [filename] "
+ << " specify input file containing the eigen values of the "
+ "kinship/relatedness matrix"
+ << endl;
+ cout << " -c [filename] "
+ << " specify input covariates file name (optional)" << endl;
+ cout << " -cat [filename] "
+ << " specify input category file name (optional), which contains rs "
+ "cat1 cat2 ..."
+ << endl;
+ cout << " -beta [filename] "
+ << " specify input beta file name (optional), which contains rs beta "
+ "se_beta n_total (or n_mis and n_obs) estimates from a lm model"
+ << endl;
+ cout << " -cor [filename] "
+ << " specify input correlation file name (optional), which contains "
+ "rs window_size correlations from snps"
+ << endl;
+ cout << " missing value: NA" << endl;
+ cout << " note: the intercept (a column of 1s) may need to be "
+ "included"
+ << endl;
+ cout << " -epm [filename] "
+ << " specify input estimated parameter file name" << endl;
+ cout << " -en [n1] [n2] [n3] [n4] "
+ << " specify values for the input estimated parameter file (with a "
+ "header)"
+ << endl;
+ cout << " options: n1: rs column number" << endl;
+ cout << " n2: estimated alpha column number (0 to ignore)"
+ << endl;
+ cout << " n3: estimated beta column number (0 to ignore)"
+ << endl;
+ cout << " n4: estimated gamma column number (0 to ignore)"
+ << endl;
+ cout << " default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if "
+ "-ebv is specified"
+ << endl;
+ cout << " -ebv [filename] "
+ << " specify input estimated random effect (breeding value) file name"
+ << endl;
+ cout << " format: value for individual 1" << endl;
+ cout << " value for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -emu [filename] "
+ << " specify input log file name containing estimated mean" << endl;
+ cout << " -mu [num] "
+ << " specify input estimated mean value" << endl;
+ cout << " -gene [filename] "
+ << " specify input gene expression file name" << endl;
+ cout << " format: header" << endl;
+ cout << " gene1, count for individual 1, count for "
+ "individual 2, ..."
+ << endl;
+ cout << " gene2, count for individual 1, count for "
+ "individual 2, ..."
+ << endl;
+ cout << " ..." << endl;
+ cout << " missing value: not allowed" << endl;
+ cout << " -r [filename] "
+ << " specify input total read count file name" << endl;
+ cout << " format: total read count for individual 1" << endl;
+ cout << " total read count for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout
+ << " -snps [filename] "
+ << " specify input snps file name to only analyze a certain set of snps"
+ << endl;
+ cout << " format: rs#1" << endl;
+ cout << " rs#2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -silence "
+ << " silent terminal display" << endl;
+ cout << " -km [num] "
+ << " specify input kinship/relatedness file type (default 1)." << endl;
+ cout << " options: 1: \"n by n matrix\" format" << endl;
+ cout << " 2: \"id id value\" format" << endl;
+ cout << " -n [num] "
+ << " specify phenotype column in the phenotype/*.fam file (optional; "
+ "default 1)"
+ << endl;
+ cout << " -pace [num] "
+ << " specify terminal display update pace (default 100000 SNPs or "
+ "100000 iterations)."
+ << endl;
+ cout << " -outdir [path] "
+ << " specify output directory path (default \"./output/\")" << endl;
+ cout << " -o [prefix] "
+ << " specify output file prefix (default \"result\")" << endl;
+ cout << " output: prefix.cXX.txt or prefix.sXX.txt from "
+ "kinship/relatedness matrix estimation"
+ << endl;
+ cout << " output: prefix.assoc.txt and prefix.log.txt form "
+ "association tests"
+ << endl;
+ cout << endl;
}
- if (option==3) {
- cout<<" SNP QC OPTIONS" << endl;
- cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl;
- cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl;
- cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl;
- cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl;
- cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl;
- cout<<endl;
+ if (option == 3) {
+ cout << " SNP QC OPTIONS" << endl;
+ cout << " -miss [num] "
+ << " specify missingness threshold (default 0.05)" << endl;
+ cout << " -maf [num] "
+ << " specify minor allele frequency threshold (default 0.01)" << endl;
+ cout << " -hwe [num] "
+ << " specify HWE test p value threshold (default 0; no test)" << endl;
+ cout << " -r2 [num] "
+ << " specify r-squared threshold (default 0.9999)" << endl;
+ cout << " -notsnp "
+ << " minor allele frequency cutoff is not used" << endl;
+ cout << endl;
}
- if (option==4) {
- cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
- cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
- cout<<" options: 1: centered XX^T/p"<<endl;
- cout<<" 2: standardized XX^T/p"<<endl;
- cout<<" note: non-polymorphic SNPs are excluded "<<endl;
- cout<<endl;
+ if (option == 4) {
+ cout << " RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
+ cout << " -gk [num] "
+ << " specify which type of kinship/relatedness matrix to generate "
+ "(default 1)"
+ << endl;
+ cout << " options: 1: centered XX^T/p" << endl;
+ cout << " 2: standardized XX^T/p" << endl;
+ cout << " note: non-polymorphic SNPs are excluded " << endl;
+ cout << endl;
}
- if (option==5) {
- cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl;
- cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
- cout<<endl;
+ if (option == 5) {
+ cout << " EIGEN-DECOMPOSITION OPTIONS" << endl;
+ cout << " -eigen "
+ << " specify to perform eigen decomposition of the loaded relatedness "
+ "matrix"
+ << endl;
+ cout << endl;
}
- if (option==6) {
- cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
- cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
- cout<<" options (with kinship file): 1: HE regression (default)"<<endl;
- cout<<" 2: REML"<<endl;
- cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl;
- cout<<" 2: Standardized genotypes"<<endl;
- cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl;
- cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl;
- cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl;
- cout<<endl;
+ if (option == 6) {
+ cout << " VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
+ cout << " -vc "
+ << " specify to perform variance component estimation for the loaded "
+ "relatedness matrix/matrices"
+ << endl;
+ cout
+ << " options (with kinship file): 1: HE regression (default)"
+ << endl;
+ cout << " 2: REML" << endl;
+ cout << " options (with beta/cor files): 1: Centered genotypes "
+ "(default)"
+ << endl;
+ cout << " 2: Standardized genotypes"
+ << endl;
+ cout << " -crt -windowbp [num]"
+ << " specify the window size based on bp (default 1000000; 1Mb)"
+ << endl;
+ cout << " -crt -windowcm [num]"
+ << " specify the window size based on cm (default 0)" << endl;
+ cout << " -crt -windowns [num]"
+ << " specify the window size based on number of snps (default 0)"
+ << endl;
+ cout << endl;
}
- if (option==7) {
- cout<<" LINEAR MODEL OPTIONS" << endl;
- cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: Wald test"<<endl;
- cout<<" 2: Likelihood ratio test"<<endl;
- cout<<" 3: Score test"<<endl;
- cout<<" 4: 1-3"<<endl;
- cout<<endl;
+ if (option == 7) {
+ cout << " LINEAR MODEL OPTIONS" << endl;
+ cout << " -lm [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: Wald test" << endl;
+ cout << " 2: Likelihood ratio test" << endl;
+ cout << " 3: Score test" << endl;
+ cout << " 4: 1-3" << endl;
+ cout << endl;
}
- if (option==8) {
- cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
- cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: Wald test"<<endl;
- cout<<" 2: Likelihood ratio test"<<endl;
- cout<<" 3: Score test"<<endl;
- cout<<" 4: 1-3"<<endl;
- cout<<" 5: Parameter estimation in the null model only"<<endl;
- cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl;
- cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl;
- cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
- cout<<endl;
+ if (option == 8) {
+ cout << " LINEAR MIXED MODEL OPTIONS" << endl;
+ cout << " -lmm [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: Wald test" << endl;
+ cout << " 2: Likelihood ratio test" << endl;
+ cout << " 3: Score test" << endl;
+ cout << " 4: 1-3" << endl;
+ cout << " 5: Parameter estimation in the null model only"
+ << endl;
+ cout << " -lmin [num] "
+ << " specify minimal value for lambda (default 1e-5)" << endl;
+ cout << " -lmax [num] "
+ << " specify maximum value for lambda (default 1e+5)" << endl;
+ cout
+ << " -region [num] "
+ << " specify the number of regions used to evaluate lambda (default 10)"
+ << endl;
+ cout << endl;
}
- if (option==9) {
- cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
- cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl;
- cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl;
- cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl;
- cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl;
- cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl;
- cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl;
- cout<<endl;
+ if (option == 9) {
+ cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
+ cout << " -pnr "
+ << " specify the pvalue threshold to use the Newton-Raphson's method "
+ "(default 0.001)"
+ << endl;
+ cout << " -emi "
+ << " specify the maximum number of iterations for the PX-EM method in "
+ "the null (default 10000)"
+ << endl;
+ cout << " -nri "
+ << " specify the maximum number of iterations for the "
+ "Newton-Raphson's method in the null (default 100)"
+ << endl;
+ cout << " -emp "
+ << " specify the precision for the PX-EM method in the null (default "
+ "0.0001)"
+ << endl;
+ cout << " -nrp "
+ << " specify the precision for the Newton-Raphson's method in the "
+ "null (default 0.0001)"
+ << endl;
+ cout << " -crt "
+ << " specify to output corrected pvalues for these pvalues that are "
+ "below the -pnr threshold"
+ << endl;
+ cout << endl;
}
- if (option==10) {
- cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl;
- cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: BSLMM"<<endl;
- cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl;
- cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
- cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl;
- cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl;
-
- cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: LDR"<<endl;
-
- cout<<" MCMC OPTIONS" << endl;
- cout<<" Prior" << endl;
- cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl;
- cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl;
- cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl;
- cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl;
- cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl;
- cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
- cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl;
- cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl;
-
- cout<<" Proposal" << endl;
- cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
- cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
-
- cout<<" Others" << endl;
- cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl;
- cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl;
- cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl;
- cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl;
- cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl;
- cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl;
- cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
- cout<<endl;
+ if (option == 10) {
+ cout << " MULTI-LOCUS ANALYSIS OPTIONS" << endl;
+ cout << " -bslmm [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: BSLMM" << endl;
+ cout << " 2: standard ridge regression/GBLUP (no mcmc)"
+ << endl;
+ cout << " 3: probit BSLMM (requires 0/1 phenotypes)"
+ << endl;
+ cout
+ << " 4: BSLMM with DAP for Hyper Parameter Estimation"
+ << endl;
+ cout << " 5: BSLMM with DAP for Fine Mapping" << endl;
+
+ cout << " -ldr [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: LDR" << endl;
+
+ cout << " MCMC OPTIONS" << endl;
+ cout << " Prior" << endl;
+ cout << " -hmin [num] "
+ << " specify minimum value for h (default 0)" << endl;
+ cout << " -hmax [num] "
+ << " specify maximum value for h (default 1)" << endl;
+ cout << " -rmin [num] "
+ << " specify minimum value for rho (default 0)" << endl;
+ cout << " -rmax [num] "
+ << " specify maximum value for rho (default 1)" << endl;
+ cout << " -pmin [num] "
+ << " specify minimum value for log10(pi) (default log10(1/p), where p "
+ "is the number of analyzed SNPs )"
+ << endl;
+ cout << " -pmax [num] "
+ << " specify maximum value for log10(pi) (default log10(1) )" << endl;
+ cout << " -smin [num] "
+ << " specify minimum value for |gamma| (default 0)" << endl;
+ cout << " -smax [num] "
+ << " specify maximum value for |gamma| (default 300)" << endl;
+
+ cout << " Proposal" << endl;
+ cout << " -gmean [num] "
+ << " specify the mean for the geometric distribution (default: 2000)"
+ << endl;
+ cout << " -hscale [num] "
+ << " specify the step size scale for the proposal distribution of h "
+ "(value between 0 and 1, default min(10/sqrt(n),1) )"
+ << endl;
+ cout << " -rscale [num] "
+ << " specify the step size scale for the proposal distribution of rho "
+ "(value between 0 and 1, default min(10/sqrt(n),1) )"
+ << endl;
+ cout << " -pscale [num] "
+ << " specify the step size scale for the proposal distribution of "
+ "log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )"
+ << endl;
+
+ cout << " Others" << endl;
+ cout << " -w [num] "
+ << " specify burn-in steps (default 100,000)" << endl;
+ cout << " -s [num] "
+ << " specify sampling steps (default 1,000,000)" << endl;
+ cout << " -rpace [num] "
+ << " specify recording pace, record one state in every [num] steps "
+ "(default 10)"
+ << endl;
+ cout << " -wpace [num] "
+ << " specify writing pace, write values down in every [num] recorded "
+ "steps (default 1000)"
+ << endl;
+ cout << " -seed [num] "
+ << " specify random seed (a random seed is generated by default)"
+ << endl;
+ cout << " -mh [num] "
+ << " specify number of MH steps in each iteration (default 10)"
+ << endl;
+ cout << " requires: 0/1 phenotypes and -bslmm 3 option" << endl;
+ cout << endl;
}
- if (option==11) {
- cout<<" PREDICTION OPTIONS" << endl;
- cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl;
- cout<<" options: 1: predict for individuals with missing phenotypes"<<endl;
- cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
- cout<<endl;
+ if (option == 11) {
+ cout << " PREDICTION OPTIONS" << endl;
+ cout << " -predict [num] "
+ << " specify prediction options (default 1)." << endl;
+ cout << " options: 1: predict for individuals with missing "
+ "phenotypes"
+ << endl;
+ cout << " 2: predict for individuals with missing "
+ "phenotypes, and convert the predicted values to probability "
+ "scale. Use only for files fitted with -bslmm 3 option"
+ << endl;
+ cout << endl;
}
- if (option==12) {
- cout<<" CALC CORRELATION OPTIONS" << endl;
- cout<<" -calccor "<<endl;
- cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl;
- cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl;
- cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl;
- cout<<endl;
+ if (option == 12) {
+ cout << " CALC CORRELATION OPTIONS" << endl;
+ cout << " -calccor " << endl;
+ cout << " -windowbp [num] "
+ << " specify the window size based on bp (default 1000000; 1Mb)"
+ << endl;
+ cout << " -windowcm [num] "
+ << " specify the window size based on cm (default 0; not used)"
+ << endl;
+ cout << " -windowns [num] "
+ << " specify the window size based on number of snps (default 0; not "
+ "used)"
+ << endl;
+ cout << endl;
}
- if (option==13) {
- cout<<" NOTE"<<endl;
- cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl;
- cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl;
- cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl;
- cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl;
- cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl;
- cout<<endl;
+ if (option == 13) {
+ cout << " NOTE" << endl;
+ cout << " 1. Only individuals with non-missing phenotoypes and covariates "
+ "will be analyzed."
+ << endl;
+ cout << " 2. Missing genotoypes will be repalced with the mean genotype of "
+ "that SNP."
+ << endl;
+ cout << " 3. For lmm analysis, memory should be large enough to hold the "
+ "relatedness matrix and to perform eigen decomposition."
+ << endl;
+ cout << " 4. For multivariate lmm analysis, use a large -pnr for each snp "
+ "will increase computation time dramatically."
+ << endl;
+ cout << " 5. For bslmm analysis, in addition to 3, memory should be large "
+ "enough to hold the whole genotype matrix."
+ << endl;
+ cout << endl;
}
return;
@@ -457,2609 +700,2985 @@ void GEMMA::PrintHelp(size_t option) {
// calccor: 71
// gw: 72
-void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) {
- string str;
-
- for(int i = 1; i < argc; i++) {
- if (strcmp(argv[i], "-bfile")==0 ||
- strcmp(argv[i], "--bfile")==0 ||
- strcmp(argv[i], "-b")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_bfile=str;
- }
- else if (strcmp(argv[i], "-mbfile")==0 ||
- strcmp(argv[i], "--mbfile")==0 ||
- strcmp(argv[i], "-mb")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mbfile=str;
- }
- else if (strcmp(argv[i], "-silence")==0) {
- cPar.mode_silence=true;
- }
- else if (strcmp(argv[i], "-g")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_geno=str;
- }
- else if (strcmp(argv[i], "-mg")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mgeno=str;
- }
- else if (strcmp(argv[i], "-p")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_pheno=str;
- }
- else if (strcmp(argv[i], "-a")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_anno=str;
- }
-
- // WJA added.
- else if (strcmp(argv[i], "-oxford")==0 ||
- strcmp(argv[i], "--oxford")==0 ||
- strcmp(argv[i], "-x")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_oxford=str;
- }
- else if (strcmp(argv[i], "-gxe")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_gxe=str;
- }
- else if (strcmp(argv[i], "-widv")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_weight=str;
- }
- else if (strcmp(argv[i], "-wsnp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_wsnp=str;
- }
- else if (strcmp(argv[i], "-wcat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_wcat=str;
- }
- else if (strcmp(argv[i], "-k")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_kin=str;
- }
- else if (strcmp(argv[i], "-mk")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mk=str;
- }
- else if (strcmp(argv[i], "-u")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_ku=str;
- }
- else if (strcmp(argv[i], "-d")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_kd=str;
- }
- else if (strcmp(argv[i], "-c")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_cvt=str;
- }
- else if (strcmp(argv[i], "-cat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_cat=str;
- }
- else if (strcmp(argv[i], "-mcat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mcat=str;
- }
- else if (strcmp(argv[i], "-catc")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_catc=str;
- }
- else if (strcmp(argv[i], "-mcatc")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mcatc=str;
- }
- else if (strcmp(argv[i], "-beta")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_beta=str;
- }
- else if (strcmp(argv[i], "-bf")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_bf=str;
- }
- else if (strcmp(argv[i], "-hyp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_hyp=str;
- }
- else if (strcmp(argv[i], "-cor")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_cor=str;
- }
- else if (strcmp(argv[i], "-study")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_study=str;
- }
- else if (strcmp(argv[i], "-ref")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_ref=str;
- }
- else if (strcmp(argv[i], "-mstudy")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mstudy=str;
- }
- else if (strcmp(argv[i], "-mref")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mref=str;
- }
- else if (strcmp(argv[i], "-epm")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_epm=str;
- }
- else if (strcmp(argv[i], "-en")==0) {
- while (argv[i+1] != NULL && argv[i+1][0] != '-') {
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.est_column.push_back(atoi(str.c_str()));
- }
- }
- else if (strcmp(argv[i], "-ebv")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_ebv=str;
- }
- else if (strcmp(argv[i], "-emu")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_log=str;
- }
- else if (strcmp(argv[i], "-mu")==0) {
- if(argv[i+1] == NULL) {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.pheno_mean=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-gene")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_gene=str;
- }
- else if (strcmp(argv[i], "-r")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_read=str;
- }
- else if (strcmp(argv[i], "-snps")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_snps=str;
- }
- else if (strcmp(argv[i], "-km")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.k_mode=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-n")==0) {
- (cPar.p_column).clear();
- while (argv[i+1] != NULL && argv[i+1][0] != '-') {
- ++i;
- str.clear();
- str.assign(argv[i]);
- (cPar.p_column).push_back(atoi(str.c_str()));
- }
- }
- else if (strcmp(argv[i], "-pace")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.d_pace=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-outdir")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.path_out=str;
- }
- else if (strcmp(argv[i], "-o")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_out=str;
- }
- else if (strcmp(argv[i], "-miss")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.miss_level=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-maf")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- if (cPar.maf_level!=-1) {cPar.maf_level=atof(str.c_str());}
- }
- else if (strcmp(argv[i], "-hwe")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.hwe_level=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-r2")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.r2_level=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-notsnp")==0) {
- cPar.maf_level=-1;
- }
- else if (strcmp(argv[i], "-gk")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=21; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=20+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gs")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=25; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=24+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gq")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=27; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=26+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gw")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=72; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=71+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-sample")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.ni_subsample=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-eigen")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=31; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=30+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-calccor")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=71; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=70+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-vc")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=61; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=60+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-ci")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=66; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=65+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-pve")==0) {
- double s=0;
- while (argv[i+1] != NULL && (argv[i+1][0] != '-' || !isalpha(argv[i+1][1]) ) ) {
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.v_pve.push_back(atof(str.c_str()));
- s+=atof(str.c_str());
- }
- if (s==1) {
- cout<<"summation of pve equals one."<<endl;
- }
- }
- else if (strcmp(argv[i], "-blocks")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.n_block=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-noconstrain")==0) {
- cPar.noconstrain=true;
- }
- else if (strcmp(argv[i], "-lm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=51; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=50+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-fa")==0 || strcmp(argv[i], "-lmm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=1; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-lmin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.l_min=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-lmax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.l_max=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-region")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.n_region=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-pnr")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.p_nr=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-emi")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.em_iter=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-nri")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.nr_iter=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-emp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.em_prec=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-nrp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.nr_prec=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-crt")==0) {
- cPar.crt=1;
- }
- else if (strcmp(argv[i], "-bslmm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=11; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=10+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-hmin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.h_min=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-hmax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.h_max=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-rmin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.rho_min=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-rmax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.rho_max=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-pmin")==0) {
- if(argv[i+1] == NULL) {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.logp_min=atof(str.c_str())*log(10.0);
- }
- else if (strcmp(argv[i], "-pmax")==0) {
- if(argv[i+1] == NULL) {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.logp_max=atof(str.c_str())*log(10.0);
- }
- else if (strcmp(argv[i], "-smin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.s_min=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-smax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.s_max=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gmean")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.geo_mean=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-hscale")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.h_scale=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-rscale")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.rho_scale=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-pscale")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.logp_scale=atof(str.c_str())*log(10.0);
- }
- else if (strcmp(argv[i], "-w")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.w_step=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-s")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.s_step=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-rpace")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.r_pace=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-wpace")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.w_pace=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-seed")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.randseed=atol(str.c_str());
- }
- else if (strcmp(argv[i], "-mh")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.n_mh=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-predict")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=41; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=40+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-windowcm")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.window_cm=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-windowbp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.window_bp=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-windowns")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.window_ns=atoi(str.c_str());
- }
- else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;}
- }
-
- // Change prediction mode to 43 if the epm file is not provided.
- if (cPar.a_mode==41 && cPar.file_epm.empty()) {
- cPar.a_mode=43;
- }
-
- return;
+void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
+ string str;
+
+ for (int i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "-bfile") == 0 || strcmp(argv[i], "--bfile") == 0 ||
+ strcmp(argv[i], "-b") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_bfile = str;
+ } else if (strcmp(argv[i], "-mbfile") == 0 ||
+ strcmp(argv[i], "--mbfile") == 0 ||
+ strcmp(argv[i], "-mb") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mbfile = str;
+ } else if (strcmp(argv[i], "-silence") == 0) {
+ cPar.mode_silence = true;
+ } else if (strcmp(argv[i], "-g") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_geno = str;
+ } else if (strcmp(argv[i], "-mg") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mgeno = str;
+ } else if (strcmp(argv[i], "-p") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_pheno = str;
+ } else if (strcmp(argv[i], "-a") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_anno = str;
+ }
+
+ // WJA added.
+ else if (strcmp(argv[i], "-oxford") == 0 ||
+ strcmp(argv[i], "--oxford") == 0 || strcmp(argv[i], "-x") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_oxford = str;
+ } else if (strcmp(argv[i], "-gxe") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_gxe = str;
+ } else if (strcmp(argv[i], "-widv") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_weight = str;
+ } else if (strcmp(argv[i], "-wsnp") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_wsnp = str;
+ } else if (strcmp(argv[i], "-wcat") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_wcat = str;
+ } else if (strcmp(argv[i], "-k") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_kin = str;
+ } else if (strcmp(argv[i], "-mk") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mk = str;
+ } else if (strcmp(argv[i], "-u") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_ku = str;
+ } else if (strcmp(argv[i], "-d") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_kd = str;
+ } else if (strcmp(argv[i], "-c") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_cvt = str;
+ } else if (strcmp(argv[i], "-cat") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_cat = str;
+ } else if (strcmp(argv[i], "-mcat") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mcat = str;
+ } else if (strcmp(argv[i], "-catc") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_catc = str;
+ } else if (strcmp(argv[i], "-mcatc") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mcatc = str;
+ } else if (strcmp(argv[i], "-beta") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_beta = str;
+ } else if (strcmp(argv[i], "-bf") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_bf = str;
+ } else if (strcmp(argv[i], "-hyp") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_hyp = str;
+ } else if (strcmp(argv[i], "-cor") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_cor = str;
+ } else if (strcmp(argv[i], "-study") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_study = str;
+ } else if (strcmp(argv[i], "-ref") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_ref = str;
+ } else if (strcmp(argv[i], "-mstudy") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mstudy = str;
+ } else if (strcmp(argv[i], "-mref") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_mref = str;
+ } else if (strcmp(argv[i], "-epm") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_epm = str;
+ } else if (strcmp(argv[i], "-en") == 0) {
+ while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.est_column.push_back(atoi(str.c_str()));
+ }
+ } else if (strcmp(argv[i], "-ebv") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_ebv = str;
+ } else if (strcmp(argv[i], "-emu") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_log = str;
+ } else if (strcmp(argv[i], "-mu") == 0) {
+ if (argv[i + 1] == NULL) {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.pheno_mean = atof(str.c_str());
+ } else if (strcmp(argv[i], "-gene") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_gene = str;
+ } else if (strcmp(argv[i], "-r") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_read = str;
+ } else if (strcmp(argv[i], "-snps") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_snps = str;
+ } else if (strcmp(argv[i], "-km") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.k_mode = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-n") == 0) {
+ (cPar.p_column).clear();
+ while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ (cPar.p_column).push_back(atoi(str.c_str()));
+ }
+ } else if (strcmp(argv[i], "-pace") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.d_pace = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-outdir") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.path_out = str;
+ } else if (strcmp(argv[i], "-o") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.file_out = str;
+ } else if (strcmp(argv[i], "-miss") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.miss_level = atof(str.c_str());
+ } else if (strcmp(argv[i], "-maf") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ if (cPar.maf_level != -1) {
+ cPar.maf_level = atof(str.c_str());
+ }
+ } else if (strcmp(argv[i], "-hwe") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.hwe_level = atof(str.c_str());
+ } else if (strcmp(argv[i], "-r2") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.r2_level = atof(str.c_str());
+ } else if (strcmp(argv[i], "-notsnp") == 0) {
+ cPar.maf_level = -1;
+ } else if (strcmp(argv[i], "-gk") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 21;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 20 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-gs") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 25;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 24 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-gq") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 27;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 26 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-gw") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 72;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 71 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-sample") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.ni_subsample = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-eigen") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 31;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 30 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-calccor") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 71;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 70 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-vc") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 61;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 60 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-ci") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 66;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 65 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-pve") == 0) {
+ double s = 0;
+ while (argv[i + 1] != NULL &&
+ (argv[i + 1][0] != '-' || !isalpha(argv[i + 1][1]))) {
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.v_pve.push_back(atof(str.c_str()));
+ s += atof(str.c_str());
+ }
+ if (s == 1) {
+ cout << "summation of pve equals one." << endl;
+ }
+ } else if (strcmp(argv[i], "-blocks") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.n_block = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-noconstrain") == 0) {
+ cPar.noconstrain = true;
+ } else if (strcmp(argv[i], "-lm") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 51;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 50 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-fa") == 0 || strcmp(argv[i], "-lmm") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 1;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-lmin") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.l_min = atof(str.c_str());
+ } else if (strcmp(argv[i], "-lmax") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.l_max = atof(str.c_str());
+ } else if (strcmp(argv[i], "-region") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.n_region = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-pnr") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.p_nr = atof(str.c_str());
+ } else if (strcmp(argv[i], "-emi") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.em_iter = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-nri") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.nr_iter = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-emp") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.em_prec = atof(str.c_str());
+ } else if (strcmp(argv[i], "-nrp") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.nr_prec = atof(str.c_str());
+ } else if (strcmp(argv[i], "-crt") == 0) {
+ cPar.crt = 1;
+ } else if (strcmp(argv[i], "-bslmm") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 11;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 10 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-hmin") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.h_min = atof(str.c_str());
+ } else if (strcmp(argv[i], "-hmax") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.h_max = atof(str.c_str());
+ } else if (strcmp(argv[i], "-rmin") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.rho_min = atof(str.c_str());
+ } else if (strcmp(argv[i], "-rmax") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.rho_max = atof(str.c_str());
+ } else if (strcmp(argv[i], "-pmin") == 0) {
+ if (argv[i + 1] == NULL) {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.logp_min = atof(str.c_str()) * log(10.0);
+ } else if (strcmp(argv[i], "-pmax") == 0) {
+ if (argv[i + 1] == NULL) {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.logp_max = atof(str.c_str()) * log(10.0);
+ } else if (strcmp(argv[i], "-smin") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.s_min = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-smax") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.s_max = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-gmean") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.geo_mean = atof(str.c_str());
+ } else if (strcmp(argv[i], "-hscale") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.h_scale = atof(str.c_str());
+ } else if (strcmp(argv[i], "-rscale") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.rho_scale = atof(str.c_str());
+ } else if (strcmp(argv[i], "-pscale") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.logp_scale = atof(str.c_str()) * log(10.0);
+ } else if (strcmp(argv[i], "-w") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.w_step = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-s") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.s_step = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-rpace") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.r_pace = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-wpace") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.w_pace = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-seed") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.randseed = atol(str.c_str());
+ } else if (strcmp(argv[i], "-mh") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.n_mh = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-predict") == 0) {
+ if (cPar.a_mode != 0) {
+ cPar.error = true;
+ cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+ "-predict -calccor options is allowed."
+ << endl;
+ break;
+ }
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ cPar.a_mode = 41;
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.a_mode = 40 + atoi(str.c_str());
+ } else if (strcmp(argv[i], "-windowcm") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.window_cm = atof(str.c_str());
+ } else if (strcmp(argv[i], "-windowbp") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.window_bp = atoi(str.c_str());
+ } else if (strcmp(argv[i], "-windowns") == 0) {
+ if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+ continue;
+ }
+ ++i;
+ str.clear();
+ str.assign(argv[i]);
+ cPar.window_ns = atoi(str.c_str());
+ } else {
+ cout << "error! unrecognized option: " << argv[i] << endl;
+ cPar.error = true;
+ continue;
+ }
+ }
+
+ // Change prediction mode to 43 if the epm file is not provided.
+ if (cPar.a_mode == 41 && cPar.file_epm.empty()) {
+ cPar.a_mode = 43;
+ }
+
+ return;
}
-void GEMMA::BatchRun (PARAM &cPar) {
- clock_t time_begin, time_start;
- time_begin=clock();
-
- // Read Files.
- cout<<"Reading Files ... "<<endl;
- cPar.ReadFiles();
- if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;}
- cPar.CheckData();
- if (cPar.error==true) {cout<<"error! fail to check data. "<<endl; return;}
-
- //Prediction for bslmm
- if (cPar.a_mode==41 || cPar.a_mode==42) {
- gsl_vector *y_prdt;
-
- y_prdt=gsl_vector_alloc (cPar.ni_total-cPar.ni_test);
-
- //set to zero
- gsl_vector_set_zero (y_prdt);
-
- PRDT cPRDT;
- cPRDT.CopyFromParam(cPar);
-
- //add breeding value if needed
- if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
- cout<<"Adding Breeding Values ... "<<endl;
-
- gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
- gsl_vector *u_hat=gsl_vector_alloc (cPar.ni_test);
-
- //read kinship matrix and set u_hat
- vector<int> indicator_all;
- size_t c_bv=0;
- for (size_t i=0; i<cPar.indicator_idv.size(); i++) {
- indicator_all.push_back(1);
- if (cPar.indicator_bv[i]==1) {gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); c_bv++;}
- }
-
- ReadFile_kin (cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //read u
- cPRDT.AddBV(G, u_hat, y_prdt);
-
- gsl_matrix_free(G);
- gsl_vector_free(u_hat);
- }
-
- //add beta
- if (!cPar.file_bfile.empty()) {
- cPRDT.AnalyzePlink (y_prdt);
- }
- else {
- cPRDT.AnalyzeBimbam (y_prdt);
- }
-
- //add mu
- gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
-
- //convert y to probability if needed
- if (cPar.a_mode==42) {
- double d;
- for (size_t i=0; i<y_prdt->size; i++) {
- d=gsl_vector_get(y_prdt, i);
- d=gsl_cdf_gaussian_P(d, 1.0);
- gsl_vector_set(y_prdt, i, d);
- }
- }
-
-
- cPRDT.CopyToParam(cPar);
-
- cPRDT.WriteFiles(y_prdt);
-
- gsl_vector_free(y_prdt);
- }
-
- //Prediction with kinship matrix only; for one or more phenotypes
- if (cPar.a_mode==43) {
- //first, use individuals with full phenotypes to obtain estimates of Vg and Ve
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
- gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
- gsl_vector *eval=gsl_vector_alloc (Y->size1);
-
- gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph);
- gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
- cPar.CopyCvtPhen (W_full, Y_full, 1);
-
- gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph);
- gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1);
- gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2);
-
- //read relatedness matrix G, and matrix G_full
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
- ReadFile_kin (cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
- CenterMatrix (G_full);
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- //calculate variance component and beta estimates
- //and then obtain predicted values
- if (cPar.n_ph==1) {
- gsl_vector *beta=gsl_vector_alloc (W->size2);
- gsl_vector *se_beta=gsl_vector_alloc (W->size2);
-
- double lambda, logl, vg, ve;
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- //obtain estimates
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, lambda, logl);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, lambda, vg, ve, beta, se_beta);
-
- cout<<"REMLE estimate for vg in the null model = "<<vg<<endl;
- cout<<"REMLE estimate for ve in the null model = "<<ve<<endl;
- cPar.vg_remle_null=vg; cPar.ve_remle_null=ve;
-
- //obtain Y_hat from fixed effects
- gsl_vector_view Yhat_col=gsl_matrix_column (Y_hat, 0);
- gsl_blas_dgemv (CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
-
- //obtain H
- gsl_matrix_set_identity (H_full);
- gsl_matrix_scale (H_full, ve);
- gsl_matrix_scale (G_full, vg);
- gsl_matrix_add (H_full, G_full);
-
- //free matrices
- gsl_vector_free(beta);
- gsl_vector_free(se_beta);
- } else {
- gsl_matrix *Vg=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
- gsl_matrix *Ve=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
- gsl_matrix *B=gsl_matrix_alloc (cPar.n_ph, W->size2);
- gsl_matrix *se_B=gsl_matrix_alloc (cPar.n_ph, W->size2);
-
- //obtain estimates
- CalcMvLmmVgVeBeta (eval, UtW, UtY, cPar.em_iter, cPar.nr_iter, cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max, cPar.n_region, Vg, Ve, B, se_B);
-
- cout<<"REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<Vg->size1; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(Vg, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<Ve->size1; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(Ve, i, j)<<"\t";
- }
- cout<<endl;
- }
- cPar.Vg_remle_null.clear();
- cPar.Ve_remle_null.clear();
- for (size_t i=0; i<Vg->size1; i++) {
- for (size_t j=i; j<Vg->size2; j++) {
- cPar.Vg_remle_null.push_back(gsl_matrix_get (Vg, i, j) );
- cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) );
- }
- }
-
- //obtain Y_hat from fixed effects
- gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
-
- //obtain H
- KroneckerSym(G_full, Vg, H_full);
- for (size_t i=0; i<G_full->size1; i++) {
- gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2);
- gsl_matrix_add (&H_sub.matrix, Ve);
- }
-
- //free matrices
- gsl_matrix_free (Vg);
- gsl_matrix_free (Ve);
- gsl_matrix_free (B);
- gsl_matrix_free (se_B);
- }
-
- PRDT cPRDT;
-
- cPRDT.CopyFromParam(cPar);
-
- cout<<"Predicting Missing Phentypes ... "<<endl;
- time_start=clock();
- cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- cPRDT.WriteFiles(Y_full);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(W);
- gsl_matrix_free(G);
- gsl_matrix_free(U);
- gsl_matrix_free(UtW);
- gsl_matrix_free(UtY);
- gsl_vector_free(eval);
-
- gsl_matrix_free(Y_full);
- gsl_matrix_free(Y_hat);
- gsl_matrix_free(W_full);
- gsl_matrix_free(G_full);
- gsl_matrix_free(H_full);
- }
-
-
- //Generate Kinship matrix
- if (cPar.a_mode==21 || cPar.a_mode==22) {
- cout<<"Calculating Relatedness Matrix ... "<<endl;
-
- gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
-
- time_start=clock();
- cPar.CalcKin (G);
- cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate relatedness matrix. "<<endl; return;}
-
- if (cPar.a_mode==21) {
- cPar.WriteMatrix (G, "cXX");
- } else {
- cPar.WriteMatrix (G, "sXX");
- }
-
- gsl_matrix_free (G);
- }
-
- //Compute the LDSC weights (not implemented yet)
- if (cPar.a_mode==72) {
- cout<<"Calculating Weights ... "<<endl;
-
- VARCOV cVarcov;
- cVarcov.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- cVarcov.AnalyzePlink ();
- } else {
- cVarcov.AnalyzeBimbam ();
- }
-
- cVarcov.CopyToParam(cPar);
- }
-
- // Compute the S matrix (and its variance), that is used for
- // variance component estimation using summary statistics.
- if (cPar.a_mode==25 || cPar.a_mode==26) {
- cout<<"Calculating the S Matrix ... "<<endl;
-
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
- gsl_vector *ns=gsl_vector_alloc (cPar.n_vc+1);
- gsl_matrix_set_zero(S);
- gsl_vector_set_zero(ns);
-
- gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
- gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
- gsl_vector_view ns_vec=gsl_vector_subvector(ns, 0, cPar.n_vc);
-
- gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
- gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
- gsl_matrix_set_zero (K);
- gsl_matrix_set_zero (A);
-
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
-
- cPar.CopyCvtPhen (W, y, 0);
-
- set<string> setSnps_beta;
- map <string, double> mapRS2wA, mapRS2wK;
-
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- time_start=clock();
- cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector);
- cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
- gsl_vector_set (ns, cPar.n_vc, cPar.ni_test);
-
- cPar.WriteMatrix (S, "S");
- cPar.WriteVector (ns, "size");
- cPar.WriteVar ("snps");
-
- gsl_matrix_free (S);
- gsl_vector_free (ns);
-
- gsl_matrix_free (A);
- gsl_matrix_free (K);
-
- gsl_vector_free (y);
- gsl_matrix_free (K);
- }
-
- //Compute the q vector, that is used for variance component estimation using summary statistics
- if (cPar.a_mode==27 || cPar.a_mode==28) {
- gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
- gsl_vector_set_zero (q);
- gsl_vector_set_zero (s);
-
- gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
-
- vector<size_t> vec_cat, vec_ni;
- vector<double> vec_weight, vec_z2;
- map<string, double> mapRS2weight;
- mapRS2weight.clear();
-
- time_start=clock();
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test);
- cout<<"## number of total individuals = "<<cPar.ni_total<<endl;
- cout<<"## number of total SNPs = "<<cPar.ns_total<<endl;
- cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- cout<<"## number of variance components = "<<cPar.n_vc<<endl;
- cout<<"Calculating the q vector ... "<<endl;
- Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
- cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- if (cPar.error==true) {cout<<"error! fail to calculate the q vector. "<<endl; return;}
-
- gsl_vector_set (s, cPar.n_vc, cPar.ni_total);
-
- cPar.WriteMatrix (Vq, "Vq");
- cPar.WriteVector (q, "q");
- cPar.WriteVector (s, "size");
- /*
- for (size_t i=0; i<cPar.n_vc; i++) {
- cout<<gsl_vector_get(q, i)<<endl;
- }
- */
- gsl_matrix_free (Vq);
- gsl_vector_free (q);
- gsl_vector_free (s);
- }
-
- // Calculate SNP covariance.
- if (cPar.a_mode==71) {
- VARCOV cVarcov;
- cVarcov.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- cVarcov.AnalyzePlink ();
- } else {
- cVarcov.AnalyzeBimbam ();
- }
-
- cVarcov.CopyToParam(cPar);
- }
-
- // LM.
- if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
-
- //Fit LM or mvLM
- if (cPar.n_ph==1) {
- LM cLm;
- cLm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-
- if (!cPar.file_gene.empty()) {
- cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype
- } else if (!cPar.file_bfile.empty()) {
- cLm.AnalyzePlink (W, &Y_col.vector);
- } else if (!cPar.file_oxford.empty()) {
- cLm.Analyzebgen (W, &Y_col.vector);
- } else {
- cLm.AnalyzeBimbam (W, &Y_col.vector);
- }
-
- cLm.WriteFiles();
- cLm.CopyToParam(cPar);
- }
- /*
- else {
- MVLM cMvlm;
- cMvlm.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- cMvlm.AnalyzePlink (W, Y);
- } else {
- cMvlm.AnalyzeBimbam (W, Y);
- }
-
- cMvlm.WriteFiles();
- cMvlm.CopyToParam(cPar);
- }
- */
- //release all matrices and vectors
- gsl_matrix_free (Y);
- gsl_matrix_free (W);
- }
-
- //VC estimation with one or multiple kinship matrices
- //REML approach only
- //if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp
- //for one phenotype only;
- if (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) {
- if (!cPar.file_beta.empty() ) {
- //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
- //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
- //update indicator_snps, so that the numbers are in accordance with mapRS2wK
- set<string> setSnps_beta;
- ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
- map <string, double> mapRS2wA, mapRS2wK;
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- cPar.UpdateSNP (mapRS2wK);
-
- // Setup matrices and vectors.
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
- gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-
- gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
- gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
-
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
-
- gsl_matrix_set_zero (K);
- gsl_matrix_set_zero (A);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_set_zero(Vq);
- gsl_vector_set_zero (q);
- gsl_vector_set_zero (s);
-
- cPar.CopyCvtPhen (W, y, 0);
-
- gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
- gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
- gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
-
- vector<size_t> vec_cat, vec_ni;
- vector<double> vec_weight, vec_z2;
-
- //read beta, based on the mapRS2wK
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test);
-
- cout<<"Study Panel: "<<endl;
- cout<<"## number of total individuals = "<<cPar.ni_study<<endl;
- cout<<"## number of total SNPs = "<<cPar.ns_study<<endl;
- cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- cout<<"## number of variance components = "<<cPar.n_vc<<endl;
-
- //compute q
- Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
-
- //compute S
- time_start=clock();
- cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
- cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
- //compute vc estimates
- CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
- //if LDSC weights, then compute the weights and run the above steps again
- if (cPar.a_mode==62) {
- //compute the weights and normalize the weights for A
- cPar.UpdateWeight (1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
-
- //read beta file again, and update weigths vector
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_total, cPar.ns_test);
-
- //compute q
- Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
-
- //compute S
- time_start=clock();
- cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
- cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
- //compute vc estimates
- CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
- }
-
- gsl_vector_set (s, cPar.n_vc, cPar.ni_test);
-
- cPar.WriteMatrix (S, "S");
- cPar.WriteMatrix (Vq, "Vq");
- cPar.WriteVector (q, "q");
- cPar.WriteVector (s, "size");
-
- gsl_matrix_free (S);
- gsl_matrix_free (Vq);
- gsl_vector_free (q);
- gsl_vector_free (s);
-
- gsl_matrix_free (A);
- gsl_matrix_free (K);
- gsl_vector_free (y);
- gsl_matrix_free (W);
- } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
- if (!cPar.file_study.empty()) {
- string sfile=cPar.file_study+".size.txt";
- CountFileLines (sfile, cPar.n_vc);
- } else {
- string file_name;
- igzstream infile (cPar.file_mstudy.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open mstudy file: "<<cPar.file_study<<endl; return;}
-
- safeGetline(infile, file_name);
-
- infile.clear();
- infile.close();
-
- string sfile=file_name+".size.txt";
- CountFileLines (sfile, cPar.n_vc);
- }
-
- cPar.n_vc=cPar.n_vc-1;
-
- gsl_matrix *S=gsl_matrix_alloc (2*cPar.n_vc, cPar.n_vc);
- gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- //gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1, (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
- //gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1, (n_lines*(n_lines+1))/2*(n_lines+1) );
- gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s_study=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
- gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
-
- gsl_matrix_set_zero(Vq);
- //gsl_matrix_set_zero(V);
- //gsl_matrix_set_zero(Vslope);
- gsl_vector_set_zero(q);
- gsl_vector_set_zero(s_study);
- gsl_vector_set_zero(s_ref);
-
- if (!cPar.file_study.empty()) {
- ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
- } else {
- ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
- }
-
- if (!cPar.file_ref.empty()) {
- ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
- } else {
- ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
- }
-
- cout<<"## number of variance components = "<<cPar.n_vc<<endl;
- cout<<"## number of individuals in the sample = "<<cPar.ni_study<<endl;
- cout<<"## number of individuals in the reference = "<<cPar.ni_ref<<endl;
-
- CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
- gsl_vector_view s_sub=gsl_vector_subvector (s, 0, cPar.n_vc);
- gsl_vector_memcpy (&s_sub.vector, s_ref);
- gsl_vector_set (s, cPar.n_vc, cPar.ni_ref);
-
- cPar.WriteMatrix (S, "S");
- cPar.WriteMatrix (Vq, "Vq");
- cPar.WriteVector (q, "q");
- cPar.WriteVector (s, "size");
-
- gsl_matrix_free (S);
- gsl_matrix_free (Vq);
- //gsl_matrix_free (V);
- //gsl_matrix_free (Vslope);
- gsl_vector_free (q);
- gsl_vector_free (s_study);
- gsl_vector_free (s_ref);
- gsl_vector_free (s);
- } else {
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc );
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
-
- //read kinship matrices
- if (!(cPar.file_mk).empty()) {
- ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G, and obtain v_traceG
- double d=0;
- (cPar.v_traceG).clear();
- for (size_t i=0; i<cPar.n_vc; i++) {
- gsl_matrix_view G_sub=gsl_matrix_submatrix (G, 0, i*G->size1, G->size1, G->size1);
- CenterMatrix (&G_sub.matrix);
- d=0;
- for (size_t j=0; j<G->size1; j++) {
- d+=gsl_matrix_get (&G_sub.matrix, j, j);
- }
- d/=(double)G->size1;
- (cPar.v_traceG).push_back(d);
- }
- } else if (!(cPar.file_kin).empty()) {
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
-
- (cPar.v_traceG).clear();
- double d=0;
- for (size_t j=0; j<G->size1; j++) {
- d+=gsl_matrix_get (G, j, j);
- }
- d/=(double)G->size1;
- (cPar.v_traceG).push_back(d);
- }
- /*
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
-
- if (cPar.a_mode==31) {
- cPar.trace_G=EigenDecomp (G, U, eval, 1);
- } else {
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- }
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
-
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- } else {
- ReadFile_eigenU (cPar.file_ku, cPar.error, U);
- if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
- ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
- if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
- cPar.trace_G+=gsl_vector_get(eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- }
- */
- //fit multiple variance components
- if (cPar.n_ph==1) {
- // if (cPar.n_vc==1) {
- /*
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- gsl_vector_view beta=gsl_matrix_row (B, 0);
- gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
- cPar.beta_mle_null.clear();
- cPar.se_beta_mle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
- cPar.beta_remle_null.clear();
- cPar.se_beta_remle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
- cPar.PrintSummary();
-
- //calculate and output residuals
- if (cPar.a_mode==5) {
- gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
- //obtain Utu and Ute
- gsl_vector_memcpy (y_hat, &UtY_col.vector);
- gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
- double d, u, e;
- for (size_t i=0; i<eval->size; i++) {
- d=gsl_vector_get (eval, i);
- u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- gsl_vector_set (Utu_hat, i, u);
- gsl_vector_set (Ute_hat, i, e);
- }
-
- //obtain u and e
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
- cPar.WriteVector(u_hat, "residU");
- cPar.WriteVector(e_hat, "residE");
-
- gsl_vector_free(u_hat);
- gsl_vector_free(e_hat);
- gsl_vector_free(y_hat);
- }
-*/
- // } else {
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- VC cVc;
- cVc.CopyFromParam(cPar);
- if (cPar.a_mode==61) {
- cVc.CalcVChe (G, W, &Y_col.vector);
- } else if (cPar.a_mode==62) {
- cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector);
- } else {
- cVc.CalcVCacl (G, W, &Y_col.vector);
- }
- cVc.CopyToParam(cPar);
- //obtain pve from sigma2
- //obtain se_pve from se_sigma2
-
- //}
- }
- }
-
- }
-
-
- //compute confidence intervals with additional summary statistics
- //we do not check the sign of z-scores here, but they have to be matched with the genotypes
- if (cPar.a_mode==66 || cPar.a_mode==67) {
- //read reference file first
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_set_zero(Svar);
- gsl_vector_set_zero(s_ref);
-
- if (!cPar.file_ref.empty()) {
- ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
- } else {
- ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
- }
-
- //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
- //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
- set<string> setSnps_beta;
- ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
- //obtain the weights for wA, which contains the SNP weights for SNPs used in the model
- map <string, double> mapRS2wK;
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- //set up matrices and vector
- gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
- gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
- gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc);
- gsl_vector *w=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *z=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc);
-
- vector<size_t> vec_cat, vec_size;
- vector<double> vec_z;
-
- map <string, double> mapRS2z, mapRS2wA;
- map <string, string> mapRS2A1;
- string file_str;
-
- //update s_vec, the number of snps in each category
- for (size_t i=0; i<cPar.n_vc; i++) {
- vec_size.push_back(0);
- }
-
- for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
- vec_size[cPar.mapRS2cat[it->first]]++;
- }
-
- for (size_t i=0; i<cPar.n_vc; i++) {
- gsl_vector_set(s_vec, i, vec_size[i]);
- }
-
- //update mapRS2wA using v_pve and s_vec
- if (cPar.a_mode==66) {
- for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
- mapRS2wA[it->first]=1;
- }
- } else {
- cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
- }
-
- //read in z-scores based on allele 0, and save that into a vector
- ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
-
- //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat
- //sign of z is determined by matching alleles
- cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
-
- //compute an n by k matrix of X_iWz
- cout<<"Calculating Xz ... "<<endl;
-
- gsl_matrix_set_zero(Xz);
- gsl_vector_set_all (w1, 1);
-
- if (!cPar.file_bfile.empty() ) {
- file_str=cPar.file_bfile+".bed";
- PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
- } else if (!cPar.file_geno.empty()) {
- BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
- } else if (!cPar.file_mbfile.empty() ){
- MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
- } else if (!cPar.file_mgeno.empty()) {
- MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
- }
- /*
- cout<<"Xz: "<<endl;
- for (size_t i=0; i<5; i++) {
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get (Xz, i, j)<<" ";
- }
- cout<<endl;
- }
- */
- if (cPar.a_mode==66) {
- gsl_matrix_memcpy (XWz, Xz);
- } else if (cPar.a_mode==67) {
- cout<<"Calculating XWz ... "<<endl;
-
- gsl_matrix_set_zero(XWz);
-
- if (!cPar.file_bfile.empty() ) {
- file_str=cPar.file_bfile+".bed";
- PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
- } else if (!cPar.file_geno.empty()) {
- BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
- } else if (!cPar.file_mbfile.empty() ){
- MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
- } else if (!cPar.file_mgeno.empty()) {
- MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
- }
- }
- /*
- cout<<"XWz: "<<endl;
- for (size_t i=0; i<5; i++) {
- cout<<gsl_vector_get (w, i)<<endl;
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get (XWz, i, j)<<" ";
- }
- cout<<endl;
- }
- */
- //compute an p by k matrix of X_j^TWX_iWz
- cout<<"Calculating XtXWz ... "<<endl;
- gsl_matrix_set_zero(XtXWz);
-
- if (!cPar.file_bfile.empty() ) {
- file_str=cPar.file_bfile+".bed";
- PlinkXtXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
- } else if (!cPar.file_geno.empty()) {
- BimbamXtXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
- } else if (!cPar.file_mbfile.empty() ){
- MFILEXtXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
- } else if (!cPar.file_mgeno.empty()) {
- MFILEXtXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
- }
- /*
- cout<<"XtXWz: "<<endl;
- for (size_t i=0; i<5; i++) {
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get (XtXWz, i, j)<<" ";
- }
- cout<<endl;
- }
- */
- //compute confidence intervals
- CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
- //write files
- //cPar.WriteMatrix (XWz, "XWz");
- //cPar.WriteMatrix (XtXWz, "XtXWz");
- //cPar.WriteVector (w, "w");
-
- gsl_matrix_free(S);
- gsl_matrix_free(Svar);
- gsl_vector_free(s_ref);
-
- gsl_matrix_free(Xz);
- gsl_matrix_free(XWz);
- gsl_matrix_free(XtXWz);
- gsl_vector_free(w);
- gsl_vector_free(w1);
- gsl_vector_free(z);
- gsl_vector_free(s_vec);
- }
-
-
- //LMM or mvLMM or Eigen-Decomposition
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==31) { //Fit LMM or mvLMM or eigen
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
- gsl_matrix *B=gsl_matrix_alloc (Y->size2, W->size2); //B is a d by c matrix
- gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
- gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
- gsl_vector *eval=gsl_vector_alloc (Y->size1);
- gsl_vector *env=gsl_vector_alloc (Y->size1);
- gsl_vector *weight=gsl_vector_alloc (Y->size1);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
- if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);}
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
-
- //is residual weights are provided, then
- if (!cPar.file_weight.empty()) {
- cPar.CopyWeight (weight);
- double d, wi, wj;
- for (size_t i=0; i<G->size1; i++) {
- wi=gsl_vector_get(weight, i);
- for (size_t j=i; j<G->size2; j++) {
- wj=gsl_vector_get(weight, j);
- d=gsl_matrix_get(G, i, j);
- if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);}
- gsl_matrix_set(G, i, j, d);
- if (j!=i) {gsl_matrix_set(G, j, i, d);}
- }
- }
- }
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
-
- if (cPar.a_mode==31) {
- cPar.trace_G=EigenDecomp (G, U, eval, 1);
- } else {
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- }
-
- if (!cPar.file_weight.empty()) {
- double wi;
- for (size_t i=0; i<U->size1; i++) {
- wi=gsl_vector_get(weight, i);
- if (wi<=0) {wi=0;} else {wi=sqrt(wi);}
- gsl_vector_view Urow=gsl_matrix_row (U, i);
- gsl_vector_scale (&Urow.vector, wi);
- }
- }
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
-
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- } else {
- ReadFile_eigenU (cPar.file_ku, cPar.error, U);
- if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
- ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
- if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
- cPar.trace_G+=gsl_vector_get(eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- }
-
- if (cPar.a_mode==31) {
- cPar.WriteMatrix(U, "eigenU");
- cPar.WriteVector(eval, "eigenD");
- } else if (!cPar.file_gene.empty() ) {
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- LMM cLmm;
- cLmm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype
-
- cLmm.WriteFiles();
- cLmm.CopyToParam(cPar);
- } else {
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- //calculate REMLE/MLE estimate and pve for univariate model
- if (cPar.n_ph==1) {
- gsl_vector_view beta=gsl_matrix_row (B, 0);
- gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
- cPar.beta_mle_null.clear();
- cPar.se_beta_mle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
- cPar.beta_remle_null.clear();
- cPar.se_beta_remle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
- cPar.PrintSummary();
-
- //calculate and output residuals
- if (cPar.a_mode==5) {
- gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
- //obtain Utu and Ute
- gsl_vector_memcpy (y_hat, &UtY_col.vector);
- gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
- double d, u, e;
- for (size_t i=0; i<eval->size; i++) {
- d=gsl_vector_get (eval, i);
- u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- gsl_vector_set (Utu_hat, i, u);
- gsl_vector_set (Ute_hat, i, e);
- }
-
- //obtain u and e
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
- cPar.WriteVector(u_hat, "residU");
- cPar.WriteVector(e_hat, "residE");
-
- gsl_vector_free(u_hat);
- gsl_vector_free(e_hat);
- gsl_vector_free(y_hat);
- }
- }
-
- //Fit LMM or mvLMM
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) {
- if (cPar.n_ph==1) {
- LMM cLmm;
- cLmm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- if (!cPar.file_bfile.empty()) {
- if (cPar.file_gxe.empty()) {
- cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
- cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
- }
- }
- // WJA added
- else if(!cPar.file_oxford.empty()) {
- cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- }
- else {
- if (cPar.file_gxe.empty()) {
- cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
- cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
- }
- }
-
- cLmm.WriteFiles();
- cLmm.CopyToParam(cPar);
- } else {
- MVLMM cMvlmm;
- cMvlmm.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- if (cPar.file_gxe.empty()) {
- cMvlmm.AnalyzePlink (U, eval, UtW, UtY);
- } else {
- cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env);
- }
- }
- else if(!cPar.file_oxford.empty())
- {
- cMvlmm.Analyzebgen (U, eval, UtW, UtY);
- }
- else {
- if (cPar.file_gxe.empty()) {
- cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY);
- } else {
- cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env);
- }
- }
-
- cMvlmm.WriteFiles();
- cMvlmm.CopyToParam(cPar);
- }
- }
- }
-
-
- //release all matrices and vectors
- gsl_matrix_free (Y);
- gsl_matrix_free (W);
- gsl_matrix_free(B);
- gsl_matrix_free(se_B);
- gsl_matrix_free (G);
- gsl_matrix_free (U);
- gsl_matrix_free (UtW);
- gsl_matrix_free (UtY);
- gsl_vector_free (eval);
- gsl_vector_free (env);
- }
-
-
- //BSLMM
- if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
- gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
- //set covariates matrix W and phenotype vector y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, y, 0);
-
- //center y, even for case/control data
- cPar.pheno_mean=CenterVector(y);
-
- //run bvsr if rho==1
- if (cPar.rho_min==1 && cPar.rho_max==1) {
- //read genotypes X (not UtX)
- cPar.ReadGenotypes (UtX, G, false);
-
- //perform BSLMM analysis
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- cBslmm.MCMC(UtX, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- //else, if rho!=1
- } else {
- gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
- gsl_vector *eval=gsl_vector_alloc (y->size);
- gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
- gsl_vector *Uty=gsl_vector_alloc (y->size);
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- cPar.ReadGenotypes (UtX, G, false);
-
- //read relatedness matrix G
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
- } else {
- cPar.ReadGenotypes (UtX, G, true);
- }
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, y, Uty);
-
- //calculate REMLE/MLE estimate and pve
- CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
- cPar.PrintSummary();
-
- //Creat and calcualte UtX, use a large memory
- cout<<"Calculating UtX..."<<endl;
- time_start=clock();
- CalcUtX (U, UtX);
- cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //perform BSLMM or BSLMMDAP analysis
- if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- if (cPar.a_mode==12) { //ridge regression
- cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
- } else { //Run MCMC
- cBslmm.MCMC(U, UtX, Uty, eval, y);
- }
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- } else {
- }
-
- //release all matrices and vectors
- gsl_matrix_free (G);
- gsl_matrix_free (U);
- gsl_matrix_free (UtW);
- gsl_vector_free (eval);
- gsl_vector_free (Uty);
-
- }
- gsl_matrix_free (W);
- gsl_vector_free (y);
- gsl_matrix_free (UtX);
- }
-
-
-
- //BSLMM-DAP
- if (cPar.a_mode==14 || cPar.a_mode==15 || cPar.a_mode==16) {
- if (cPar.a_mode==14) {
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
- gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
- //set covariates matrix W and phenotype vector y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, y, 0);
-
- //center y, even for case/control data
- cPar.pheno_mean=CenterVector(y);
-
- //run bvsr if rho==1
- if (cPar.rho_min==1 && cPar.rho_max==1) {
- //read genotypes X (not UtX)
- cPar.ReadGenotypes (UtX, G, false);
-
- //perform BSLMM analysis
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- cBslmm.MCMC(UtX, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- //else, if rho!=1
- } else {
- gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
- gsl_vector *eval=gsl_vector_alloc (y->size);
- gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
- gsl_vector *Uty=gsl_vector_alloc (y->size);
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- cPar.ReadGenotypes (UtX, G, false);
-
- //read relatedness matrix G
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
- } else {
- cPar.ReadGenotypes (UtX, G, true);
- }
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, y, Uty);
-
- //calculate REMLE/MLE estimate and pve
- CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
- cPar.PrintSummary();
-
- //Creat and calcualte UtX, use a large memory
- cout<<"Calculating UtX..."<<endl;
- time_start=clock();
- CalcUtX (U, UtX);
- cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //perform analysis; assume X and y are already centered
- BSLMMDAP cBslmmDap;
- cBslmmDap.CopyFromParam(cPar);
- time_start=clock();
- cBslmmDap.DAP_CalcBF (U, UtX, Uty, eval, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmmDap.CopyToParam(cPar);
-
- //release all matrices and vectors
- gsl_matrix_free (G);
- gsl_matrix_free (U);
- gsl_matrix_free (UtW);
- gsl_vector_free (eval);
- gsl_vector_free (Uty);
- }
-
- gsl_matrix_free (W);
- gsl_vector_free (y);
- gsl_matrix_free (UtX);
- } else if (cPar.a_mode==15) {
- //perform EM algorithm and estimate parameters
- vector<string> vec_rs;
- vector<double> vec_sa2, vec_sb2, wab;
- vector<vector<vector<double> > > BF;
-
- //read hyp and bf files (functions defined in BSLMMDAP)
- ReadFile_hyb (cPar.file_hyp, vec_sa2, vec_sb2, wab);
- ReadFile_bf (cPar.file_bf, vec_rs, BF);
-
- cPar.ns_test=vec_rs.size();
- if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"<<endl;}
-
- //load annotations
- gsl_matrix *Ac;
- gsl_matrix_int *Ad;
- gsl_vector_int *dlevel;
- size_t kc, kd;
- if (!cPar.file_cat.empty()) {
- ReadFile_cat (cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
- } else {
- kc=0; kd=0;
- }
-
- cout<<"## number of blocks = "<<BF.size()<<endl;
- cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl;
- cout<<"## grid size for hyperparameters = "<<wab.size()<<endl;
- cout<<"## number of continuous annotations = "<<kc<<endl;
- cout<<"## number of discrete annotations = "<<kd<<endl;
-
- //DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel);
-
- //perform analysis
- BSLMMDAP cBslmmDap;
- cBslmmDap.CopyFromParam(cPar);
- time_start=clock();
- cBslmmDap.DAP_EstimateHyper (kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac, Ad, dlevel);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmmDap.CopyToParam(cPar);
-
- gsl_matrix_free(Ac);
- gsl_matrix_int_free(Ad);
- gsl_vector_int_free(dlevel);
- } else {
- //
- }
-
- }
-
-
-
-
- /*
- //LDR (change 14 to 16?)
- if (cPar.a_mode==14) {
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (1, 1);
- vector<vector<unsigned char> > Xt;
-
- //set covariates matrix W and phenotype vector y
- //an intercept is included in W
- cPar.CopyCvtPhen (W, y, 0);
-
- //read in genotype matrix X
- cPar.ReadGenotypes (Xt, G, false);
-
- LDR cLdr;
- cLdr.CopyFromParam(cPar);
- time_start=clock();
-
- cLdr.VB(Xt, W, y);
-
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cLdr.CopyToParam(cPar);
-
- gsl_vector_free (y);
- gsl_matrix_free (W);
- gsl_matrix_free (G);
- }
- */
-
- cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0);
-
- return;
+void GEMMA::BatchRun(PARAM &cPar) {
+ clock_t time_begin, time_start;
+ time_begin = clock();
+
+ // Read Files.
+ cout << "Reading Files ... " << endl;
+ cPar.ReadFiles();
+ if (cPar.error == true) {
+ cout << "error! fail to read files. " << endl;
+ return;
+ }
+ cPar.CheckData();
+ if (cPar.error == true) {
+ cout << "error! fail to check data. " << endl;
+ return;
+ }
+
+ // Prediction for bslmm
+ if (cPar.a_mode == 41 || cPar.a_mode == 42) {
+ gsl_vector *y_prdt;
+
+ y_prdt = gsl_vector_alloc(cPar.ni_total - cPar.ni_test);
+
+ // set to zero
+ gsl_vector_set_zero(y_prdt);
+
+ PRDT cPRDT;
+ cPRDT.CopyFromParam(cPar);
+
+ // add breeding value if needed
+ if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
+ cout << "Adding Breeding Values ... " << endl;
+
+ gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+ gsl_vector *u_hat = gsl_vector_alloc(cPar.ni_test);
+
+ // read kinship matrix and set u_hat
+ vector<int> indicator_all;
+ size_t c_bv = 0;
+ for (size_t i = 0; i < cPar.indicator_idv.size(); i++) {
+ indicator_all.push_back(1);
+ if (cPar.indicator_bv[i] == 1) {
+ gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]);
+ c_bv++;
+ }
+ }
+
+ ReadFile_kin(cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // read u
+ cPRDT.AddBV(G, u_hat, y_prdt);
+
+ gsl_matrix_free(G);
+ gsl_vector_free(u_hat);
+ }
+
+ // add beta
+ if (!cPar.file_bfile.empty()) {
+ cPRDT.AnalyzePlink(y_prdt);
+ } else {
+ cPRDT.AnalyzeBimbam(y_prdt);
+ }
+
+ // add mu
+ gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
+
+ // convert y to probability if needed
+ if (cPar.a_mode == 42) {
+ double d;
+ for (size_t i = 0; i < y_prdt->size; i++) {
+ d = gsl_vector_get(y_prdt, i);
+ d = gsl_cdf_gaussian_P(d, 1.0);
+ gsl_vector_set(y_prdt, i, d);
+ }
+ }
+
+ cPRDT.CopyToParam(cPar);
+
+ cPRDT.WriteFiles(y_prdt);
+
+ gsl_vector_free(y_prdt);
+ }
+
+ // Prediction with kinship matrix only; for one or more phenotypes
+ if (cPar.a_mode == 43) {
+ // first, use individuals with full phenotypes to obtain estimates of Vg and
+ // Ve
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+ gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+ gsl_vector *eval = gsl_vector_alloc(Y->size1);
+
+ gsl_matrix *Y_full = gsl_matrix_alloc(cPar.ni_cvt, cPar.n_ph);
+ gsl_matrix *W_full = gsl_matrix_alloc(Y_full->size1, cPar.n_cvt);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+ cPar.CopyCvtPhen(W_full, Y_full, 1);
+
+ gsl_matrix *Y_hat = gsl_matrix_alloc(Y_full->size1, cPar.n_ph);
+ gsl_matrix *G_full = gsl_matrix_alloc(Y_full->size1, Y_full->size1);
+ gsl_matrix *H_full = gsl_matrix_alloc(Y_full->size1 * Y_hat->size2,
+ Y_full->size1 * Y_hat->size2);
+
+ // read relatedness matrix G, and matrix G_full
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+ ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G_full);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ CenterMatrix(G_full);
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ // calculate variance component and beta estimates
+ // and then obtain predicted values
+ if (cPar.n_ph == 1) {
+ gsl_vector *beta = gsl_vector_alloc(W->size2);
+ gsl_vector *se_beta = gsl_vector_alloc(W->size2);
+
+ double lambda, logl, vg, ve;
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ // obtain estimates
+ CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, lambda, logl);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, lambda, vg, ve, beta,
+ se_beta);
+
+ cout << "REMLE estimate for vg in the null model = " << vg << endl;
+ cout << "REMLE estimate for ve in the null model = " << ve << endl;
+ cPar.vg_remle_null = vg;
+ cPar.ve_remle_null = ve;
+
+ // obtain Y_hat from fixed effects
+ gsl_vector_view Yhat_col = gsl_matrix_column(Y_hat, 0);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
+
+ // obtain H
+ gsl_matrix_set_identity(H_full);
+ gsl_matrix_scale(H_full, ve);
+ gsl_matrix_scale(G_full, vg);
+ gsl_matrix_add(H_full, G_full);
+
+ // free matrices
+ gsl_vector_free(beta);
+ gsl_vector_free(se_beta);
+ } else {
+ gsl_matrix *Vg = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *Ve = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+ gsl_matrix *se_B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+
+ // obtain estimates
+ CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
+ cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max,
+ cPar.n_region, Vg, Ve, B, se_B);
+
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < Vg->size1; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(Vg, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < Ve->size1; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(Ve, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cPar.Vg_remle_null.clear();
+ cPar.Ve_remle_null.clear();
+ for (size_t i = 0; i < Vg->size1; i++) {
+ for (size_t j = i; j < Vg->size2; j++) {
+ cPar.Vg_remle_null.push_back(gsl_matrix_get(Vg, i, j));
+ cPar.Ve_remle_null.push_back(gsl_matrix_get(Ve, i, j));
+ }
+ }
+
+ // obtain Y_hat from fixed effects
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
+
+ // obtain H
+ KroneckerSym(G_full, Vg, H_full);
+ for (size_t i = 0; i < G_full->size1; i++) {
+ gsl_matrix_view H_sub = gsl_matrix_submatrix(
+ H_full, i * Ve->size1, i * Ve->size2, Ve->size1, Ve->size2);
+ gsl_matrix_add(&H_sub.matrix, Ve);
+ }
+
+ // free matrices
+ gsl_matrix_free(Vg);
+ gsl_matrix_free(Ve);
+ gsl_matrix_free(B);
+ gsl_matrix_free(se_B);
+ }
+
+ PRDT cPRDT;
+
+ cPRDT.CopyFromParam(cPar);
+
+ cout << "Predicting Missing Phentypes ... " << endl;
+ time_start = clock();
+ cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ cPRDT.WriteFiles(Y_full);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_matrix_free(UtY);
+ gsl_vector_free(eval);
+
+ gsl_matrix_free(Y_full);
+ gsl_matrix_free(Y_hat);
+ gsl_matrix_free(W_full);
+ gsl_matrix_free(G_full);
+ gsl_matrix_free(H_full);
+ }
+
+ // Generate Kinship matrix
+ if (cPar.a_mode == 21 || cPar.a_mode == 22) {
+ cout << "Calculating Relatedness Matrix ... " << endl;
+
+ gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+
+ time_start = clock();
+ cPar.CalcKin(G);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate relatedness matrix. " << endl;
+ return;
+ }
+
+ if (cPar.a_mode == 21) {
+ cPar.WriteMatrix(G, "cXX");
+ } else {
+ cPar.WriteMatrix(G, "sXX");
+ }
+
+ gsl_matrix_free(G);
+ }
+
+ // Compute the LDSC weights (not implemented yet)
+ if (cPar.a_mode == 72) {
+ cout << "Calculating Weights ... " << endl;
+
+ VARCOV cVarcov;
+ cVarcov.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cVarcov.AnalyzePlink();
+ } else {
+ cVarcov.AnalyzeBimbam();
+ }
+
+ cVarcov.CopyToParam(cPar);
+ }
+
+ // Compute the S matrix (and its variance), that is used for
+ // variance component estimation using summary statistics.
+ if (cPar.a_mode == 25 || cPar.a_mode == 26) {
+ cout << "Calculating the S Matrix ... " << endl;
+
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+ gsl_vector *ns = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_matrix_set_zero(S);
+ gsl_vector_set_zero(ns);
+
+ gsl_matrix_view S_mat = gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+ gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
+
+ gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix_set_zero(K);
+ gsl_matrix_set_zero(A);
+
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+ cPar.CopyCvtPhen(W, y, 0);
+
+ set<string> setSnps_beta;
+ map<string, double> mapRS2wA, mapRS2wK;
+
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &ns_vec.vector);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ gsl_vector_set(ns, cPar.n_vc, cPar.ni_test);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteVector(ns, "size");
+ cPar.WriteVar("snps");
+
+ gsl_matrix_free(S);
+ gsl_vector_free(ns);
+
+ gsl_matrix_free(A);
+ gsl_matrix_free(K);
+
+ gsl_vector_free(y);
+ gsl_matrix_free(K);
+ }
+
+ // Compute the q vector, that is used for variance component estimation using
+ // summary statistics
+ if (cPar.a_mode == 27 || cPar.a_mode == 28) {
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s);
+
+ gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_ni;
+ vector<double> vec_weight, vec_z2;
+ map<string, double> mapRS2weight;
+ mapRS2weight.clear();
+
+ time_start = clock();
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_total, cPar.ns_total,
+ cPar.ns_test);
+ cout << "## number of total individuals = " << cPar.ni_total << endl;
+ cout << "## number of total SNPs = " << cPar.ns_total << endl;
+ cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+ cout << "Calculating the q vector ... " << endl;
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the q vector. " << endl;
+ return;
+ }
+
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_total);
+
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+ /*
+ for (size_t i=0; i<cPar.n_vc; i++) {
+ cout<<gsl_vector_get(q, i)<<endl;
+ }
+ */
+ gsl_matrix_free(Vq);
+ gsl_vector_free(q);
+ gsl_vector_free(s);
+ }
+
+ // Calculate SNP covariance.
+ if (cPar.a_mode == 71) {
+ VARCOV cVarcov;
+ cVarcov.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cVarcov.AnalyzePlink();
+ } else {
+ cVarcov.AnalyzeBimbam();
+ }
+
+ cVarcov.CopyToParam(cPar);
+ }
+
+ // LM.
+ if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 ||
+ cPar.a_mode == 54) { // Fit LM
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+
+ // Fit LM or mvLM
+ if (cPar.n_ph == 1) {
+ LM cLm;
+ cLm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+
+ if (!cPar.file_gene.empty()) {
+ cLm.AnalyzeGene(W,
+ &Y_col.vector); // y is the predictor, not the phenotype
+ } else if (!cPar.file_bfile.empty()) {
+ cLm.AnalyzePlink(W, &Y_col.vector);
+ } else if (!cPar.file_oxford.empty()) {
+ cLm.Analyzebgen(W, &Y_col.vector);
+ } else {
+ cLm.AnalyzeBimbam(W, &Y_col.vector);
+ }
+
+ cLm.WriteFiles();
+ cLm.CopyToParam(cPar);
+ }
+ /*
+ else {
+ MVLM cMvlm;
+ cMvlm.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cMvlm.AnalyzePlink (W, Y);
+ } else {
+ cMvlm.AnalyzeBimbam (W, Y);
+ }
+
+ cMvlm.WriteFiles();
+ cMvlm.CopyToParam(cPar);
+ }
+ */
+ // release all matrices and vectors
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ }
+
+ // VC estimation with one or multiple kinship matrices
+ // REML approach only
+ // if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already,
+ // in param.cpp
+ // for one phenotype only;
+ if (cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) {
+ if (!cPar.file_beta.empty()) {
+ // need to obtain a common set of SNPs between beta file and the genotype
+ // file; these are saved in mapRS2wA and mapRS2wK
+ // normalize the weight in mapRS2wK to have an average of one; each
+ // element of mapRS2wA is 1
+ // update indicator_snps, so that the numbers are in accordance with
+ // mapRS2wK
+ set<string> setSnps_beta;
+ ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+ map<string, double> mapRS2wA, mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ cPar.UpdateSNP(mapRS2wK);
+
+ // Setup matrices and vectors.
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+ gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+ gsl_matrix_set_zero(K);
+ gsl_matrix_set_zero(A);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Vq);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s);
+
+ cPar.CopyCvtPhen(W, y, 0);
+
+ gsl_matrix_view S_mat =
+ gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+ gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_ni;
+ vector<double> vec_weight, vec_z2;
+
+ // read beta, based on the mapRS2wK
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_study, cPar.ns_study,
+ cPar.ns_test);
+
+ cout << "Study Panel: " << endl;
+ cout << "## number of total individuals = " << cPar.ni_study << endl;
+ cout << "## number of total SNPs = " << cPar.ns_study << endl;
+ cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+
+ // compute q
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+
+ // compute S
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &s_vec.vector);
+ cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ // compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+ cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+ cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+ cPar.v_enrich, cPar.v_se_enrich);
+
+ // if LDSC weights, then compute the weights and run the above steps again
+ if (cPar.a_mode == 62) {
+ // compute the weights and normalize the weights for A
+ cPar.UpdateWeight(1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
+
+ // read beta file again, and update weigths vector
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_study, cPar.ns_total,
+ cPar.ns_test);
+
+ // compute q
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+
+ // compute S
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &s_vec.vector);
+ cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ // compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+ cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+ cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+ cPar.v_enrich, cPar.v_se_enrich);
+ }
+
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Vq);
+ gsl_vector_free(q);
+ gsl_vector_free(s);
+
+ gsl_matrix_free(A);
+ gsl_matrix_free(K);
+ gsl_vector_free(y);
+ gsl_matrix_free(W);
+ } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
+ if (!cPar.file_study.empty()) {
+ string sfile = cPar.file_study + ".size.txt";
+ CountFileLines(sfile, cPar.n_vc);
+ } else {
+ string file_name;
+ igzstream infile(cPar.file_mstudy.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open mstudy file: " << cPar.file_study
+ << endl;
+ return;
+ }
+
+ safeGetline(infile, file_name);
+
+ infile.clear();
+ infile.close();
+
+ string sfile = file_name + ".size.txt";
+ CountFileLines(sfile, cPar.n_vc);
+ }
+
+ cPar.n_vc = cPar.n_vc - 1;
+
+ gsl_matrix *S = gsl_matrix_alloc(2 * cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ // gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1,
+ // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
+ // gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1,
+ // (n_lines*(n_lines+1))/2*(n_lines+1) );
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s_study = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_view S_mat =
+ gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+
+ gsl_matrix_set_zero(Vq);
+ // gsl_matrix_set_zero(V);
+ // gsl_matrix_set_zero(Vslope);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s_study);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_study.empty()) {
+ ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
+ } else {
+ ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
+ }
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+ cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+ cPar.ni_ref);
+ }
+
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+ cout << "## number of individuals in the sample = " << cPar.ni_study
+ << endl;
+ cout << "## number of individuals in the reference = " << cPar.ni_ref
+ << endl;
+
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study,
+ cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
+ cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
+ cPar.v_se_enrich);
+
+ gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
+ gsl_vector_memcpy(&s_sub.vector, s_ref);
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_ref);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Vq);
+ // gsl_matrix_free (V);
+ // gsl_matrix_free (Vslope);
+ gsl_vector_free(q);
+ gsl_vector_free(s_study);
+ gsl_vector_free(s_ref);
+ gsl_vector_free(s);
+ } else {
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1 * cPar.n_vc);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+
+ // read kinship matrices
+ if (!(cPar.file_mk).empty()) {
+ ReadFile_mk(cPar.file_mk, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G, and obtain v_traceG
+ double d = 0;
+ (cPar.v_traceG).clear();
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ gsl_matrix_view G_sub =
+ gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1);
+ CenterMatrix(&G_sub.matrix);
+ d = 0;
+ for (size_t j = 0; j < G->size1; j++) {
+ d += gsl_matrix_get(&G_sub.matrix, j, j);
+ }
+ d /= (double)G->size1;
+ (cPar.v_traceG).push_back(d);
+ }
+ } else if (!(cPar.file_kin).empty()) {
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+
+ (cPar.v_traceG).clear();
+ double d = 0;
+ for (size_t j = 0; j < G->size1; j++) {
+ d += gsl_matrix_get(G, j, j);
+ }
+ d /= (double)G->size1;
+ (cPar.v_traceG).push_back(d);
+ }
+ /*
+ //eigen-decomposition and calculate trace_G
+ cout<<"Start Eigen-Decomposition..."<<endl;
+ time_start=clock();
+
+ if (cPar.a_mode==31) {
+ cPar.trace_G=EigenDecomp (G, U, eval, 1);
+ } else {
+ cPar.trace_G=EigenDecomp (G, U, eval, 0);
+ }
+
+ cPar.trace_G=0.0;
+ for (size_t i=0; i<eval->size; i++) {
+ if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
+ cPar.trace_G+=gsl_vector_get (eval, i);
+ }
+ cPar.trace_G/=(double)eval->size;
+
+ cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+} else {
+ ReadFile_eigenU (cPar.file_ku, cPar.error, U);
+ if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl;
+return;}
+
+ ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
+ if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl;
+return;}
+
+ cPar.trace_G=0.0;
+ for (size_t i=0; i<eval->size; i++) {
+ if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
+ cPar.trace_G+=gsl_vector_get(eval, i);
+ }
+ cPar.trace_G/=(double)eval->size;
}
+*/
+ // fit multiple variance components
+ if (cPar.n_ph == 1) {
+ // if (cPar.n_vc==1) {
+ /*
+ //calculate UtW and Uty
+ CalcUtX (U, W, UtW);
+ CalcUtX (U, Y, UtY);
+
+ gsl_vector_view beta=gsl_matrix_row (B, 0);
+ gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
+ gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
+
+ CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+ cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
+
+ cPar.beta_mle_null.clear();
+ cPar.se_beta_mle_null.clear();
+ for (size_t i=0; i<B->size2; i++) {
+ cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
+ cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+ }
+
+ CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+ cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
+ cPar.beta_remle_null.clear();
+ cPar.se_beta_remle_null.clear();
+ for (size_t i=0; i<B->size2; i++) {
+ cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
+ cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+ }
+
+ CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+ cPar.pve_null, cPar.pve_se_null);
+ cPar.PrintSummary();
+
+ //calculate and output residuals
+ if (cPar.a_mode==5) {
+ gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
+
+ //obtain Utu and Ute
+ gsl_vector_memcpy (y_hat, &UtY_col.vector);
+ gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+ double d, u, e;
+ for (size_t i=0; i<eval->size; i++) {
+ d=gsl_vector_get (eval, i);
+ u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat,
+ i);
+ e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
+ gsl_vector_set (Utu_hat, i, u);
+ gsl_vector_set (Ute_hat, i, e);
+ }
+
+ //obtain u and e
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+ //output residuals
+ cPar.WriteVector(u_hat, "residU");
+ cPar.WriteVector(e_hat, "residE");
+
+ gsl_vector_free(u_hat);
+ gsl_vector_free(e_hat);
+ gsl_vector_free(y_hat);
+ }
+*/
+ // } else {
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ VC cVc;
+ cVc.CopyFromParam(cPar);
+ if (cPar.a_mode == 61) {
+ cVc.CalcVChe(G, W, &Y_col.vector);
+ } else if (cPar.a_mode == 62) {
+ cVc.CalcVCreml(cPar.noconstrain, G, W, &Y_col.vector);
+ } else {
+ cVc.CalcVCacl(G, W, &Y_col.vector);
+ }
+ cVc.CopyToParam(cPar);
+ // obtain pve from sigma2
+ // obtain se_pve from se_sigma2
+
+ //}
+ }
+ }
+ }
+
+ // compute confidence intervals with additional summary statistics
+ // we do not check the sign of z-scores here, but they have to be matched with
+ // the genotypes
+ if (cPar.a_mode == 66 || cPar.a_mode == 67) {
+ // read reference file first
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Svar = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Svar);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
+ }
+
+ // need to obtain a common set of SNPs between beta file and the genotype
+ // file; these are saved in mapRS2wA and mapRS2wK
+ // normalize the weight in mapRS2wK to have an average of one; each element
+ // of mapRS2wA is 1
+ set<string> setSnps_beta;
+ ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+ // obtain the weights for wA, which contains the SNP weights for SNPs used
+ // in the model
+ map<string, double> mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ // set up matrices and vector
+ gsl_matrix *Xz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XWz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XtXWz =
+ gsl_matrix_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
+ gsl_vector *w = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *w1 = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *z = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *s_vec = gsl_vector_alloc(cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_size;
+ vector<double> vec_z;
+
+ map<string, double> mapRS2z, mapRS2wA;
+ map<string, string> mapRS2A1;
+ string file_str;
+
+ // update s_vec, the number of snps in each category
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ vec_size.push_back(0);
+ }
+
+ for (map<string, double>::const_iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ vec_size[cPar.mapRS2cat[it->first]]++;
+ }
+
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ gsl_vector_set(s_vec, i, vec_size[i]);
+ }
+
+ // update mapRS2wA using v_pve and s_vec
+ if (cPar.a_mode == 66) {
+ for (map<string, double>::const_iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ mapRS2wA[it->first] = 1;
+ }
+ } else {
+ cPar.UpdateWeight(0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
+ }
+
+ // read in z-scores based on allele 0, and save that into a vector
+ ReadFile_beta(cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
+
+ // update snp indicator, save weights to w, save z-scores to vec_z, save
+ // category label to vec_cat
+ // sign of z is determined by matching alleles
+ cPar.UpdateSNPnZ(mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
+
+ // compute an n by k matrix of X_iWz
+ cout << "Calculating Xz ... " << endl;
+
+ gsl_matrix_set_zero(Xz);
+ gsl_vector_set_all(w1, 1);
+
+ if (!cPar.file_bfile.empty()) {
+ file_str = cPar.file_bfile + ".bed";
+ PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+ vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+ cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_mbfile.empty()) {
+ MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ }
+ /*
+ cout<<"Xz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (Xz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ if (cPar.a_mode == 66) {
+ gsl_matrix_memcpy(XWz, Xz);
+ } else if (cPar.a_mode == 67) {
+ cout << "Calculating XWz ... " << endl;
+
+ gsl_matrix_set_zero(XWz);
+
+ if (!cPar.file_bfile.empty()) {
+ file_str = cPar.file_bfile + ".bed";
+ PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+ vec_cat, w, z, 0, XWz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+ cPar.indicator_snp, vec_cat, w, z, 0, XWz);
+ } else if (!cPar.file_mbfile.empty()) {
+ MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w, z, XWz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w, z, XWz);
+ }
+ }
+ /*
+ cout<<"XWz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ cout<<gsl_vector_get (w, i)<<endl;
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (XWz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ // compute an p by k matrix of X_j^TWX_iWz
+ cout << "Calculating XtXWz ... " << endl;
+ gsl_matrix_set_zero(XtXWz);
+
+ if (!cPar.file_bfile.empty()) {
+ file_str = cPar.file_bfile + ".bed";
+ PlinkXtXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+ XWz, 0, XtXWz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXtXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+ cPar.indicator_snp, XWz, 0, XtXWz);
+ } else if (!cPar.file_mbfile.empty()) {
+ MFILEXtXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, XWz, XtXWz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXtXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, XWz, XtXWz);
+ }
+ /*
+ cout<<"XtXWz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (XtXWz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ // compute confidence intervals
+ CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve,
+ cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2,
+ cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+
+ // write files
+ // cPar.WriteMatrix (XWz, "XWz");
+ // cPar.WriteMatrix (XtXWz, "XtXWz");
+ // cPar.WriteVector (w, "w");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Svar);
+ gsl_vector_free(s_ref);
+
+ gsl_matrix_free(Xz);
+ gsl_matrix_free(XWz);
+ gsl_matrix_free(XtXWz);
+ gsl_vector_free(w);
+ gsl_vector_free(w1);
+ gsl_vector_free(z);
+ gsl_vector_free(s_vec);
+ }
+
+ // LMM or mvLMM or Eigen-Decomposition
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4 || cPar.a_mode == 5 ||
+ cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c
+ // matrix
+ gsl_matrix *se_B = gsl_matrix_alloc(Y->size2, W->size2);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+ gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+ gsl_vector *eval = gsl_vector_alloc(Y->size1);
+ gsl_vector *env = gsl_vector_alloc(Y->size1);
+ gsl_vector *weight = gsl_vector_alloc(Y->size1);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+ if (!cPar.file_gxe.empty()) {
+ cPar.CopyGxe(env);
+ }
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+
+ // is residual weights are provided, then
+ if (!cPar.file_weight.empty()) {
+ cPar.CopyWeight(weight);
+ double d, wi, wj;
+ for (size_t i = 0; i < G->size1; i++) {
+ wi = gsl_vector_get(weight, i);
+ for (size_t j = i; j < G->size2; j++) {
+ wj = gsl_vector_get(weight, j);
+ d = gsl_matrix_get(G, i, j);
+ if (wi <= 0 || wj <= 0) {
+ d = 0;
+ } else {
+ d /= sqrt(wi * wj);
+ }
+ gsl_matrix_set(G, i, j, d);
+ if (j != i) {
+ gsl_matrix_set(G, j, i, d);
+ }
+ }
+ }
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+
+ if (cPar.a_mode == 31) {
+ cPar.trace_G = EigenDecomp(G, U, eval, 1);
+ } else {
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ }
+
+ if (!cPar.file_weight.empty()) {
+ double wi;
+ for (size_t i = 0; i < U->size1; i++) {
+ wi = gsl_vector_get(weight, i);
+ if (wi <= 0) {
+ wi = 0;
+ } else {
+ wi = sqrt(wi);
+ }
+ gsl_vector_view Urow = gsl_matrix_row(U, i);
+ gsl_vector_scale(&Urow.vector, wi);
+ }
+ }
+
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ } else {
+ ReadFile_eigenU(cPar.file_ku, cPar.error, U);
+ if (cPar.error == true) {
+ cout << "error! fail to read the U file. " << endl;
+ return;
+ }
+
+ ReadFile_eigenD(cPar.file_kd, cPar.error, eval);
+ if (cPar.error == true) {
+ cout << "error! fail to read the D file. " << endl;
+ return;
+ }
+
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ }
+
+ if (cPar.a_mode == 31) {
+ cPar.WriteMatrix(U, "eigenU");
+ cPar.WriteVector(eval, "eigenD");
+ } else if (!cPar.file_gene.empty()) {
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ LMM cLmm;
+ cLmm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector); // y is the predictor, not the phenotype
+
+ cLmm.WriteFiles();
+ cLmm.CopyToParam(cPar);
+ } else {
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ // calculate REMLE/MLE estimate and pve for univariate model
+ if (cPar.n_ph == 1) {
+ gsl_vector_view beta = gsl_matrix_row(B, 0);
+ gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+ cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
+ &se_beta.vector);
+
+ cPar.beta_mle_null.clear();
+ cPar.se_beta_mle_null.clear();
+ for (size_t i = 0; i < B->size2; i++) {
+ cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
+ cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
+ }
+
+ CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+ cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector,
+ &se_beta.vector);
+ cPar.beta_remle_null.clear();
+ cPar.se_beta_remle_null.clear();
+ for (size_t i = 0; i < B->size2; i++) {
+ cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
+ cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
+ }
+
+ CalcPve(eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+ cPar.pve_null, cPar.pve_se_null);
+ cPar.PrintSummary();
+
+ // calculate and output residuals
+ if (cPar.a_mode == 5) {
+ gsl_vector *Utu_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *Ute_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *u_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *e_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *y_hat = gsl_vector_alloc(Y->size1);
+
+ // obtain Utu and Ute
+ gsl_vector_memcpy(y_hat, &UtY_col.vector);
+ gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+ double d, u, e;
+ for (size_t i = 0; i < eval->size; i++) {
+ d = gsl_vector_get(eval, i);
+ u = cPar.l_remle_null * d / (cPar.l_remle_null * d + 1.0) *
+ gsl_vector_get(y_hat, i);
+ e = 1.0 / (cPar.l_remle_null * d + 1.0) * gsl_vector_get(y_hat, i);
+ gsl_vector_set(Utu_hat, i, u);
+ gsl_vector_set(Ute_hat, i, e);
+ }
+
+ // obtain u and e
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+ // output residuals
+ cPar.WriteVector(u_hat, "residU");
+ cPar.WriteVector(e_hat, "residE");
+
+ gsl_vector_free(u_hat);
+ gsl_vector_free(e_hat);
+ gsl_vector_free(y_hat);
+ }
+ }
+
+ // Fit LMM or mvLMM
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4) {
+ if (cPar.n_ph == 1) {
+ LMM cLmm;
+ cLmm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ if (!cPar.file_bfile.empty()) {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector);
+ } else {
+ cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector, env);
+ }
+ }
+ // WJA added
+ else if (!cPar.file_oxford.empty()) {
+ cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
+ } else {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector);
+ } else {
+ cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector, env);
+ }
+ }
+
+ cLmm.WriteFiles();
+ cLmm.CopyToParam(cPar);
+ } else {
+ MVLMM cMvlmm;
+ cMvlmm.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzePlink(U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
+ }
+ } else if (!cPar.file_oxford.empty()) {
+ cMvlmm.Analyzebgen(U, eval, UtW, UtY);
+ } else {
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzeBimbamGXE(U, eval, UtW, UtY, env);
+ }
+ }
+
+ cMvlmm.WriteFiles();
+ cMvlmm.CopyToParam(cPar);
+ }
+ }
+ }
+
+ // release all matrices and vectors
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ gsl_matrix_free(B);
+ gsl_matrix_free(se_B);
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_matrix_free(UtY);
+ gsl_vector_free(eval);
+ gsl_vector_free(env);
+ }
+
+ // BSLMM
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+ // set covariates matrix W and phenotype vector y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, y, 0);
+
+ // center y, even for case/control data
+ cPar.pheno_mean = CenterVector(y);
+
+ // run bvsr if rho==1
+ if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+ // read genotypes X (not UtX)
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // perform BSLMM analysis
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmm.MCMC(UtX, y);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ // else, if rho!=1
+ } else {
+ gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // read relatedness matrix G
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ } else {
+ cPar.ReadGenotypes(UtX, G, true);
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, y, Uty);
+
+ // calculate REMLE/MLE estimate and pve
+ CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+ cPar.pve_se_null);
+
+ cPar.PrintSummary();
+
+ // Creat and calcualte UtX, use a large memory
+ cout << "Calculating UtX..." << endl;
+ time_start = clock();
+ CalcUtX(U, UtX);
+ cPar.time_UtX = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // perform BSLMM or BSLMMDAP analysis
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ if (cPar.a_mode == 12) { // ridge regression
+ cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
+ } else { // Run MCMC
+ cBslmm.MCMC(U, UtX, Uty, eval, y);
+ }
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ } else {
+ }
+
+ // release all matrices and vectors
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_vector_free(eval);
+ gsl_vector_free(Uty);
+ }
+ gsl_matrix_free(W);
+ gsl_vector_free(y);
+ gsl_matrix_free(UtX);
+ }
+
+ // BSLMM-DAP
+ if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
+ if (cPar.a_mode == 14) {
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+ // set covariates matrix W and phenotype vector y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, y, 0);
+
+ // center y, even for case/control data
+ cPar.pheno_mean = CenterVector(y);
+
+ // run bvsr if rho==1
+ if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+ // read genotypes X (not UtX)
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // perform BSLMM analysis
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmm.MCMC(UtX, y);
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ // else, if rho!=1
+ } else {
+ gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // read relatedness matrix G
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ } else {
+ cPar.ReadGenotypes(UtX, G, true);
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, y, Uty);
+
+ // calculate REMLE/MLE estimate and pve
+ CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+ cPar.pve_se_null);
+
+ cPar.PrintSummary();
+
+ // Creat and calcualte UtX, use a large memory
+ cout << "Calculating UtX..." << endl;
+ time_start = clock();
+ CalcUtX(U, UtX);
+ cPar.time_UtX =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // perform analysis; assume X and y are already centered
+ BSLMMDAP cBslmmDap;
+ cBslmmDap.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmmDap.DAP_CalcBF(U, UtX, Uty, eval, y);
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmmDap.CopyToParam(cPar);
+
+ // release all matrices and vectors
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_vector_free(eval);
+ gsl_vector_free(Uty);
+ }
+
+ gsl_matrix_free(W);
+ gsl_vector_free(y);
+ gsl_matrix_free(UtX);
+ } else if (cPar.a_mode == 15) {
+ // perform EM algorithm and estimate parameters
+ vector<string> vec_rs;
+ vector<double> vec_sa2, vec_sb2, wab;
+ vector<vector<vector<double>>> BF;
+
+ // read hyp and bf files (functions defined in BSLMMDAP)
+ ReadFile_hyb(cPar.file_hyp, vec_sa2, vec_sb2, wab);
+ ReadFile_bf(cPar.file_bf, vec_rs, BF);
+
+ cPar.ns_test = vec_rs.size();
+ if (wab.size() != BF[0][0].size()) {
+ cout << "error! hyp and bf files dimension do not match" << endl;
+ }
+
+ // load annotations
+ gsl_matrix *Ac;
+ gsl_matrix_int *Ad;
+ gsl_vector_int *dlevel;
+ size_t kc, kd;
+ if (!cPar.file_cat.empty()) {
+ ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
+ } else {
+ kc = 0;
+ kd = 0;
+ }
+
+ cout << "## number of blocks = " << BF.size() << endl;
+ cout << "## number of analyzed SNPs = " << vec_rs.size() << endl;
+ cout << "## grid size for hyperparameters = " << wab.size() << endl;
+ cout << "## number of continuous annotations = " << kc << endl;
+ cout << "## number of discrete annotations = " << kd << endl;
+
+ // DAP_EstimateHyper (const size_t kc, const size_t kd, const
+ // vector<string> &vec_rs, const vector<double> &vec_sa2, const
+ // vector<double> &vec_sb2, const vector<double> &wab, const
+ // vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int
+ // *Ad, gsl_vector_int *dlevel);
+
+ // perform analysis
+ BSLMMDAP cBslmmDap;
+ cBslmmDap.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmmDap.DAP_EstimateHyper(kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac,
+ Ad, dlevel);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmmDap.CopyToParam(cPar);
+
+ gsl_matrix_free(Ac);
+ gsl_matrix_int_free(Ad);
+ gsl_vector_int_free(dlevel);
+ } else {
+ //
+ }
+ }
+
+ /*
+ //LDR (change 14 to 16?)
+ if (cPar.a_mode==14) {
+ gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+ gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
+ gsl_matrix *G=gsl_matrix_alloc (1, 1);
+ vector<vector<unsigned char> > Xt;
+
+ //set covariates matrix W and phenotype vector y
+ //an intercept is included in W
+ cPar.CopyCvtPhen (W, y, 0);
+ //read in genotype matrix X
+ cPar.ReadGenotypes (Xt, G, false);
+ LDR cLdr;
+ cLdr.CopyFromParam(cPar);
+ time_start=clock();
+ cLdr.VB(Xt, W, y);
-void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
-{
- string file_str;
- file_str=cPar.path_out+"/"+cPar.file_out;
- file_str+=".log.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {cout<<"error writing log file: "<<file_str.c_str()<<endl; return;}
-
- outfile<<"##"<<endl;
- outfile<<"## GEMMA Version = "<<version<<endl;
-
- outfile<<"##"<<endl;
- outfile<<"## Command Line Input = ";
- for(int i = 0; i < argc; i++) {
- outfile<<argv[i]<<" ";
- }
- outfile<<endl;
-
- outfile<<"##"<<endl;
- time_t rawtime;
- time(&rawtime);
- tm *ptm = localtime (&rawtime);
-
- outfile<<"## Date = "<<asctime(ptm);
- //ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
-
- outfile<<"##"<<endl;
- outfile<<"## Summary Statistics:"<<endl;
- if (!cPar.file_cor.empty() || !cPar.file_study.empty() || !cPar.file_mstudy.empty() ) {
- outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
- outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
- //outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
- //outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_ref<<endl;
- //outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- //outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-
- outfile<<"## pve estimates = ";
- for (size_t i=0; i<cPar.v_pve.size(); i++) {
- outfile<<" "<<cPar.v_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## se(pve) = ";
- for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
- outfile<<" "<<cPar.v_se_pve[i];
- }
- outfile<<endl;
-
- if (cPar.n_vc>1) {
- outfile<<"## total pve = "<<cPar.pve_total<<endl;
- outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
- }
-
- outfile<<"## sigma2 per snp = ";
- for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## se(sigma2 per snp) = ";
- for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_se_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## enrichment = ";
- for (size_t i=0; i<cPar.v_enrich.size(); i++) {
- outfile<<" "<<cPar.v_enrich[i];
- }
- outfile<<endl;
-
- outfile<<"## se(enrichment) = ";
- for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
- outfile<<" "<<cPar.v_se_enrich[i];
- }
- outfile<<endl;
- } else if (!cPar.file_beta.empty() && (cPar.a_mode==61 || cPar.a_mode==62) ) {
- outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
- outfile<<"## number of total individuals in the reference = "<<cPar.ni_total<<endl;
- outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
- outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_total<<endl;
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
- } else if (!cPar.file_beta.empty() && (cPar.a_mode==66 || cPar.a_mode==67) ) {
- outfile<<"## number of total individuals in the sample = "<<cPar.ni_total<<endl;
- outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
- outfile<<"## number of total SNPs in the sample = "<<cPar.ns_total<<endl;
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-
- outfile<<"## pve estimates = ";
- for (size_t i=0; i<cPar.v_pve.size(); i++) {
- outfile<<" "<<cPar.v_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## se(pve) = ";
- for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
- outfile<<" "<<cPar.v_se_pve[i];
- }
- outfile<<endl;
-
- if (cPar.n_vc>1) {
- outfile<<"## total pve = "<<cPar.pve_total<<endl;
- outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
- }
-
- outfile<<"## sigma2 per snp = ";
- for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## se(sigma2 per snp) = ";
- for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_se_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## enrichment = ";
- for (size_t i=0; i<cPar.v_enrich.size(); i++) {
- outfile<<" "<<cPar.v_enrich[i];
- }
- outfile<<endl;
-
- outfile<<"## se(enrichment) = ";
- for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
- outfile<<" "<<cPar.v_se_enrich[i];
- }
- outfile<<endl;
- } else {
- outfile<<"## number of total individuals = "<<cPar.ni_total<<endl;
-
- if (cPar.a_mode==43) {
- outfile<<"## number of analyzed individuals = "<<cPar.ni_cvt<<endl;
- outfile<<"## number of individuals with full phenotypes = "<<cPar.ni_test<<endl;
- } else if (cPar.a_mode!=27 && cPar.a_mode!=28) {
- outfile<<"## number of analyzed individuals = "<<cPar.ni_test<<endl;
- }
-
- outfile<<"## number of covariates = "<<cPar.n_cvt<<endl;
- outfile<<"## number of phenotypes = "<<cPar.n_ph<<endl;
- if (cPar.a_mode==43) {
- outfile<<"## number of observed data = "<<cPar.np_obs<<endl;
- outfile<<"## number of missing data = "<<cPar.np_miss<<endl;
- }
- if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63 || cPar.a_mode==66 || cPar.a_mode==67) {
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
- }
-
- if (!(cPar.file_gene).empty()) {
- outfile<<"## number of total genes = "<<cPar.ng_total<<endl;
- outfile<<"## number of analyzed genes = "<<cPar.ng_test<<endl;
- } else if (cPar.file_epm.empty()) {
- outfile<<"## number of total SNPs = "<<cPar.ns_total<<endl;
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- } else {
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- }
-
- if (cPar.a_mode==13) {
- outfile<<"## number of cases = "<<cPar.ni_case<<endl;
- outfile<<"## number of controls = "<<cPar.ni_control<<endl;
- }
- }
-
- if ( (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) {
- // outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
- if (cPar.n_ph==1) {
- outfile<<"## pve estimates = ";
- for (size_t i=0; i<cPar.v_pve.size(); i++) {
- outfile<<" "<<cPar.v_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## se(pve) = ";
- for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
- outfile<<" "<<cPar.v_se_pve[i];
- }
- outfile<<endl;
-
- if (cPar.n_vc>1) {
- outfile<<"## total pve = "<<cPar.pve_total<<endl;
- outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
- }
-
- outfile<<"## sigma2 estimates = ";
- for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## se(sigma2) = ";
- for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_se_sigma2[i];
- }
- outfile<<endl;
-
- if (!cPar.file_beta.empty() ) {
- outfile<<"## enrichment = ";
- for (size_t i=0; i<cPar.v_enrich.size(); i++) {
- outfile<<" "<<cPar.v_enrich[i];
- }
- outfile<<endl;
-
- outfile<<"## se(enrichment) = ";
- for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
- outfile<<" "<<cPar.v_se_enrich[i];
- }
- outfile<<endl;
- }
- /*
- outfile<<"## beta estimate in the null model = ";
- for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.beta_remle_null[i];
- }
- outfile<<endl;
- outfile<<"## se(beta) = ";
- for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.se_beta_remle_null[i];
- }
- outfile<<endl;
- */
- }
- }
-
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
- outfile<<"## MLE log-likelihood in the null model = "<<cPar.logl_mle_H0<<endl;
- if (cPar.n_ph==1) {
- //outfile<<"## lambda REMLE estimate in the null (linear mixed) model = "<<cPar.l_remle_null<<endl;
- //outfile<<"## lambda MLE estimate in the null (linear mixed) model = "<<cPar.l_mle_null<<endl;
- outfile<<"## pve estimate in the null model = "<<cPar.pve_null<<endl;
- outfile<<"## se(pve) in the null model = "<<cPar.pve_se_null<<endl;
- outfile<<"## vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
- outfile<<"## ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
- outfile<<"## beta estimate in the null model = ";
- for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.beta_remle_null[i];
- }
- outfile<<endl;
- outfile<<"## se(beta) = ";
- for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.se_beta_remle_null[i];
- }
- outfile<<endl;
-
- } else {
- size_t c;
- outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Vg_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Vg): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVg_remle_null[c])<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Ve_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Ve): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVe_remle_null[c])<<"\t";
- }
- outfile<<endl;
- }
-
- outfile<<"## MLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_ph; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Vg_mle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Vg): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVg_mle_null[c])<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## MLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_ph; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Ve_mle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Ve): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVe_mle_null[c])<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## estimate for B (d by c) in the null model (columns correspond to the covariates provided in the file): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_cvt; j++) {
- c=i*cPar.n_cvt+j;
- outfile<<cPar.beta_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(B): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_cvt; j++) {
- c=i*cPar.n_cvt+j;
- outfile<<cPar.se_beta_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- }
- }
-
- /*
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- if (cPar.n_ph==1) {
- outfile<<"## REMLE vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
- outfile<<"## REMLE ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
- } else {
- size_t c;
- outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Vg_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Ve_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- }
- }
- */
-
-
- if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
- outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl;
- }
-
- if (cPar.a_mode==11 || cPar.a_mode==13) {
- outfile<<"##"<<endl;
- outfile<<"## MCMC related:"<<endl;
- outfile<<"## initial value of h = "<<cPar.cHyp_initial.h<<endl;
- outfile<<"## initial value of rho = "<<cPar.cHyp_initial.rho<<endl;
- outfile<<"## initial value of pi = "<<exp(cPar.cHyp_initial.logp)<<endl;
- outfile<<"## initial value of |gamma| = "<<cPar.cHyp_initial.n_gamma<<endl;
- outfile<<"## random seed = "<<cPar.randseed<<endl;
- outfile<<"## acceptance ratio = "<<(double)cPar.n_accept/(double)((cPar.w_step+cPar.s_step)*cPar.n_mh)<<endl;
- }
-
- outfile<<"##"<<endl;
- outfile<<"## Computation Time:"<<endl;
- outfile<<"## total computation time = "<<cPar.time_total<<" min "<<endl;
- outfile<<"## computation time break down: "<<endl;
- if (cPar.a_mode==21 || cPar.a_mode==22 || cPar.a_mode==11 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
- outfile<<"## time on calculating relatedness matrix = "<<cPar.time_G<<" min "<<endl;
- }
- if (cPar.a_mode==31) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- }
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- outfile<<"## time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl;
- }
- if ((cPar.a_mode>=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) {
- outfile<<"## time on optimization = "<<cPar.time_opt<<" min "<<endl;
- }
- if (cPar.a_mode==11 || cPar.a_mode==13) {
- outfile<<"## time on proposal = "<<cPar.time_Proposal<<" min "<<endl;
- outfile<<"## time on mcmc = "<<cPar.time_opt<<" min "<<endl;
- outfile<<"## time on Omega = "<<cPar.time_Omega<<" min "<<endl;
- }
- if (cPar.a_mode==41 || cPar.a_mode==42) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- }
- if (cPar.a_mode==43) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- outfile<<"## time on predicting phenotypes = "<<cPar.time_opt<<" min "<<endl;
- }
- outfile<<"##"<<endl;
-
- outfile.close();
- outfile.clear();
- return;
+ cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ cLdr.CopyToParam(cPar);
+
+ gsl_vector_free (y);
+ gsl_matrix_free (W);
+ gsl_matrix_free (G);
+ }
+ */
+
+ cPar.time_total = (clock() - time_begin) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ return;
}
+void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
+ string file_str;
+ file_str = cPar.path_out + "/" + cPar.file_out;
+ file_str += ".log.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing log file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ outfile << "##" << endl;
+ outfile << "## GEMMA Version = " << version << endl;
+ outfile << "##" << endl;
+ outfile << "## Command Line Input = ";
+ for (int i = 0; i < argc; i++) {
+ outfile << argv[i] << " ";
+ }
+ outfile << endl;
+
+ outfile << "##" << endl;
+ time_t rawtime;
+ time(&rawtime);
+ tm *ptm = localtime(&rawtime);
+
+ outfile << "## Date = " << asctime(ptm);
+ // ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
+
+ outfile << "##" << endl;
+ outfile << "## Summary Statistics:" << endl;
+ if (!cPar.file_cor.empty() || !cPar.file_study.empty() ||
+ !cPar.file_mstudy.empty()) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_study << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_ref << endl;
+ // outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
+ // outfile<<"## number of total SNPs in the reference panel =
+ // "<<cPar.ns_ref<<endl;
+ // outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ // outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+
+ outfile << "## pve estimates = ";
+ for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+ outfile << " " << cPar.v_pve[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(pve) = ";
+ for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+ outfile << " " << cPar.v_se_pve[i];
+ }
+ outfile << endl;
+
+ if (cPar.n_vc > 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 per snp = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2 per snp) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ } else if (!cPar.file_beta.empty() &&
+ (cPar.a_mode == 61 || cPar.a_mode == 62)) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_study << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_total << endl;
+ outfile << "## number of total SNPs in the sample = " << cPar.ns_study
+ << endl;
+ outfile << "## number of total SNPs in the reference panel = "
+ << cPar.ns_total << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+ } else if (!cPar.file_beta.empty() &&
+ (cPar.a_mode == 66 || cPar.a_mode == 67)) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_total << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_ref << endl;
+ outfile << "## number of total SNPs in the sample = " << cPar.ns_total
+ << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+
+ outfile << "## pve estimates = ";
+ for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+ outfile << " " << cPar.v_pve[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(pve) = ";
+ for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+ outfile << " " << cPar.v_se_pve[i];
+ }
+ outfile << endl;
+
+ if (cPar.n_vc > 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 per snp = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2 per snp) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ } else {
+ outfile << "## number of total individuals = " << cPar.ni_total << endl;
+
+ if (cPar.a_mode == 43) {
+ outfile << "## number of analyzed individuals = " << cPar.ni_cvt << endl;
+ outfile << "## number of individuals with full phenotypes = "
+ << cPar.ni_test << endl;
+ } else if (cPar.a_mode != 27 && cPar.a_mode != 28) {
+ outfile << "## number of analyzed individuals = " << cPar.ni_test << endl;
+ }
+
+ outfile << "## number of covariates = " << cPar.n_cvt << endl;
+ outfile << "## number of phenotypes = " << cPar.n_ph << endl;
+ if (cPar.a_mode == 43) {
+ outfile << "## number of observed data = " << cPar.np_obs << endl;
+ outfile << "## number of missing data = " << cPar.np_miss << endl;
+ }
+ if (cPar.a_mode == 25 || cPar.a_mode == 26 || cPar.a_mode == 27 ||
+ cPar.a_mode == 28 || cPar.a_mode == 61 || cPar.a_mode == 62 ||
+ cPar.a_mode == 63 || cPar.a_mode == 66 || cPar.a_mode == 67) {
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+ }
+
+ if (!(cPar.file_gene).empty()) {
+ outfile << "## number of total genes = " << cPar.ng_total << endl;
+ outfile << "## number of analyzed genes = " << cPar.ng_test << endl;
+ } else if (cPar.file_epm.empty()) {
+ outfile << "## number of total SNPs = " << cPar.ns_total << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ } else {
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ }
+
+ if (cPar.a_mode == 13) {
+ outfile << "## number of cases = " << cPar.ni_case << endl;
+ outfile << "## number of controls = " << cPar.ni_control << endl;
+ }
+ }
+
+ if ((cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) &&
+ cPar.file_cor.empty() && cPar.file_study.empty() &&
+ cPar.file_mstudy.empty()) {
+ // outfile<<"## REMLE log-likelihood in the null model =
+ //"<<cPar.logl_remle_H0<<endl;
+ if (cPar.n_ph == 1) {
+ outfile << "## pve estimates = ";
+ for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+ outfile << " " << cPar.v_pve[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(pve) = ";
+ for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+ outfile << " " << cPar.v_se_pve[i];
+ }
+ outfile << endl;
+
+ if (cPar.n_vc > 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 estimates = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ if (!cPar.file_beta.empty()) {
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ }
+ /*
+ outfile<<"## beta estimate in the null model = ";
+ for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
+ outfile<<" "<<cPar.beta_remle_null[i];
+ }
+ outfile<<endl;
+ outfile<<"## se(beta) = ";
+ for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
+ outfile<<" "<<cPar.se_beta_remle_null[i];
+ }
+ outfile<<endl;
+ */
+ }
+ }
+
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
+ cPar.a_mode == 12 || cPar.a_mode == 13) {
+ outfile << "## REMLE log-likelihood in the null model = "
+ << cPar.logl_remle_H0 << endl;
+ outfile << "## MLE log-likelihood in the null model = " << cPar.logl_mle_H0
+ << endl;
+ if (cPar.n_ph == 1) {
+ // outfile<<"## lambda REMLE estimate in the null (linear mixed) model =
+ // "<<cPar.l_remle_null<<endl;
+ // outfile<<"## lambda MLE estimate in the null (linear mixed) model =
+ // "<<cPar.l_mle_null<<endl;
+ outfile << "## pve estimate in the null model = " << cPar.pve_null
+ << endl;
+ outfile << "## se(pve) in the null model = " << cPar.pve_se_null << endl;
+ outfile << "## vg estimate in the null model = " << cPar.vg_remle_null
+ << endl;
+ outfile << "## ve estimate in the null model = " << cPar.ve_remle_null
+ << endl;
+ outfile << "## beta estimate in the null model = ";
+ for (size_t i = 0; i < cPar.beta_remle_null.size(); i++) {
+ outfile << " " << cPar.beta_remle_null[i];
+ }
+ outfile << endl;
+ outfile << "## se(beta) = ";
+ for (size_t i = 0; i < cPar.se_beta_remle_null.size(); i++) {
+ outfile << " " << cPar.se_beta_remle_null[i];
+ }
+ outfile << endl;
+
+ } else {
+ size_t c;
+ outfile << "## REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Vg_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Vg): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVg_remle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Ve_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Ve): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVe_remle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+
+ outfile << "## MLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_ph; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Vg_mle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Vg): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVg_mle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## MLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_ph; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Ve_mle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Ve): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVe_mle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## estimate for B (d by c) in the null model (columns "
+ "correspond to the covariates provided in the file): "
+ << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_cvt; j++) {
+ c = i * cPar.n_cvt + j;
+ outfile << cPar.beta_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(B): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_cvt; j++) {
+ c = i * cPar.n_cvt + j;
+ outfile << cPar.se_beta_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ }
+ }
+
+ /*
+ if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 ||
+ cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
+ if (cPar.n_ph==1) {
+ outfile<<"## REMLE vg estimate in the null model =
+ "<<cPar.vg_remle_null<<endl;
+ outfile<<"## REMLE ve estimate in the null model =
+ "<<cPar.ve_remle_null<<endl;
+ } else {
+ size_t c;
+ outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<cPar.n_ph; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
+ outfile<<cPar.Vg_remle_null[c]<<"\t";
+ }
+ outfile<<endl;
+ }
+ outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<cPar.n_ph; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
+ outfile<<cPar.Ve_remle_null[c]<<"\t";
+ }
+ outfile<<endl;
+ }
+ }
+ }
+ */
+
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13 ||
+ cPar.a_mode == 14 || cPar.a_mode == 16) {
+ outfile << "## estimated mean = " << cPar.pheno_mean << endl;
+ }
+
+ if (cPar.a_mode == 11 || cPar.a_mode == 13) {
+ outfile << "##" << endl;
+ outfile << "## MCMC related:" << endl;
+ outfile << "## initial value of h = " << cPar.cHyp_initial.h << endl;
+ outfile << "## initial value of rho = " << cPar.cHyp_initial.rho << endl;
+ outfile << "## initial value of pi = " << exp(cPar.cHyp_initial.logp)
+ << endl;
+ outfile << "## initial value of |gamma| = " << cPar.cHyp_initial.n_gamma
+ << endl;
+ outfile << "## random seed = " << cPar.randseed << endl;
+ outfile << "## acceptance ratio = "
+ << (double)cPar.n_accept /
+ (double)((cPar.w_step + cPar.s_step) * cPar.n_mh)
+ << endl;
+ }
+
+ outfile << "##" << endl;
+ outfile << "## Computation Time:" << endl;
+ outfile << "## total computation time = " << cPar.time_total << " min "
+ << endl;
+ outfile << "## computation time break down: " << endl;
+ if (cPar.a_mode == 21 || cPar.a_mode == 22 || cPar.a_mode == 11 ||
+ cPar.a_mode == 13 || cPar.a_mode == 14 || cPar.a_mode == 16) {
+ outfile << "## time on calculating relatedness matrix = "
+ << cPar.time_G << " min " << endl;
+ }
+ if (cPar.a_mode == 31) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ }
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
+ cPar.a_mode == 12 || cPar.a_mode == 13 || cPar.a_mode == 14 ||
+ cPar.a_mode == 16) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ outfile << "## time on calculating UtX = " << cPar.time_UtX << " min "
+ << endl;
+ }
+ if ((cPar.a_mode >= 1 && cPar.a_mode <= 4) ||
+ (cPar.a_mode >= 51 && cPar.a_mode <= 54)) {
+ outfile << "## time on optimization = " << cPar.time_opt << " min "
+ << endl;
+ }
+ if (cPar.a_mode == 11 || cPar.a_mode == 13) {
+ outfile << "## time on proposal = " << cPar.time_Proposal << " min "
+ << endl;
+ outfile << "## time on mcmc = " << cPar.time_opt << " min " << endl;
+ outfile << "## time on Omega = " << cPar.time_Omega << " min " << endl;
+ }
+ if (cPar.a_mode == 41 || cPar.a_mode == 42) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ }
+ if (cPar.a_mode == 43) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ outfile << "## time on predicting phenotypes = " << cPar.time_opt
+ << " min " << endl;
+ }
+ outfile << "##" << endl;
+
+ outfile.close();
+ outfile.clear();
+ return;
+}