From 3935ba39d30666dd7d4a831155631847c77b70c4 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 2 Aug 2017 08:46:58 +0000
Subject: Massive patch using LLVM coding style. It was generated with:
clang-format -style=LLVM -i *.cpp *.h
Please set your editor to replace tabs with spaces and use indentation of 2 spaces.
---
src/gemma.cpp | 6549 +++++++++++++++++++++++++++++++--------------------------
1 file changed, 3584 insertions(+), 2965 deletions(-)
(limited to 'src/gemma.cpp')
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 1a9ca9b..c72475b 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -16,427 +16,670 @@
along with this program. If not, see .
*/
-#include
+#include
+#include
+#include
#include
+#include
#include
-#include
#include
-#include
-#include
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
-#include "gsl/gsl_eigen.h"
#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
-#include "lapack.h"
-#include "io.h"
-#include "gemma.h"
-#include "vc.h"
-#include "lm.h"
#include "bslmm.h"
#include "bslmmdap.h"
+#include "gemma.h"
+#include "io.h"
+#include "lapack.h"
#include "ldr.h"
+#include "lm.h"
#include "lmm.h"
+#include "mathfunc.h"
#include "mvlmm.h"
#include "prdt.h"
#include "varcov.h"
-#include "mathfunc.h"
+#include "vc.h"
using namespace std;
-GEMMA::GEMMA(void):
-version("0.97"), date("07/27/2017"), year("2017")
-{}
-
-void GEMMA::PrintHeader (void) {
- cout< indicator_all;
- size_t c_bv=0;
- for (size_t i=0; isize; i++) {
- d=gsl_vector_get(y_prdt, i);
- d=gsl_cdf_gaussian_P(d, 1.0);
- gsl_vector_set(y_prdt, i, d);
- }
- }
-
-
- cPRDT.CopyToParam(cPar);
-
- cPRDT.WriteFiles(y_prdt);
-
- gsl_vector_free(y_prdt);
- }
-
- //Prediction with kinship matrix only; for one or more phenotypes
- if (cPar.a_mode==43) {
- //first, use individuals with full phenotypes to obtain estimates of Vg and Ve
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
- gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
- gsl_vector *eval=gsl_vector_alloc (Y->size1);
-
- gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph);
- gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
- cPar.CopyCvtPhen (W_full, Y_full, 1);
-
- gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph);
- gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1);
- gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2);
-
- //read relatedness matrix G, and matrix G_full
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- //calculate variance component and beta estimates
- //and then obtain predicted values
- if (cPar.n_ph==1) {
- gsl_vector *beta=gsl_vector_alloc (W->size2);
- gsl_vector *se_beta=gsl_vector_alloc (W->size2);
-
- double lambda, logl, vg, ve;
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- //obtain estimates
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, lambda, logl);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, lambda, vg, ve, beta, se_beta);
-
- cout<<"REMLE estimate for vg in the null model = "<size1; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<size1; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<size1; i++) {
- for (size_t j=i; jsize2; j++) {
- cPar.Vg_remle_null.push_back(gsl_matrix_get (Vg, i, j) );
- cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) );
- }
- }
-
- //obtain Y_hat from fixed effects
- gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
-
- //obtain H
- KroneckerSym(G_full, Vg, H_full);
- for (size_t i=0; isize1; i++) {
- gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2);
- gsl_matrix_add (&H_sub.matrix, Ve);
- }
-
- //free matrices
- gsl_matrix_free (Vg);
- gsl_matrix_free (Ve);
- gsl_matrix_free (B);
- gsl_matrix_free (se_B);
- }
-
- PRDT cPRDT;
-
- cPRDT.CopyFromParam(cPar);
-
- cout<<"Predicting Missing Phentypes ... "< setSnps_beta;
- map mapRS2wA, mapRS2wK;
-
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- time_start=clock();
- cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector);
- cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "< vec_cat, vec_ni;
- vector vec_weight, vec_z2;
- map mapRS2weight;
- mapRS2weight.clear();
-
- time_start=clock();
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test);
- cout<<"## number of total individuals = "<size1, cPar.n_cvt);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
-
- //Fit LM or mvLM
- if (cPar.n_ph==1) {
- LM cLm;
- cLm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-
- if (!cPar.file_gene.empty()) {
- cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype
- } else if (!cPar.file_bfile.empty()) {
- cLm.AnalyzePlink (W, &Y_col.vector);
- } else if (!cPar.file_oxford.empty()) {
- cLm.Analyzebgen (W, &Y_col.vector);
- } else {
- cLm.AnalyzeBimbam (W, &Y_col.vector);
- }
-
- cLm.WriteFiles();
- cLm.CopyToParam(cPar);
- }
- /*
- else {
- MVLM cMvlm;
- cMvlm.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- cMvlm.AnalyzePlink (W, Y);
- } else {
- cMvlm.AnalyzeBimbam (W, Y);
- }
-
- cMvlm.WriteFiles();
- cMvlm.CopyToParam(cPar);
- }
- */
- //release all matrices and vectors
- gsl_matrix_free (Y);
- gsl_matrix_free (W);
- }
-
- //VC estimation with one or multiple kinship matrices
- //REML approach only
- //if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp
- //for one phenotype only;
- if (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) {
- if (!cPar.file_beta.empty() ) {
- //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
- //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
- //update indicator_snps, so that the numbers are in accordance with mapRS2wK
- set setSnps_beta;
- ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
- map mapRS2wA, mapRS2wK;
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- cPar.UpdateSNP (mapRS2wK);
-
- // Setup matrices and vectors.
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
- gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-
- gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
- gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
-
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
-
- gsl_matrix_set_zero (K);
- gsl_matrix_set_zero (A);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_set_zero(Vq);
- gsl_vector_set_zero (q);
- gsl_vector_set_zero (s);
-
- cPar.CopyCvtPhen (W, y, 0);
-
- gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
- gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
- gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
-
- vector vec_cat, vec_ni;
- vector vec_weight, vec_z2;
-
- //read beta, based on the mapRS2wK
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test);
-
- cout<<"Study Panel: "<size1, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc );
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
-
- //read kinship matrices
- if (!(cPar.file_mk).empty()) {
- ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size1, G->size1, G->size1);
- CenterMatrix (&G_sub.matrix);
- d=0;
- for (size_t j=0; jsize1; j++) {
- d+=gsl_matrix_get (&G_sub.matrix, j, j);
- }
- d/=(double)G->size1;
- (cPar.v_traceG).push_back(d);
- }
- } else if (!(cPar.file_kin).empty()) {
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size1; j++) {
- d+=gsl_matrix_get (G, j, j);
- }
- d/=(double)G->size1;
- (cPar.v_traceG).push_back(d);
- }
- /*
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
-
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- } else {
- ReadFile_eigenU (cPar.file_ku, cPar.error, U);
- if (cPar.error==true) {cout<<"error! fail to read the U file. "<size; i++) {
- if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
- cPar.trace_G+=gsl_vector_get(eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- }
- */
- //fit multiple variance components
- if (cPar.n_ph==1) {
- // if (cPar.n_vc==1) {
- /*
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- gsl_vector_view beta=gsl_matrix_row (B, 0);
- gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
- cPar.beta_mle_null.clear();
- cPar.se_beta_mle_null.clear();
- for (size_t i=0; isize2; i++) {
- cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
- cPar.beta_remle_null.clear();
- cPar.se_beta_remle_null.clear();
- for (size_t i=0; isize2; i++) {
- cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
- cPar.PrintSummary();
-
- //calculate and output residuals
- if (cPar.a_mode==5) {
- gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
- //obtain Utu and Ute
- gsl_vector_memcpy (y_hat, &UtY_col.vector);
- gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
- double d, u, e;
- for (size_t i=0; isize; i++) {
- d=gsl_vector_get (eval, i);
- u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- gsl_vector_set (Utu_hat, i, u);
- gsl_vector_set (Ute_hat, i, e);
- }
-
- //obtain u and e
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
- cPar.WriteVector(u_hat, "residU");
- cPar.WriteVector(e_hat, "residE");
-
- gsl_vector_free(u_hat);
- gsl_vector_free(e_hat);
- gsl_vector_free(y_hat);
- }
-*/
- // } else {
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- VC cVc;
- cVc.CopyFromParam(cPar);
- if (cPar.a_mode==61) {
- cVc.CalcVChe (G, W, &Y_col.vector);
- } else if (cPar.a_mode==62) {
- cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector);
- } else {
- cVc.CalcVCacl (G, W, &Y_col.vector);
- }
- cVc.CopyToParam(cPar);
- //obtain pve from sigma2
- //obtain se_pve from se_sigma2
-
- //}
- }
- }
-
- }
-
-
- //compute confidence intervals with additional summary statistics
- //we do not check the sign of z-scores here, but they have to be matched with the genotypes
- if (cPar.a_mode==66 || cPar.a_mode==67) {
- //read reference file first
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_set_zero(Svar);
- gsl_vector_set_zero(s_ref);
-
- if (!cPar.file_ref.empty()) {
- ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
- } else {
- ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
- }
-
- //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
- //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
- set setSnps_beta;
- ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
- //obtain the weights for wA, which contains the SNP weights for SNPs used in the model
- map mapRS2wK;
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- //set up matrices and vector
- gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
- gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
- gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc);
- gsl_vector *w=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *z=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc);
-
- vector vec_cat, vec_size;
- vector vec_z;
-
- map mapRS2z, mapRS2wA;
- map mapRS2A1;
- string file_str;
-
- //update s_vec, the number of snps in each category
- for (size_t i=0; i::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
- vec_size[cPar.mapRS2cat[it->first]]++;
- }
-
- for (size_t i=0; i::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
- mapRS2wA[it->first]=1;
- }
- } else {
- cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
- }
-
- //read in z-scores based on allele 0, and save that into a vector
- ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
-
- //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat
- //sign of z is determined by matching alleles
- cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
-
- //compute an n by k matrix of X_iWz
- cout<<"Calculating Xz ... "<size1, cPar.n_cvt);
- gsl_matrix *B=gsl_matrix_alloc (Y->size2, W->size2); //B is a d by c matrix
- gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
- gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
- gsl_vector *eval=gsl_vector_alloc (Y->size1);
- gsl_vector *env=gsl_vector_alloc (Y->size1);
- gsl_vector *weight=gsl_vector_alloc (Y->size1);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
- if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);}
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size1; i++) {
- wi=gsl_vector_get(weight, i);
- for (size_t j=i; jsize2; j++) {
- wj=gsl_vector_get(weight, j);
- d=gsl_matrix_get(G, i, j);
- if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);}
- gsl_matrix_set(G, i, j, d);
- if (j!=i) {gsl_matrix_set(G, j, i, d);}
- }
- }
- }
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<size1; i++) {
- wi=gsl_vector_get(weight, i);
- if (wi<=0) {wi=0;} else {wi=sqrt(wi);}
- gsl_vector_view Urow=gsl_matrix_row (U, i);
- gsl_vector_scale (&Urow.vector, wi);
- }
- }
-
- cPar.trace_G=0.0;
- for (size_t i=0; isize; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
-
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- } else {
- ReadFile_eigenU (cPar.file_ku, cPar.error, U);
- if (cPar.error==true) {cout<<"error! fail to read the U file. "<size; i++) {
- if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
- cPar.trace_G+=gsl_vector_get(eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- }
-
- if (cPar.a_mode==31) {
- cPar.WriteMatrix(U, "eigenU");
- cPar.WriteVector(eval, "eigenD");
- } else if (!cPar.file_gene.empty() ) {
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- LMM cLmm;
- cLmm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype
-
- cLmm.WriteFiles();
- cLmm.CopyToParam(cPar);
- } else {
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- //calculate REMLE/MLE estimate and pve for univariate model
- if (cPar.n_ph==1) {
- gsl_vector_view beta=gsl_matrix_row (B, 0);
- gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
- cPar.beta_mle_null.clear();
- cPar.se_beta_mle_null.clear();
- for (size_t i=0; isize2; i++) {
- cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
- cPar.beta_remle_null.clear();
- cPar.se_beta_remle_null.clear();
- for (size_t i=0; isize2; i++) {
- cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
- cPar.PrintSummary();
-
- //calculate and output residuals
- if (cPar.a_mode==5) {
- gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
- //obtain Utu and Ute
- gsl_vector_memcpy (y_hat, &UtY_col.vector);
- gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
- double d, u, e;
- for (size_t i=0; isize; i++) {
- d=gsl_vector_get (eval, i);
- u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- gsl_vector_set (Utu_hat, i, u);
- gsl_vector_set (Ute_hat, i, e);
- }
-
- //obtain u and e
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
- cPar.WriteVector(u_hat, "residU");
- cPar.WriteVector(e_hat, "residE");
-
- gsl_vector_free(u_hat);
- gsl_vector_free(e_hat);
- gsl_vector_free(y_hat);
- }
- }
-
- //Fit LMM or mvLMM
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) {
- if (cPar.n_ph==1) {
- LMM cLmm;
- cLmm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- if (!cPar.file_bfile.empty()) {
- if (cPar.file_gxe.empty()) {
- cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
- cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
- }
- }
- // WJA added
- else if(!cPar.file_oxford.empty()) {
- cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- }
- else {
- if (cPar.file_gxe.empty()) {
- cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
- cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
- }
- }
-
- cLmm.WriteFiles();
- cLmm.CopyToParam(cPar);
- } else {
- MVLMM cMvlmm;
- cMvlmm.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- if (cPar.file_gxe.empty()) {
- cMvlmm.AnalyzePlink (U, eval, UtW, UtY);
- } else {
- cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env);
- }
- }
- else if(!cPar.file_oxford.empty())
- {
- cMvlmm.Analyzebgen (U, eval, UtW, UtY);
- }
- else {
- if (cPar.file_gxe.empty()) {
- cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY);
- } else {
- cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env);
- }
- }
-
- cMvlmm.WriteFiles();
- cMvlmm.CopyToParam(cPar);
- }
- }
- }
-
-
- //release all matrices and vectors
- gsl_matrix_free (Y);
- gsl_matrix_free (W);
- gsl_matrix_free(B);
- gsl_matrix_free(se_B);
- gsl_matrix_free (G);
- gsl_matrix_free (U);
- gsl_matrix_free (UtW);
- gsl_matrix_free (UtY);
- gsl_vector_free (eval);
- gsl_vector_free (env);
- }
-
-
- //BSLMM
- if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
- gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
- //set covariates matrix W and phenotype vector y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, y, 0);
-
- //center y, even for case/control data
- cPar.pheno_mean=CenterVector(y);
-
- //run bvsr if rho==1
- if (cPar.rho_min==1 && cPar.rho_max==1) {
- //read genotypes X (not UtX)
- cPar.ReadGenotypes (UtX, G, false);
-
- //perform BSLMM analysis
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- cBslmm.MCMC(UtX, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- //else, if rho!=1
- } else {
- gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
- gsl_vector *eval=gsl_vector_alloc (y->size);
- gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
- gsl_vector *Uty=gsl_vector_alloc (y->size);
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- cPar.ReadGenotypes (UtX, G, false);
-
- //read relatedness matrix G
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, y, Uty);
-
- //calculate REMLE/MLE estimate and pve
- CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
- cPar.PrintSummary();
-
- //Creat and calcualte UtX, use a large memory
- cout<<"Calculating UtX..."<size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
- gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
- //set covariates matrix W and phenotype vector y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, y, 0);
-
- //center y, even for case/control data
- cPar.pheno_mean=CenterVector(y);
-
- //run bvsr if rho==1
- if (cPar.rho_min==1 && cPar.rho_max==1) {
- //read genotypes X (not UtX)
- cPar.ReadGenotypes (UtX, G, false);
-
- //perform BSLMM analysis
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- cBslmm.MCMC(UtX, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- //else, if rho!=1
- } else {
- gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
- gsl_vector *eval=gsl_vector_alloc (y->size);
- gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
- gsl_vector *Uty=gsl_vector_alloc (y->size);
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- cPar.ReadGenotypes (UtX, G, false);
-
- //read relatedness matrix G
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, y, Uty);
-
- //calculate REMLE/MLE estimate and pve
- CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
- cPar.PrintSummary();
-
- //Creat and calcualte UtX, use a large memory
- cout<<"Calculating UtX..."< vec_rs;
- vector vec_sa2, vec_sb2, wab;
- vector > > BF;
-
- //read hyp and bf files (functions defined in BSLMMDAP)
- ReadFile_hyb (cPar.file_hyp, vec_sa2, vec_sb2, wab);
- ReadFile_bf (cPar.file_bf, vec_rs, BF);
-
- cPar.ns_test=vec_rs.size();
- if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"< indicator_all;
+ size_t c_bv = 0;
+ for (size_t i = 0; i < cPar.indicator_idv.size(); i++) {
+ indicator_all.push_back(1);
+ if (cPar.indicator_bv[i] == 1) {
+ gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]);
+ c_bv++;
+ }
+ }
+
+ ReadFile_kin(cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // read u
+ cPRDT.AddBV(G, u_hat, y_prdt);
+
+ gsl_matrix_free(G);
+ gsl_vector_free(u_hat);
+ }
+
+ // add beta
+ if (!cPar.file_bfile.empty()) {
+ cPRDT.AnalyzePlink(y_prdt);
+ } else {
+ cPRDT.AnalyzeBimbam(y_prdt);
+ }
+
+ // add mu
+ gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
+
+ // convert y to probability if needed
+ if (cPar.a_mode == 42) {
+ double d;
+ for (size_t i = 0; i < y_prdt->size; i++) {
+ d = gsl_vector_get(y_prdt, i);
+ d = gsl_cdf_gaussian_P(d, 1.0);
+ gsl_vector_set(y_prdt, i, d);
+ }
+ }
+
+ cPRDT.CopyToParam(cPar);
+
+ cPRDT.WriteFiles(y_prdt);
+
+ gsl_vector_free(y_prdt);
+ }
+
+ // Prediction with kinship matrix only; for one or more phenotypes
+ if (cPar.a_mode == 43) {
+ // first, use individuals with full phenotypes to obtain estimates of Vg and
+ // Ve
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+ gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+ gsl_vector *eval = gsl_vector_alloc(Y->size1);
+
+ gsl_matrix *Y_full = gsl_matrix_alloc(cPar.ni_cvt, cPar.n_ph);
+ gsl_matrix *W_full = gsl_matrix_alloc(Y_full->size1, cPar.n_cvt);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+ cPar.CopyCvtPhen(W_full, Y_full, 1);
+
+ gsl_matrix *Y_hat = gsl_matrix_alloc(Y_full->size1, cPar.n_ph);
+ gsl_matrix *G_full = gsl_matrix_alloc(Y_full->size1, Y_full->size1);
+ gsl_matrix *H_full = gsl_matrix_alloc(Y_full->size1 * Y_hat->size2,
+ Y_full->size1 * Y_hat->size2);
+
+ // read relatedness matrix G, and matrix G_full
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+ ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G_full);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ CenterMatrix(G_full);
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ // calculate variance component and beta estimates
+ // and then obtain predicted values
+ if (cPar.n_ph == 1) {
+ gsl_vector *beta = gsl_vector_alloc(W->size2);
+ gsl_vector *se_beta = gsl_vector_alloc(W->size2);
+
+ double lambda, logl, vg, ve;
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ // obtain estimates
+ CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, lambda, logl);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, lambda, vg, ve, beta,
+ se_beta);
+
+ cout << "REMLE estimate for vg in the null model = " << vg << endl;
+ cout << "REMLE estimate for ve in the null model = " << ve << endl;
+ cPar.vg_remle_null = vg;
+ cPar.ve_remle_null = ve;
+
+ // obtain Y_hat from fixed effects
+ gsl_vector_view Yhat_col = gsl_matrix_column(Y_hat, 0);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
+
+ // obtain H
+ gsl_matrix_set_identity(H_full);
+ gsl_matrix_scale(H_full, ve);
+ gsl_matrix_scale(G_full, vg);
+ gsl_matrix_add(H_full, G_full);
+
+ // free matrices
+ gsl_vector_free(beta);
+ gsl_vector_free(se_beta);
+ } else {
+ gsl_matrix *Vg = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *Ve = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+ gsl_matrix *se_B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+
+ // obtain estimates
+ CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
+ cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max,
+ cPar.n_region, Vg, Ve, B, se_B);
+
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < Vg->size1; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(Vg, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < Ve->size1; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(Ve, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cPar.Vg_remle_null.clear();
+ cPar.Ve_remle_null.clear();
+ for (size_t i = 0; i < Vg->size1; i++) {
+ for (size_t j = i; j < Vg->size2; j++) {
+ cPar.Vg_remle_null.push_back(gsl_matrix_get(Vg, i, j));
+ cPar.Ve_remle_null.push_back(gsl_matrix_get(Ve, i, j));
+ }
+ }
+
+ // obtain Y_hat from fixed effects
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
+
+ // obtain H
+ KroneckerSym(G_full, Vg, H_full);
+ for (size_t i = 0; i < G_full->size1; i++) {
+ gsl_matrix_view H_sub = gsl_matrix_submatrix(
+ H_full, i * Ve->size1, i * Ve->size2, Ve->size1, Ve->size2);
+ gsl_matrix_add(&H_sub.matrix, Ve);
+ }
+
+ // free matrices
+ gsl_matrix_free(Vg);
+ gsl_matrix_free(Ve);
+ gsl_matrix_free(B);
+ gsl_matrix_free(se_B);
+ }
+
+ PRDT cPRDT;
+
+ cPRDT.CopyFromParam(cPar);
+
+ cout << "Predicting Missing Phentypes ... " << endl;
+ time_start = clock();
+ cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ cPRDT.WriteFiles(Y_full);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_matrix_free(UtY);
+ gsl_vector_free(eval);
+
+ gsl_matrix_free(Y_full);
+ gsl_matrix_free(Y_hat);
+ gsl_matrix_free(W_full);
+ gsl_matrix_free(G_full);
+ gsl_matrix_free(H_full);
+ }
+
+ // Generate Kinship matrix
+ if (cPar.a_mode == 21 || cPar.a_mode == 22) {
+ cout << "Calculating Relatedness Matrix ... " << endl;
+
+ gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+
+ time_start = clock();
+ cPar.CalcKin(G);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate relatedness matrix. " << endl;
+ return;
+ }
+
+ if (cPar.a_mode == 21) {
+ cPar.WriteMatrix(G, "cXX");
+ } else {
+ cPar.WriteMatrix(G, "sXX");
+ }
+
+ gsl_matrix_free(G);
+ }
+
+ // Compute the LDSC weights (not implemented yet)
+ if (cPar.a_mode == 72) {
+ cout << "Calculating Weights ... " << endl;
+
+ VARCOV cVarcov;
+ cVarcov.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cVarcov.AnalyzePlink();
+ } else {
+ cVarcov.AnalyzeBimbam();
+ }
+
+ cVarcov.CopyToParam(cPar);
+ }
+
+ // Compute the S matrix (and its variance), that is used for
+ // variance component estimation using summary statistics.
+ if (cPar.a_mode == 25 || cPar.a_mode == 26) {
+ cout << "Calculating the S Matrix ... " << endl;
+
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+ gsl_vector *ns = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_matrix_set_zero(S);
+ gsl_vector_set_zero(ns);
+
+ gsl_matrix_view S_mat = gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+ gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
+
+ gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix_set_zero(K);
+ gsl_matrix_set_zero(A);
+
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+ cPar.CopyCvtPhen(W, y, 0);
+
+ set setSnps_beta;
+ map mapRS2wA, mapRS2wK;
+
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &ns_vec.vector);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ gsl_vector_set(ns, cPar.n_vc, cPar.ni_test);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteVector(ns, "size");
+ cPar.WriteVar("snps");
+
+ gsl_matrix_free(S);
+ gsl_vector_free(ns);
+
+ gsl_matrix_free(A);
+ gsl_matrix_free(K);
+
+ gsl_vector_free(y);
+ gsl_matrix_free(K);
+ }
+
+ // Compute the q vector, that is used for variance component estimation using
+ // summary statistics
+ if (cPar.a_mode == 27 || cPar.a_mode == 28) {
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s);
+
+ gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector vec_cat, vec_ni;
+ vector vec_weight, vec_z2;
+ map mapRS2weight;
+ mapRS2weight.clear();
+
+ time_start = clock();
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_total, cPar.ns_total,
+ cPar.ns_test);
+ cout << "## number of total individuals = " << cPar.ni_total << endl;
+ cout << "## number of total SNPs = " << cPar.ns_total << endl;
+ cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+ cout << "Calculating the q vector ... " << endl;
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the q vector. " << endl;
+ return;
+ }
+
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_total);
+
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+ /*
+ for (size_t i=0; isize1, cPar.n_cvt);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+
+ // Fit LM or mvLM
+ if (cPar.n_ph == 1) {
+ LM cLm;
+ cLm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+
+ if (!cPar.file_gene.empty()) {
+ cLm.AnalyzeGene(W,
+ &Y_col.vector); // y is the predictor, not the phenotype
+ } else if (!cPar.file_bfile.empty()) {
+ cLm.AnalyzePlink(W, &Y_col.vector);
+ } else if (!cPar.file_oxford.empty()) {
+ cLm.Analyzebgen(W, &Y_col.vector);
+ } else {
+ cLm.AnalyzeBimbam(W, &Y_col.vector);
+ }
+
+ cLm.WriteFiles();
+ cLm.CopyToParam(cPar);
+ }
+ /*
+ else {
+ MVLM cMvlm;
+ cMvlm.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cMvlm.AnalyzePlink (W, Y);
+ } else {
+ cMvlm.AnalyzeBimbam (W, Y);
+ }
+
+ cMvlm.WriteFiles();
+ cMvlm.CopyToParam(cPar);
+ }
+ */
+ // release all matrices and vectors
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ }
+
+ // VC estimation with one or multiple kinship matrices
+ // REML approach only
+ // if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already,
+ // in param.cpp
+ // for one phenotype only;
+ if (cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) {
+ if (!cPar.file_beta.empty()) {
+ // need to obtain a common set of SNPs between beta file and the genotype
+ // file; these are saved in mapRS2wA and mapRS2wK
+ // normalize the weight in mapRS2wK to have an average of one; each
+ // element of mapRS2wA is 1
+ // update indicator_snps, so that the numbers are in accordance with
+ // mapRS2wK
+ set setSnps_beta;
+ ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+ map mapRS2wA, mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ cPar.UpdateSNP(mapRS2wK);
+
+ // Setup matrices and vectors.
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+ gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+ gsl_matrix_set_zero(K);
+ gsl_matrix_set_zero(A);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Vq);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s);
+
+ cPar.CopyCvtPhen(W, y, 0);
+
+ gsl_matrix_view S_mat =
+ gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+ gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector vec_cat, vec_ni;
+ vector vec_weight, vec_z2;
+
+ // read beta, based on the mapRS2wK
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_study, cPar.ns_study,
+ cPar.ns_test);
+
+ cout << "Study Panel: " << endl;
+ cout << "## number of total individuals = " << cPar.ni_study << endl;
+ cout << "## number of total SNPs = " << cPar.ns_study << endl;
+ cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+
+ // compute q
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+
+ // compute S
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &s_vec.vector);
+ cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ // compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+ cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+ cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+ cPar.v_enrich, cPar.v_se_enrich);
+
+ // if LDSC weights, then compute the weights and run the above steps again
+ if (cPar.a_mode == 62) {
+ // compute the weights and normalize the weights for A
+ cPar.UpdateWeight(1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
+
+ // read beta file again, and update weigths vector
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_study, cPar.ns_total,
+ cPar.ns_test);
+
+ // compute q
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+
+ // compute S
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &s_vec.vector);
+ cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ // compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+ cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+ cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+ cPar.v_enrich, cPar.v_se_enrich);
+ }
+
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Vq);
+ gsl_vector_free(q);
+ gsl_vector_free(s);
+
+ gsl_matrix_free(A);
+ gsl_matrix_free(K);
+ gsl_vector_free(y);
+ gsl_matrix_free(W);
+ } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
+ if (!cPar.file_study.empty()) {
+ string sfile = cPar.file_study + ".size.txt";
+ CountFileLines(sfile, cPar.n_vc);
+ } else {
+ string file_name;
+ igzstream infile(cPar.file_mstudy.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open mstudy file: " << cPar.file_study
+ << endl;
+ return;
+ }
+
+ safeGetline(infile, file_name);
+
+ infile.clear();
+ infile.close();
+
+ string sfile = file_name + ".size.txt";
+ CountFileLines(sfile, cPar.n_vc);
+ }
+
+ cPar.n_vc = cPar.n_vc - 1;
+
+ gsl_matrix *S = gsl_matrix_alloc(2 * cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ // gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1,
+ // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
+ // gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1,
+ // (n_lines*(n_lines+1))/2*(n_lines+1) );
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s_study = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_view S_mat =
+ gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+
+ gsl_matrix_set_zero(Vq);
+ // gsl_matrix_set_zero(V);
+ // gsl_matrix_set_zero(Vslope);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s_study);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_study.empty()) {
+ ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
+ } else {
+ ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
+ }
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+ cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+ cPar.ni_ref);
+ }
+
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+ cout << "## number of individuals in the sample = " << cPar.ni_study
+ << endl;
+ cout << "## number of individuals in the reference = " << cPar.ni_ref
+ << endl;
+
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study,
+ cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
+ cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
+ cPar.v_se_enrich);
+
+ gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
+ gsl_vector_memcpy(&s_sub.vector, s_ref);
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_ref);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Vq);
+ // gsl_matrix_free (V);
+ // gsl_matrix_free (Vslope);
+ gsl_vector_free(q);
+ gsl_vector_free(s_study);
+ gsl_vector_free(s_ref);
+ gsl_vector_free(s);
+ } else {
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1 * cPar.n_vc);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+
+ // read kinship matrices
+ if (!(cPar.file_mk).empty()) {
+ ReadFile_mk(cPar.file_mk, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G, and obtain v_traceG
+ double d = 0;
+ (cPar.v_traceG).clear();
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ gsl_matrix_view G_sub =
+ gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1);
+ CenterMatrix(&G_sub.matrix);
+ d = 0;
+ for (size_t j = 0; j < G->size1; j++) {
+ d += gsl_matrix_get(&G_sub.matrix, j, j);
+ }
+ d /= (double)G->size1;
+ (cPar.v_traceG).push_back(d);
+ }
+ } else if (!(cPar.file_kin).empty()) {
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+
+ (cPar.v_traceG).clear();
+ double d = 0;
+ for (size_t j = 0; j < G->size1; j++) {
+ d += gsl_matrix_get(G, j, j);
+ }
+ d /= (double)G->size1;
+ (cPar.v_traceG).push_back(d);
+ }
+ /*
+ //eigen-decomposition and calculate trace_G
+ cout<<"Start Eigen-Decomposition..."<size; i++) {
+ if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
+ cPar.trace_G+=gsl_vector_get (eval, i);
+ }
+ cPar.trace_G/=(double)eval->size;
+
+ cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+} else {
+ ReadFile_eigenU (cPar.file_ku, cPar.error, U);
+ if (cPar.error==true) {cout<<"error! fail to read the U file. "<size; i++) {
+ if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
+ cPar.trace_G+=gsl_vector_get(eval, i);
+ }
+ cPar.trace_G/=(double)eval->size;
}
+*/
+ // fit multiple variance components
+ if (cPar.n_ph == 1) {
+ // if (cPar.n_vc==1) {
+ /*
+ //calculate UtW and Uty
+ CalcUtX (U, W, UtW);
+ CalcUtX (U, Y, UtY);
+
+ gsl_vector_view beta=gsl_matrix_row (B, 0);
+ gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
+ gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
+
+ CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+ cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
+
+ cPar.beta_mle_null.clear();
+ cPar.se_beta_mle_null.clear();
+ for (size_t i=0; isize2; i++) {
+ cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
+ cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+ }
+
+ CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+ cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
+ cPar.beta_remle_null.clear();
+ cPar.se_beta_remle_null.clear();
+ for (size_t i=0; isize2; i++) {
+ cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
+ cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+ }
+
+ CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+ cPar.pve_null, cPar.pve_se_null);
+ cPar.PrintSummary();
+
+ //calculate and output residuals
+ if (cPar.a_mode==5) {
+ gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
+
+ //obtain Utu and Ute
+ gsl_vector_memcpy (y_hat, &UtY_col.vector);
+ gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+ double d, u, e;
+ for (size_t i=0; isize; i++) {
+ d=gsl_vector_get (eval, i);
+ u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat,
+ i);
+ e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
+ gsl_vector_set (Utu_hat, i, u);
+ gsl_vector_set (Ute_hat, i, e);
+ }
+
+ //obtain u and e
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+ //output residuals
+ cPar.WriteVector(u_hat, "residU");
+ cPar.WriteVector(e_hat, "residE");
+
+ gsl_vector_free(u_hat);
+ gsl_vector_free(e_hat);
+ gsl_vector_free(y_hat);
+ }
+*/
+ // } else {
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ VC cVc;
+ cVc.CopyFromParam(cPar);
+ if (cPar.a_mode == 61) {
+ cVc.CalcVChe(G, W, &Y_col.vector);
+ } else if (cPar.a_mode == 62) {
+ cVc.CalcVCreml(cPar.noconstrain, G, W, &Y_col.vector);
+ } else {
+ cVc.CalcVCacl(G, W, &Y_col.vector);
+ }
+ cVc.CopyToParam(cPar);
+ // obtain pve from sigma2
+ // obtain se_pve from se_sigma2
+
+ //}
+ }
+ }
+ }
+
+ // compute confidence intervals with additional summary statistics
+ // we do not check the sign of z-scores here, but they have to be matched with
+ // the genotypes
+ if (cPar.a_mode == 66 || cPar.a_mode == 67) {
+ // read reference file first
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Svar = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Svar);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
+ }
+
+ // need to obtain a common set of SNPs between beta file and the genotype
+ // file; these are saved in mapRS2wA and mapRS2wK
+ // normalize the weight in mapRS2wK to have an average of one; each element
+ // of mapRS2wA is 1
+ set setSnps_beta;
+ ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+ // obtain the weights for wA, which contains the SNP weights for SNPs used
+ // in the model
+ map mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ // set up matrices and vector
+ gsl_matrix *Xz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XWz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XtXWz =
+ gsl_matrix_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
+ gsl_vector *w = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *w1 = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *z = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *s_vec = gsl_vector_alloc(cPar.n_vc);
+
+ vector vec_cat, vec_size;
+ vector vec_z;
+
+ map mapRS2z, mapRS2wA;
+ map mapRS2A1;
+ string file_str;
+
+ // update s_vec, the number of snps in each category
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ vec_size.push_back(0);
+ }
+
+ for (map::const_iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ vec_size[cPar.mapRS2cat[it->first]]++;
+ }
+
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ gsl_vector_set(s_vec, i, vec_size[i]);
+ }
+
+ // update mapRS2wA using v_pve and s_vec
+ if (cPar.a_mode == 66) {
+ for (map::const_iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ mapRS2wA[it->first] = 1;
+ }
+ } else {
+ cPar.UpdateWeight(0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
+ }
+
+ // read in z-scores based on allele 0, and save that into a vector
+ ReadFile_beta(cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
+
+ // update snp indicator, save weights to w, save z-scores to vec_z, save
+ // category label to vec_cat
+ // sign of z is determined by matching alleles
+ cPar.UpdateSNPnZ(mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
+
+ // compute an n by k matrix of X_iWz
+ cout << "Calculating Xz ... " << endl;
+
+ gsl_matrix_set_zero(Xz);
+ gsl_vector_set_all(w1, 1);
+
+ if (!cPar.file_bfile.empty()) {
+ file_str = cPar.file_bfile + ".bed";
+ PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+ vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+ cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_mbfile.empty()) {
+ MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ }
+ /*
+ cout<<"Xz: "<size1, cPar.n_cvt);
+ gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c
+ // matrix
+ gsl_matrix *se_B = gsl_matrix_alloc(Y->size2, W->size2);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+ gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+ gsl_vector *eval = gsl_vector_alloc(Y->size1);
+ gsl_vector *env = gsl_vector_alloc(Y->size1);
+ gsl_vector *weight = gsl_vector_alloc(Y->size1);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+ if (!cPar.file_gxe.empty()) {
+ cPar.CopyGxe(env);
+ }
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+
+ // is residual weights are provided, then
+ if (!cPar.file_weight.empty()) {
+ cPar.CopyWeight(weight);
+ double d, wi, wj;
+ for (size_t i = 0; i < G->size1; i++) {
+ wi = gsl_vector_get(weight, i);
+ for (size_t j = i; j < G->size2; j++) {
+ wj = gsl_vector_get(weight, j);
+ d = gsl_matrix_get(G, i, j);
+ if (wi <= 0 || wj <= 0) {
+ d = 0;
+ } else {
+ d /= sqrt(wi * wj);
+ }
+ gsl_matrix_set(G, i, j, d);
+ if (j != i) {
+ gsl_matrix_set(G, j, i, d);
+ }
+ }
+ }
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+
+ if (cPar.a_mode == 31) {
+ cPar.trace_G = EigenDecomp(G, U, eval, 1);
+ } else {
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ }
+
+ if (!cPar.file_weight.empty()) {
+ double wi;
+ for (size_t i = 0; i < U->size1; i++) {
+ wi = gsl_vector_get(weight, i);
+ if (wi <= 0) {
+ wi = 0;
+ } else {
+ wi = sqrt(wi);
+ }
+ gsl_vector_view Urow = gsl_matrix_row(U, i);
+ gsl_vector_scale(&Urow.vector, wi);
+ }
+ }
+
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ } else {
+ ReadFile_eigenU(cPar.file_ku, cPar.error, U);
+ if (cPar.error == true) {
+ cout << "error! fail to read the U file. " << endl;
+ return;
+ }
+
+ ReadFile_eigenD(cPar.file_kd, cPar.error, eval);
+ if (cPar.error == true) {
+ cout << "error! fail to read the D file. " << endl;
+ return;
+ }
+
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ }
+
+ if (cPar.a_mode == 31) {
+ cPar.WriteMatrix(U, "eigenU");
+ cPar.WriteVector(eval, "eigenD");
+ } else if (!cPar.file_gene.empty()) {
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ LMM cLmm;
+ cLmm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector); // y is the predictor, not the phenotype
+
+ cLmm.WriteFiles();
+ cLmm.CopyToParam(cPar);
+ } else {
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ // calculate REMLE/MLE estimate and pve for univariate model
+ if (cPar.n_ph == 1) {
+ gsl_vector_view beta = gsl_matrix_row(B, 0);
+ gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+ cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
+ &se_beta.vector);
+
+ cPar.beta_mle_null.clear();
+ cPar.se_beta_mle_null.clear();
+ for (size_t i = 0; i < B->size2; i++) {
+ cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
+ cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
+ }
+
+ CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+ cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector,
+ &se_beta.vector);
+ cPar.beta_remle_null.clear();
+ cPar.se_beta_remle_null.clear();
+ for (size_t i = 0; i < B->size2; i++) {
+ cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
+ cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
+ }
+
+ CalcPve(eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+ cPar.pve_null, cPar.pve_se_null);
+ cPar.PrintSummary();
+
+ // calculate and output residuals
+ if (cPar.a_mode == 5) {
+ gsl_vector *Utu_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *Ute_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *u_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *e_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *y_hat = gsl_vector_alloc(Y->size1);
+
+ // obtain Utu and Ute
+ gsl_vector_memcpy(y_hat, &UtY_col.vector);
+ gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+ double d, u, e;
+ for (size_t i = 0; i < eval->size; i++) {
+ d = gsl_vector_get(eval, i);
+ u = cPar.l_remle_null * d / (cPar.l_remle_null * d + 1.0) *
+ gsl_vector_get(y_hat, i);
+ e = 1.0 / (cPar.l_remle_null * d + 1.0) * gsl_vector_get(y_hat, i);
+ gsl_vector_set(Utu_hat, i, u);
+ gsl_vector_set(Ute_hat, i, e);
+ }
+
+ // obtain u and e
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+ // output residuals
+ cPar.WriteVector(u_hat, "residU");
+ cPar.WriteVector(e_hat, "residE");
+
+ gsl_vector_free(u_hat);
+ gsl_vector_free(e_hat);
+ gsl_vector_free(y_hat);
+ }
+ }
+
+ // Fit LMM or mvLMM
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4) {
+ if (cPar.n_ph == 1) {
+ LMM cLmm;
+ cLmm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ if (!cPar.file_bfile.empty()) {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector);
+ } else {
+ cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector, env);
+ }
+ }
+ // WJA added
+ else if (!cPar.file_oxford.empty()) {
+ cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
+ } else {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector);
+ } else {
+ cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector, env);
+ }
+ }
+
+ cLmm.WriteFiles();
+ cLmm.CopyToParam(cPar);
+ } else {
+ MVLMM cMvlmm;
+ cMvlmm.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzePlink(U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
+ }
+ } else if (!cPar.file_oxford.empty()) {
+ cMvlmm.Analyzebgen(U, eval, UtW, UtY);
+ } else {
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzeBimbamGXE(U, eval, UtW, UtY, env);
+ }
+ }
+
+ cMvlmm.WriteFiles();
+ cMvlmm.CopyToParam(cPar);
+ }
+ }
+ }
+
+ // release all matrices and vectors
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ gsl_matrix_free(B);
+ gsl_matrix_free(se_B);
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_matrix_free(UtY);
+ gsl_vector_free(eval);
+ gsl_vector_free(env);
+ }
+
+ // BSLMM
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+ // set covariates matrix W and phenotype vector y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, y, 0);
+
+ // center y, even for case/control data
+ cPar.pheno_mean = CenterVector(y);
+
+ // run bvsr if rho==1
+ if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+ // read genotypes X (not UtX)
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // perform BSLMM analysis
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmm.MCMC(UtX, y);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ // else, if rho!=1
+ } else {
+ gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // read relatedness matrix G
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ } else {
+ cPar.ReadGenotypes(UtX, G, true);
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, y, Uty);
+
+ // calculate REMLE/MLE estimate and pve
+ CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+ cPar.pve_se_null);
+
+ cPar.PrintSummary();
+
+ // Creat and calcualte UtX, use a large memory
+ cout << "Calculating UtX..." << endl;
+ time_start = clock();
+ CalcUtX(U, UtX);
+ cPar.time_UtX = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // perform BSLMM or BSLMMDAP analysis
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ if (cPar.a_mode == 12) { // ridge regression
+ cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
+ } else { // Run MCMC
+ cBslmm.MCMC(U, UtX, Uty, eval, y);
+ }
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ } else {
+ }
+
+ // release all matrices and vectors
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_vector_free(eval);
+ gsl_vector_free(Uty);
+ }
+ gsl_matrix_free(W);
+ gsl_vector_free(y);
+ gsl_matrix_free(UtX);
+ }
+
+ // BSLMM-DAP
+ if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
+ if (cPar.a_mode == 14) {
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+ // set covariates matrix W and phenotype vector y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, y, 0);
+
+ // center y, even for case/control data
+ cPar.pheno_mean = CenterVector(y);
+
+ // run bvsr if rho==1
+ if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+ // read genotypes X (not UtX)
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // perform BSLMM analysis
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmm.MCMC(UtX, y);
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ // else, if rho!=1
+ } else {
+ gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // read relatedness matrix G
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ } else {
+ cPar.ReadGenotypes(UtX, G, true);
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, y, Uty);
+
+ // calculate REMLE/MLE estimate and pve
+ CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+ cPar.pve_se_null);
+
+ cPar.PrintSummary();
+
+ // Creat and calcualte UtX, use a large memory
+ cout << "Calculating UtX..." << endl;
+ time_start = clock();
+ CalcUtX(U, UtX);
+ cPar.time_UtX =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // perform analysis; assume X and y are already centered
+ BSLMMDAP cBslmmDap;
+ cBslmmDap.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmmDap.DAP_CalcBF(U, UtX, Uty, eval, y);
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmmDap.CopyToParam(cPar);
+
+ // release all matrices and vectors
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_vector_free(eval);
+ gsl_vector_free(Uty);
+ }
+
+ gsl_matrix_free(W);
+ gsl_vector_free(y);
+ gsl_matrix_free(UtX);
+ } else if (cPar.a_mode == 15) {
+ // perform EM algorithm and estimate parameters
+ vector vec_rs;
+ vector vec_sa2, vec_sb2, wab;
+ vector>> BF;
+
+ // read hyp and bf files (functions defined in BSLMMDAP)
+ ReadFile_hyb(cPar.file_hyp, vec_sa2, vec_sb2, wab);
+ ReadFile_bf(cPar.file_bf, vec_rs, BF);
+
+ cPar.ns_test = vec_rs.size();
+ if (wab.size() != BF[0][0].size()) {
+ cout << "error! hyp and bf files dimension do not match" << endl;
+ }
+
+ // load annotations
+ gsl_matrix *Ac;
+ gsl_matrix_int *Ad;
+ gsl_vector_int *dlevel;
+ size_t kc, kd;
+ if (!cPar.file_cat.empty()) {
+ ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
+ } else {
+ kc = 0;
+ kd = 0;
+ }
+
+ cout << "## number of blocks = " << BF.size() << endl;
+ cout << "## number of analyzed SNPs = " << vec_rs.size() << endl;
+ cout << "## grid size for hyperparameters = " << wab.size() << endl;
+ cout << "## number of continuous annotations = " << kc << endl;
+ cout << "## number of discrete annotations = " << kd << endl;
+
+ // DAP_EstimateHyper (const size_t kc, const size_t kd, const
+ // vector &vec_rs, const vector &vec_sa2, const
+ // vector &vec_sb2, const vector &wab, const
+ // vector > > &BF, gsl_matrix *Ac, gsl_matrix_int
+ // *Ad, gsl_vector_int *dlevel);
+
+ // perform analysis
+ BSLMMDAP cBslmmDap;
+ cBslmmDap.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmmDap.DAP_EstimateHyper(kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac,
+ Ad, dlevel);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmmDap.CopyToParam(cPar);
+
+ gsl_matrix_free(Ac);
+ gsl_matrix_int_free(Ad);
+ gsl_vector_int_free(dlevel);
+ } else {
+ //
+ }
+ }
+
+ /*
+ //LDR (change 14 to 16?)
+ if (cPar.a_mode==14) {
+ gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+ gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
+ gsl_matrix *G=gsl_matrix_alloc (1, 1);
+ vector > Xt;
+
+ //set covariates matrix W and phenotype vector y
+ //an intercept is included in W
+ cPar.CopyCvtPhen (W, y, 0);
+ //read in genotype matrix X
+ cPar.ReadGenotypes (Xt, G, false);
+ LDR cLdr;
+ cLdr.CopyFromParam(cPar);
+ time_start=clock();
+ cLdr.VB(Xt, W, y);
-void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
-{
- string file_str;
- file_str=cPar.path_out+"/"+cPar.file_out;
- file_str+=".log.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {cout<<"error writing log file: "<tm_month<<":"<tm_day":"<tm_hour<<":"<tm_min<1) {
- outfile<<"## total pve = "<1) {
- outfile<<"## total pve = "<1) {
- outfile<<"## total pve = "<=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) {
- outfile<<"## time on optimization = "<tm_month<<":"<tm_day":"<tm_hour<<":"<tm_min< 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 per snp = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2 per snp) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ } else if (!cPar.file_beta.empty() &&
+ (cPar.a_mode == 61 || cPar.a_mode == 62)) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_study << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_total << endl;
+ outfile << "## number of total SNPs in the sample = " << cPar.ns_study
+ << endl;
+ outfile << "## number of total SNPs in the reference panel = "
+ << cPar.ns_total << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+ } else if (!cPar.file_beta.empty() &&
+ (cPar.a_mode == 66 || cPar.a_mode == 67)) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_total << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_ref << endl;
+ outfile << "## number of total SNPs in the sample = " << cPar.ns_total
+ << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+
+ outfile << "## pve estimates = ";
+ for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+ outfile << " " << cPar.v_pve[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(pve) = ";
+ for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+ outfile << " " << cPar.v_se_pve[i];
+ }
+ outfile << endl;
+
+ if (cPar.n_vc > 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 per snp = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2 per snp) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ } else {
+ outfile << "## number of total individuals = " << cPar.ni_total << endl;
+
+ if (cPar.a_mode == 43) {
+ outfile << "## number of analyzed individuals = " << cPar.ni_cvt << endl;
+ outfile << "## number of individuals with full phenotypes = "
+ << cPar.ni_test << endl;
+ } else if (cPar.a_mode != 27 && cPar.a_mode != 28) {
+ outfile << "## number of analyzed individuals = " << cPar.ni_test << endl;
+ }
+
+ outfile << "## number of covariates = " << cPar.n_cvt << endl;
+ outfile << "## number of phenotypes = " << cPar.n_ph << endl;
+ if (cPar.a_mode == 43) {
+ outfile << "## number of observed data = " << cPar.np_obs << endl;
+ outfile << "## number of missing data = " << cPar.np_miss << endl;
+ }
+ if (cPar.a_mode == 25 || cPar.a_mode == 26 || cPar.a_mode == 27 ||
+ cPar.a_mode == 28 || cPar.a_mode == 61 || cPar.a_mode == 62 ||
+ cPar.a_mode == 63 || cPar.a_mode == 66 || cPar.a_mode == 67) {
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+ }
+
+ if (!(cPar.file_gene).empty()) {
+ outfile << "## number of total genes = " << cPar.ng_total << endl;
+ outfile << "## number of analyzed genes = " << cPar.ng_test << endl;
+ } else if (cPar.file_epm.empty()) {
+ outfile << "## number of total SNPs = " << cPar.ns_total << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ } else {
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ }
+
+ if (cPar.a_mode == 13) {
+ outfile << "## number of cases = " << cPar.ni_case << endl;
+ outfile << "## number of controls = " << cPar.ni_control << endl;
+ }
+ }
+
+ if ((cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) &&
+ cPar.file_cor.empty() && cPar.file_study.empty() &&
+ cPar.file_mstudy.empty()) {
+ // outfile<<"## REMLE log-likelihood in the null model =
+ //"< 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 estimates = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ if (!cPar.file_beta.empty()) {
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ }
+ /*
+ outfile<<"## beta estimate in the null model = ";
+ for (size_t i=0; i= 1 && cPar.a_mode <= 4) ||
+ (cPar.a_mode >= 51 && cPar.a_mode <= 54)) {
+ outfile << "## time on optimization = " << cPar.time_opt << " min "
+ << endl;
+ }
+ if (cPar.a_mode == 11 || cPar.a_mode == 13) {
+ outfile << "## time on proposal = " << cPar.time_Proposal << " min "
+ << endl;
+ outfile << "## time on mcmc = " << cPar.time_opt << " min " << endl;
+ outfile << "## time on Omega = " << cPar.time_Omega << " min " << endl;
+ }
+ if (cPar.a_mode == 41 || cPar.a_mode == 42) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ }
+ if (cPar.a_mode == 43) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ outfile << "## time on predicting phenotypes = " << cPar.time_opt
+ << " min " << endl;
+ }
+ outfile << "##" << endl;
+
+ outfile.close();
+ outfile.clear();
+ return;
+}
--
cgit v1.2.3