From c18588b6d00650b9ce742229fdf1eca7133f58fc Mon Sep 17 00:00:00 2001
From: Peter Carbonetto
Date: Thu, 4 May 2017 14:41:32 -0500
Subject: Local updates made by Xiang---shared via email on May 4, 2017,
 subject: gemma on expression data.

---
 src/gemma.cpp    | 241 +++++++++++++++++++++++++++++++++++++++-----
 src/io.cpp       | 156 +++++++++++++++++++++++++++--
 src/io.h         |   3 +
 src/lmm.cpp      |  24 +++++
 src/mathfunc.cpp |  27 +++++
 src/mathfunc.h   |   1 +
 src/param.cpp    |  84 ++++++++++++++--
 src/param.h      |  11 +-
 src/vc.cpp       | 300 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/vc.h         |   1 +
 10 files changed, 797 insertions(+), 51 deletions(-)

diff --git a/src/gemma.cpp b/src/gemma.cpp
index 682835f..ca9c4aa 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -39,9 +39,12 @@
 #include "vc_float.h"
 #include "lm_float.h"  //for LM class
 #include "bslmm_float.h"  //for BSLMM class
+#include "bslmmdap_float.h"  //for BSLMMDAP class
+#include "ldr_float.h"  //for LDR class
 #include "lmm_float.h"  //for LMM class, and functions CalcLambda, CalcPve, CalcVgVe
 #include "mvlmm_float.h"  //for MVLMM class
 #include "prdt_float.h"	//for PRDT class
+#include "varcov_float.h"  //for MVLMM class
 #include "mathfunc_float.h"	//for a few functions
 #else
 #include "io.h"
@@ -49,9 +52,12 @@
 #include "vc.h"
 #include "lm.h"
 #include "bslmm.h"
+#include "bslmmdap.h"
+#include "ldr.h"
 #include "lmm.h"
 #include "mvlmm.h"
 #include "prdt.h"
+#include "varcov.h"
 #include "mathfunc.h"
 #endif
 
@@ -365,6 +371,8 @@ void GEMMA::PrintHelp(size_t option)
 		cout<<"          options: 1: BSLMM"<<endl;
 		cout<<"                   2: standard ridge regression/GBLUP (no mcmc)"<<endl;
 		cout<<"                   3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
+		cout<<"                   4: BSLMM with DAP for Hyper Parameter Estimation"<<endl;
+		cout<<"                   5: BSLMM with DAP for Fine Mapping"<<endl;
 
 		cout<<" -ldr	  [num]			 "<<" specify analysis options (default 1)."<<endl;
 		cout<<"          options: 1: LDR"<<endl;
@@ -433,7 +441,7 @@ void GEMMA::PrintHelp(size_t option)
 //gq: 27-28
 //eigen: 31-32
 //lmm: 1-5
-//bslmm: 11-13
+//bslmm: 11-15
 //predict: 41-43
 //lm: 51
 //vc: 61
@@ -576,6 +584,20 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
 			str.assign(argv[i]);
 			cPar.file_mcat=str;
 		}
+		else if (strcmp(argv[i], "-catc")==0) {
+			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+			++i;
+			str.clear();
+			str.assign(argv[i]);
+			cPar.file_catc=str;
+		}
+		else if (strcmp(argv[i], "-mcatc")==0) {
+			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+			++i;
+			str.clear();
+			str.assign(argv[i]);
+			cPar.file_mcatc=str;
+		}
 		else if (strcmp(argv[i], "-beta")==0) {
 			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
 			++i;
@@ -583,6 +605,20 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
 			str.assign(argv[i]);
 			cPar.file_beta=str;
 		}
+		else if (strcmp(argv[i], "-bf")==0) {
+			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+			++i;
+			str.clear();
+			str.assign(argv[i]);
+			cPar.file_bf=str;
+		}
+		else if (strcmp(argv[i], "-hyp")==0) {
+			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
+			++i;
+			str.clear();
+			str.assign(argv[i]);
+			cPar.file_hyp=str;
+		}
 		else if (strcmp(argv[i], "-cor")==0) {
 			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
 			++i;
@@ -920,6 +956,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
 			str.assign(argv[i]);
 			cPar.a_mode=10+atoi(str.c_str());
 		}
+		/*
 		else if (strcmp(argv[i], "-ldr")==0) {
 			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
 			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=14; continue;}
@@ -928,6 +965,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar)
 			str.assign(argv[i]);
 			cPar.a_mode=13+atoi(str.c_str());
 		}
+		*/
 		else if (strcmp(argv[i], "-hmin")==0) {
 			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
 			++i;
@@ -1347,7 +1385,6 @@ void GEMMA::BatchRun (PARAM &cPar)
 		gsl_matrix_free (G);
 	}
 
-	/*
 	//Compute the LDSC weights (not implemented yet)
 	if (cPar.a_mode==72) {
 		cout<<"Calculating Weights ... "<<endl;
@@ -1363,7 +1400,7 @@ void GEMMA::BatchRun (PARAM &cPar)
 
 		cVarcov.CopyToParam(cPar);
 	}
-	*/
+
 
 	//Compute the S matrix (and its variance), that is used for variance component estimation using summary statistics
 	if (cPar.a_mode==25 || cPar.a_mode==26) {
@@ -1471,7 +1508,7 @@ void GEMMA::BatchRun (PARAM &cPar)
 	  gsl_vector_free (s);
 	}
 
-	/*
+
     //Calculate SNP covariance
 	if (cPar.a_mode==71) {
 	  VARCOV cVarcov;
@@ -1485,7 +1522,7 @@ void GEMMA::BatchRun (PARAM &cPar)
 
 	  cVarcov.CopyToParam(cPar);
 	}
-	*/
+
 
 	//LM
 	if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) {  //Fit LM
@@ -1541,7 +1578,7 @@ void GEMMA::BatchRun (PARAM &cPar)
 	//REML approach only
 	//if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp
 	//for one phenotype only;
-	if (cPar.a_mode==61 || cPar.a_mode==62) {
+	if (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) {
 	  if (!cPar.file_beta.empty() ) {
 	    //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
 	    //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
@@ -1866,8 +1903,10 @@ void GEMMA::BatchRun (PARAM &cPar)
 		    cVc.CopyFromParam(cPar);
 		    if (cPar.a_mode==61) {
 		      cVc.CalcVChe (G, W, &Y_col.vector);
-		    } else {
+		    } else if (cPar.a_mode==62) {
 		      cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector);
+		    } else {
+		      cVc.CalcVCacl (G, W, &Y_col.vector);
 		    }
 		    cVc.CopyToParam(cPar);
 		    //obtain pve from sigma2
@@ -2310,7 +2349,7 @@ void GEMMA::BatchRun (PARAM &cPar)
 		//center y, even for case/control data
 		cPar.pheno_mean=CenterVector(y);
 
-		//run bslmm if rho==1
+		//run bvsr if rho==1
 		if (cPar.rho_min==1 && cPar.rho_max==1) {
 		  //read genotypes X (not UtX)
 		  cPar.ReadGenotypes (UtX, G, false);
@@ -2329,7 +2368,6 @@ void GEMMA::BatchRun (PARAM &cPar)
 		gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
 		gsl_vector *Uty=gsl_vector_alloc (y->size);
 
-
 		//read relatedness matrix G
 		if (!(cPar.file_kin).empty()) {
 			cPar.ReadGenotypes (UtX, G, false);
@@ -2373,17 +2411,20 @@ void GEMMA::BatchRun (PARAM &cPar)
 		CalcUtX (U, UtX);
 		cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
 
-		//perform BSLMM analysis
-		BSLMM cBslmm;
-		cBslmm.CopyFromParam(cPar);
-		time_start=clock();
-		if (cPar.a_mode==12) {  //ridge regression
-			cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
-		} else {	//Run MCMC
-			cBslmm.MCMC(U, UtX, Uty, eval, y);
+		//perform BSLMM or BSLMMDAP analysis
+		if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
+		  BSLMM cBslmm;
+		  cBslmm.CopyFromParam(cPar);
+		  time_start=clock();
+		  if (cPar.a_mode==12) {  //ridge regression
+		    cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
+		  } else {	//Run MCMC
+		    cBslmm.MCMC(U, UtX, Uty, eval, y);
+		  }
+		  cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		  cBslmm.CopyToParam(cPar);
+		} else {
 		}
-		cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		cBslmm.CopyToParam(cPar);
 
 		//release all matrices and vectors
 		gsl_matrix_free (G);
@@ -2399,8 +2440,157 @@ void GEMMA::BatchRun (PARAM &cPar)
 	}
 
 
+
+	//BSLMM-DAP
+	if (cPar.a_mode==14 || cPar.a_mode==15 || cPar.a_mode==16) {
+	  if (cPar.a_mode==14) {
+	    gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+	    gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
+	    gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
+	    gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
+
+	    //set covariates matrix W and phenotype vector y
+	    //an intercept should be included in W,
+	    cPar.CopyCvtPhen (W, y, 0);
+
+	    //center y, even for case/control data
+	    cPar.pheno_mean=CenterVector(y);
+
+	    //run bvsr if rho==1
+	    if (cPar.rho_min==1 && cPar.rho_max==1) {
+	      //read genotypes X (not UtX)
+	      cPar.ReadGenotypes (UtX, G, false);
+
+	      //perform BSLMM analysis
+	      BSLMM cBslmm;
+	      cBslmm.CopyFromParam(cPar);
+	      time_start=clock();
+	      cBslmm.MCMC(UtX, y);
+	      cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+	      cBslmm.CopyToParam(cPar);
+	      //else, if rho!=1
+	    } else {
+	      gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
+	      gsl_vector *eval=gsl_vector_alloc (y->size);
+	      gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
+	      gsl_vector *Uty=gsl_vector_alloc (y->size);
+
+	      //read relatedness matrix G
+	      if (!(cPar.file_kin).empty()) {
+		cPar.ReadGenotypes (UtX, G, false);
+
+		//read relatedness matrix G
+		ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
+		if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
+
+		//center matrix G
+		CenterMatrix (G);
+	      } else {
+		cPar.ReadGenotypes (UtX, G, true);
+	      }
+
+	      //eigen-decomposition and calculate trace_G
+	      cout<<"Start Eigen-Decomposition..."<<endl;
+	      time_start=clock();
+	      cPar.trace_G=EigenDecomp (G, U, eval, 0);
+	      cPar.trace_G=0.0;
+	      for (size_t i=0; i<eval->size; i++) {
+		if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
+		cPar.trace_G+=gsl_vector_get (eval, i);
+	      }
+	      cPar.trace_G/=(double)eval->size;
+	      cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+	      //calculate UtW and Uty
+	      CalcUtX (U, W, UtW);
+	      CalcUtX (U, y, Uty);
+
+	      //calculate REMLE/MLE estimate and pve
+	      CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+	      CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+	      CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
+
+	      cPar.PrintSummary();
+
+	      //Creat and calcualte UtX, use a large memory
+	      cout<<"Calculating UtX..."<<endl;
+	      time_start=clock();
+	      CalcUtX (U, UtX);
+	      cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+	      //perform analysis; assume X and y are already centered
+	      BSLMMDAP cBslmmDap;
+	      cBslmmDap.CopyFromParam(cPar);
+	      time_start=clock();
+	      cBslmmDap.DAP_CalcBF (U, UtX, Uty, eval, y);
+	      cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+	      cBslmmDap.CopyToParam(cPar);
+
+	      //release all matrices and vectors
+	      gsl_matrix_free (G);
+	      gsl_matrix_free (U);
+	      gsl_matrix_free (UtW);
+	      gsl_vector_free (eval);
+	      gsl_vector_free (Uty);
+	    }
+
+	    gsl_matrix_free (W);
+	    gsl_vector_free (y);
+	    gsl_matrix_free (UtX);
+	  } else if (cPar.a_mode==15) {
+	    //perform EM algorithm and estimate parameters
+	    vector<string> vec_rs;
+	    vector<double> vec_sa2, vec_sb2, wab;
+	    vector<vector<vector<double> > > BF;
+
+	    //read hyp and bf files (functions defined in BSLMMDAP)
+	    ReadFile_hyb (cPar.file_hyp, vec_sa2, vec_sb2, wab);
+	    ReadFile_bf (cPar.file_bf, vec_rs, BF);
+
+	    cPar.ns_test=vec_rs.size();
+	    if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"<<endl;}
+
+	    //load annotations
+	    gsl_matrix *Ac;
+	    gsl_matrix_int *Ad;
+	    gsl_vector_int *dlevel;
+	    size_t kc, kd;
+	    if (!cPar.file_cat.empty()) {
+	      ReadFile_cat (cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
+	    } else {
+	      kc=0; kd=0;
+	    }
+	    
+	    cout<<"## number of blocks = "<<BF.size()<<endl;
+	    cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl;
+	    cout<<"## grid size for hyperparameters = "<<wab.size()<<endl;
+	    cout<<"## number of continuous annotations = "<<kc<<endl;
+	    cout<<"## number of discrete annotations = "<<kd<<endl;
+
+	    //DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel);
+
+	    //perform analysis
+	    BSLMMDAP cBslmmDap;
+	    cBslmmDap.CopyFromParam(cPar);
+	    time_start=clock();
+	    cBslmmDap.DAP_EstimateHyper (kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac, Ad, dlevel);
+	    cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+	    cBslmmDap.CopyToParam(cPar);
+
+	    gsl_matrix_free(Ac);
+	    gsl_matrix_int_free(Ad);
+	    gsl_vector_int_free(dlevel);
+	  } else {
+	    //
+	  }
+
+	}
+
+
+
+
 	/*
-	//LDR
+	//LDR (change 14 to 16?)
 	if (cPar.a_mode==14) {
 		gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
 		gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
@@ -2428,6 +2618,7 @@ void GEMMA::BatchRun (PARAM &cPar)
 		gsl_matrix_free (G);
 	}
 	*/
+
 	cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0);
 
 	return;
@@ -2584,7 +2775,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
 	    outfile<<"## number of observed data = "<<cPar.np_obs<<endl;
 	    outfile<<"## number of missing data = "<<cPar.np_miss<<endl;
 	  }
-	  if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==66 || cPar.a_mode==67) {
+	  if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63 || cPar.a_mode==66 || cPar.a_mode==67) {
 	    outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
 	  }
 
@@ -2604,7 +2795,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
 	  }
 	}
 
-	if ( (cPar.a_mode==61 || cPar.a_mode==62) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) {
+	if ( (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) {
 	    //	        outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
 	  if (cPar.n_ph==1) {
 	    outfile<<"## pve estimates = ";
@@ -2799,7 +2990,7 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
 	 */
 
 
-	if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
+	if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
 	  outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl;
 	}
 
@@ -2818,13 +3009,13 @@ void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
 	outfile<<"## Computation Time:"<<endl;
 	outfile<<"## total computation time = "<<cPar.time_total<<" min "<<endl;
 	outfile<<"## computation time break down: "<<endl;
-	if (cPar.a_mode==21 || cPar.a_mode==22 || cPar.a_mode==11 || cPar.a_mode==13) {
+	if (cPar.a_mode==21 || cPar.a_mode==22 || cPar.a_mode==11 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
 		outfile<<"##      time on calculating relatedness matrix = "<<cPar.time_G<<" min "<<endl;
 	}
 	if (cPar.a_mode==31) {
 		outfile<<"##      time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
 	}
-	if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
+	if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
 		outfile<<"##      time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
 		outfile<<"##      time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl;
 	}
diff --git a/src/io.cpp b/src/io.cpp
index fcbbec7..97b29b0 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -47,9 +47,10 @@
 #include "io.h"
 #endif
 
+
 using namespace std;
 
-bool ReadHeader_io (const string &line, HEADER &header);
+
 
 //Print process bar
 void ProgressBar (string str, double p, double total)
@@ -180,7 +181,7 @@ bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps)
 	//read header
 	HEADER header;
 	!safeGetline(infile, line).eof();
-	ReadHeader_io (line, header);
+	ReadHeader (line, header);
 
 	if (header.rs_col==0 && (header.chr_col==0 || header.pos_col==0) ) {
 	  cout<<"missing rs id in the hearder"<<endl;
@@ -2500,7 +2501,7 @@ bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k
 
 
 //read header to determine which column contains which item
-bool ReadHeader_io (const string &line, HEADER &header)
+bool ReadHeader (const string &line, HEADER &header)
 {
   string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID","MarkerName"};
   set<string> rs_set(rs_ptr, rs_ptr+11);
@@ -2594,8 +2595,17 @@ bool ReadHeader_io (const string &line, HEADER &header)
       if (header.af_col==0) {header.af_col=header.coln+1;} else {cout<<"error! more than two af columns in the file."<<endl; n_error++;}
     } else if (cor_set.count(type)!=0) {
       if (header.cor_col==0) {header.cor_col=header.coln+1;} else {cout<<"error! more than two cor columns in the file."<<endl; n_error++;}
-    } else {}
-
+    } else {
+      string str = ch_ptr;
+      string cat = str.substr(str.size()-2, 2);
+      // continuous
+      if(cat == "_c" || cat =="_C"){
+	header.catc_col.insert(header.coln+1);
+      } else { //discrete
+	header.catd_col.insert(header.coln+1);
+      }
+    }
+    
     ch_ptr=strtok (NULL, " , \t");
     header.coln++;
   }
@@ -2635,7 +2645,7 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_
   //read header
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader (line, header);
 
   //use the header to count the number of categories
   n_vc=header.coln;
@@ -2715,6 +2725,118 @@ bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, siz
 }
 
 
+
+
+
+/*
+//read the continuous category file, record mapR2catc
+bool ReadFile_catc (const string &file_cat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
+{
+  mapRS2catc.clear();
+
+  igzstream infile (file_cat.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}
+
+  string line;
+  char *ch_ptr;
+
+  string rs, chr, a1, a0, pos, cm;
+  size_t i_cat;// ns_vc=0;
+
+  //read header
+  HEADER header;
+  !safeGetline(infile, line).eof();
+  ReadHeader (line, header);
+
+  //use the header to count the number of categories
+  n_cat=header.coln;
+  if (header.rs_col!=0) {n_cat--;}
+  if (header.chr_col!=0) {n_cat--;}
+  if (header.pos_col!=0) {n_cat--;}
+  if (header.cm_col!=0) {n_cat--;}
+  if (header.a1_col!=0) {n_cat--;}
+  if (header.a0_col!=0) {n_cat--;}
+
+  //set up continous category
+  vector<double> catc;
+  for (size_t i=0; i<n_cat; i++) {
+    catc.push_back(0);
+  }
+
+  //read the following lines to record mapRS2cat
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+    i_cat=0;
+    if (header.rs_col==0) {
+      rs=chr+":"+pos;
+    }
+
+    for (size_t i=0; i<header.coln; i++) {
+      if (header.rs_col!=0 && header.rs_col==i+1) {
+	rs=ch_ptr;
+      } else if (header.chr_col!=0 && header.chr_col==i+1) {
+	chr=ch_ptr;
+      } else if (header.pos_col!=0 && header.pos_col==i+1) {
+	pos=ch_ptr;
+      } else if (header.cm_col!=0 && header.cm_col==i+1) {
+	cm=ch_ptr;
+      } else if (header.a1_col!=0 && header.a1_col==i+1) {
+	a1=ch_ptr;
+      } else if (header.a0_col!=0 && header.a0_col==i+1) {
+	a0=ch_ptr;
+      } else {
+	catc[i_cat]=atof(ch_ptr);
+	i_cat++;
+      }
+
+      ch_ptr=strtok (NULL, " , \t");
+    }
+
+    if (mapRS2catc.count(rs)==0) {mapRS2catc[rs]=catc;}
+
+    //if (mapRS2cat.count(rs)==0) {mapRS2cat[rs]=n_vc+1; ns_vc++;}
+  }
+
+  //if (ns_vc>0) {n_vc++;}
+
+  infile.clear();
+  infile.close();
+
+  return true;
+}
+
+
+
+
+bool ReadFile_mcatc (const string &file_mcat, map<string, vector<double> > &mapRS2catc, size_t &n_cat)
+{
+  mapRS2catc.clear();
+
+  igzstream infile (file_mcat.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open mcategory file: "<<file_mcat<<endl; return false;}
+
+  string file_name;
+  map<string, vector<double> > mapRS2catc_tmp;
+  size_t n_cat_tmp, t=0;
+
+  while (!safeGetline(infile, file_name).eof()) {
+    mapRS2catc_tmp.clear();
+    ReadFile_catc (file_name, mapRS2catc_tmp, n_cat_tmp);
+    mapRS2catc.insert(mapRS2catc_tmp.begin(), mapRS2catc_tmp.end());
+    if (t==0) {n_cat=n_cat_tmp;}
+    if (n_cat!=n_cat_tmp) {cout<<"number of category differs in different mcatc files."<<endl;;}
+
+    t++;
+  }
+
+  return true;
+}
+*/
+
+
+
+
 //read bimbam mean genotype file and calculate kinship matrix; this time, the kinship matrix is not centered, and can contain multiple K matrix
 bool BimbamKin (const string &file_geno, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns)
 {
@@ -3208,7 +3330,7 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vect
   //read header
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader (line, header);
 
   while (!safeGetline(infile, line).eof()) {
     if (isBlankLine(line)) {continue;}
@@ -3281,7 +3403,7 @@ void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2ca
   //read header
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader (line, header);
 
   if (header.n_col==0 ) {
     if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
@@ -3412,7 +3534,7 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
   //read header
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader (line, header);
 
   if (header.n_col==0 ) {
     if ( (header.nobs_col==0 && header.nmis_col==0) && (header.ncase_col==0 && header.ncontrol_col==0) ) {
@@ -3503,7 +3625,6 @@ void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA
 
 
 
-
 void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<size_t> &vec_ni, const vector<double> &vec_weight, const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, gsl_vector *s)
 {
   gsl_matrix_set_zero (Vq);
@@ -3845,6 +3966,21 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, gsl_vector
   return;
 }
 
+
+//copied from lmm.cpp; is used in the following function compKtoV
+//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
+size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
+	if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
+	size_t index;
+	size_t l, h;
+	if (b>a) {l=a; h=b;} else {l=b; h=a;}
+
+	size_t n=n_cvt+2;
+	index=(2*n-l+2)*(l-1)/2+h-l;
+
+	return index;
+}
+
 //read reference file
 void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni)
 {
diff --git a/src/io.h b/src/io.h
index 14dfcc9..c1b762d 100644
--- a/src/io.h
+++ b/src/io.h
@@ -83,6 +83,9 @@ bool ReadHeader (const string &line, HEADER &header);
 bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_t &n_vc);
 bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, size_t &n_vc);
 
+bool ReadFile_catc (const string &file_cat, map<string, vector<double> > &mapRS2catc, size_t &n_cat);
+bool ReadFile_mcatc (const string &file_mcat, map<string, vector<double> > &mapRS2catc, size_t &n_cat);
+
 bool BimbamKin (const string &file_geno, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns);
 bool PlinkKin (const string &file_bed, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns);
 bool MFILEKin (const size_t mfile_mode, const string &file_mfile, const int display_pace, const vector<int> &indicator_idv, const vector<vector<int> > &mindicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<vector<SNPINFO> > &msnpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns);
diff --git a/src/lmm.cpp b/src/lmm.cpp
index a707534..044f33c 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -183,6 +183,30 @@ void LMM::WriteFiles ()
 	return;
 }
 
+
+
+
+
+
+
+
+
+
+
+//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
+size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
+	if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
+	size_t index;
+	size_t l, h;
+	if (b>a) {l=a; h=b;} else {l=b; h=a;}
+
+	size_t n=n_cvt+2;
+	index=(2*n-l+2)*(l-1)/2+h-l;
+
+	return index;
+}
+
+
 void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, const gsl_matrix *Uab, const gsl_vector *ab, gsl_matrix *Pab)
 {
 	size_t index_ab, index_aw, index_bw, index_ww;
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 915245b..4417f8a 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -165,6 +165,33 @@ void CenterMatrix (gsl_matrix *G, const gsl_matrix *W)
 }
 
 
+//standardize the matrix G such that all diagonal elements = 1
+void StandardizeMatrix (gsl_matrix *G)
+{
+	double d=0.0;
+	vector<double> vec_d;
+
+	for (size_t i=0; i<G->size1; ++i) {
+	  vec_d.push_back(gsl_matrix_get(G, i, i));
+	}
+	for (size_t i=0; i<G->size1; ++i) {
+	  for (size_t j=i; j<G->size2; ++j) {
+	    if (j==i) {
+	      gsl_matrix_set(G, i, j, 1);
+	    } else {
+	      d=gsl_matrix_get(G, i, j);
+	      d/=sqrt(vec_d[i]*vec_d[j]);
+	      gsl_matrix_set(G, i, j, d);
+	      gsl_matrix_set(G, j, i, d);
+	    }
+	  }
+	}
+
+	return;
+}
+
+
+
 //scale the matrix G such that the mean diagonal = 1
 double ScaleMatrix (gsl_matrix *G)
 {
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 98c0e35..84f3186 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -30,6 +30,7 @@ double VectorVar (const gsl_vector *v);
 void CenterMatrix (gsl_matrix *G);
 void CenterMatrix (gsl_matrix *G, const gsl_vector *w);
 void CenterMatrix (gsl_matrix *G, const gsl_matrix *W);
+void StandardizeMatrix (gsl_matrix *G);
 double ScaleMatrix (gsl_matrix *G);
 double CenterVector (gsl_vector *y);
 void CenterVector (gsl_vector *y, const gsl_matrix *W);
diff --git a/src/param.cpp b/src/param.cpp
index 0a63a16..4b8c3a4 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -57,6 +57,7 @@ pheno_mean(0), noconstrain (false),
 h_min(-1), h_max(-1),	h_scale(-1),
 rho_min(0.0), rho_max(1.0),	rho_scale(-1),
 logp_min(0.0), logp_max(0.0), logp_scale(-1),
+h_ngrid(10), rho_ngrid(10),
 s_min(0), s_max(300),
 w_step(100000),	s_step(1000000),
 r_pace(10), w_pace(1000),
@@ -66,7 +67,7 @@ geo_mean(2000.0),
 randseed(-1),
 window_cm(0), window_bp(0), window_ns(0), n_block(200),
 error(false),
-ni_subsample(0), n_cvt(1), n_vc(1),
+ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0),
 time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), time_UtZ(0.0), time_opt(0.0), time_Omega(0.0)
 {}
 
@@ -76,7 +77,14 @@ time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), time_UtZ(0.0), tim
 void PARAM::ReadFiles (void)
 {
 	string file_str;
-
+	/*
+	//read continuous cat file
+	if (!file_mcatc.empty()) {
+	  if (ReadFile_mcatc (file_mcatc, mapRS2catc, n_cat)==false) {error=true;}
+	} else if (!file_catc.empty()) {
+	  if (ReadFile_catc (file_catc, mapRS2catc, n_cat)==false) {error=true;}
+	}
+	*/
 	//read cat file
 	if (!file_mcat.empty()) {
 	  if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;}
@@ -398,7 +406,7 @@ void PARAM::CheckParam (void)
 
 	//check parameters
 	if (k_mode!=1 && k_mode!=2) {cout<<"error! unknown kinship/relatedness input mode: "<<k_mode<<endl; error=true;}
-	if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=66 && a_mode!=67 && a_mode!=71)
+	if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 && a_mode!=71)
 	{cout<<"error! unknown analysis mode: "<<a_mode<<". make sure -gk or -eigen or -lmm or -bslmm -predict or -calccov is sepcified correctly."<<endl; error=true;}
 	if (miss_level>1) {cout<<"error! missing level needs to be between 0 and 1. current value = "<<miss_level<<endl; error=true;}
 	if (maf_level>0.5) {cout<<"error! maf level needs to be between 0 and 0.5. current value = "<<maf_level<<endl; error=true;}
@@ -550,7 +558,7 @@ void PARAM::CheckParam (void)
 	// WJA added
 	if (!file_oxford.empty()) {flag++;}
 
-	if (flag!=1 && a_mode!=27 && a_mode!=28 && a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62 && a_mode!=66 && a_mode!=67) {
+	if (flag!=1 && a_mode!=15 && a_mode!=27 && a_mode!=28 && a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67) {
 		cout<<"error! either plink binary files, or bimbam mean genotype files, or gene expression files are required."<<endl; error=true;
 	}
 
@@ -578,6 +586,16 @@ void PARAM::CheckParam (void)
 	  }
 	}
 
+
+	if (a_mode==63) {
+	  if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && file_mk.empty() ) {
+	    cout<<"error! missing relatedness file. "<<endl;  error=true;
+	  }
+	  if ( file_pheno.empty() ) {
+	    cout<<"error! missing phenotype file."<<endl; error=true;
+	  }
+	}
+
 	if (a_mode==66 || a_mode==67) {
 	  if (file_beta.empty() || ( file_mbfile.empty() && file_bfile.empty() && file_mgeno.empty() && file_geno.empty()) ) {
 	    cout<<"error! missing beta file or genotype file."<<endl; error=true;
@@ -630,7 +648,7 @@ void PARAM::CheckParam (void)
 
 	if ((a_mode==43) && file_kin.empty())  {cout<<"error! missing relatedness file. -predict option requires -k option to provide a relatedness file."<<endl;  error=true;}
 
-	if ((a_mode==11 || a_mode==12 || a_mode==13) && !file_cvt.empty() ) {cout<<"error! -bslmm option does not support covariates files."<<endl; error=true;}
+	if ((a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14 || a_mode==16) && !file_cvt.empty() ) {cout<<"error! -bslmm option does not support covariates files."<<endl; error=true;}
 
 	if (a_mode==41 || a_mode==42) {
 		if (!file_cvt.empty() ) {cout<<"error! -predict option does not support covariates files."<<endl; error=true;}
@@ -738,7 +756,7 @@ void PARAM::CheckData (void) {
 		}
 	}
 	*/
-	if (ni_test==0 && file_cor.empty() && file_mstudy.empty() && file_study.empty() && file_beta.empty() ) {
+	if (ni_test==0 && file_cor.empty() && file_mstudy.empty() && file_study.empty() && file_beta.empty() && file_bf.empty() ) {
 		error=true;
 		cout<<"error! number of analyzed individuals equals 0. "<<endl;
 		return;
@@ -759,7 +777,7 @@ void PARAM::CheckData (void) {
 	}
 
 	//output some information
-	if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && a_mode!=27 && a_mode!=28) {
+	if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && a_mode!=15 && a_mode!=27 && a_mode!=28) {
 	  cout<<"## number of total individuals = "<<ni_total<<endl;
 	  if (a_mode==43) {
 	    cout<<"## number of analyzed individuals = "<<ni_cvt<<endl;
@@ -806,7 +824,7 @@ void PARAM::CheckData (void) {
 
 	//set parameters for BSLMM
 	//and check for predict
-	if (a_mode==11 || a_mode==12 || a_mode==13) {
+	if (a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14) {
 		if (a_mode==11) {n_mh=1;}
 		if (logp_min==0) {logp_min=-1.0*log((double)ns_test);}
 
@@ -1598,7 +1616,7 @@ void PARAM::ProcessCvtPhen ()
 	}
 
 	//check ni_test
-	if (ni_test==0) {
+	if (ni_test==0 && a_mode!=15) {
 		error=true;
 		cout<<"error! number of analyzed individuals equals 0. "<<endl;
 		return;
@@ -1641,6 +1659,54 @@ void PARAM::CopyCvt (gsl_matrix *W)
 	return;
 }
 
+/*
+//if there is a column contains all 1's, then flag==1; otherwise flag=0
+void PARAM::CopyA (size_t flag, gsl_matrix *A)
+{
+	size_t ci_test=0;
+	string rs;
+	vector<size_t> flag_vec;
+	vector<double> catc;
+
+	for (size_t j=0; j<n_cat; j++) {
+	  flag_vec.push_back(0);
+	}
+
+	for (vector<int>::size_type i=0; i<indicator_snp.size(); ++i) {
+		if (indicator_snp[i]==0) {continue;}
+		rs=snpInfo[i].rs_number;
+
+		if (mapRS2catc.count(rs)==0) {
+		  for (size_t j=0; j<n_cat; j++) {
+		    gsl_matrix_set (A, ci_test, j, 0);
+		  }
+		} else {
+		  for (size_t j=0; j<n_cat; j++) {
+		    gsl_matrix_set (A, ci_test, j, mapRS2catc[rs][j]);
+		  }
+		}
+
+		if (ci_test==0) {
+		  for (size_t j=0; j<n_cat; j++) {
+		    catc.push_back(mapRS2catc[rs][j]);
+		  }
+		} else {
+		  for (size_t j=0; j<n_cat; j++) {
+		    if (catc[j]==mapRS2catc[rs][j]) {flag_vec[j]++;};
+		  }
+		}
+
+		ci_test++;
+	}
+
+	flag=0;
+	for (size_t j=0; j<n_cat; j++) {
+	  if (flag_vec[j]==0) {flag++;}
+	}
+
+	return;
+}
+*/
 
 void PARAM::CopyGxe (gsl_vector *env)
 {
diff --git a/src/param.h b/src/param.h
index 8db3590..6cb0d97 100644
--- a/src/param.h
+++ b/src/param.h
@@ -109,6 +109,8 @@ public:
     size_t ws_col;
     size_t cor_col;
     size_t coln;//number of columns
+    set<size_t> catc_col;
+    set<size_t> catd_col;
 };
 
 
@@ -129,6 +131,7 @@ public:
 	string file_gxe;		//optional
 	string file_cvt;		//optional
 	string file_cat, file_mcat;
+	string file_catc, file_mcatc;
 	string file_var;
 	string file_beta;
 	string file_cor;
@@ -138,6 +141,7 @@ public:
 	string file_ref, file_mref;
 	string file_weight, file_wsnp, file_wcat;
 	string file_out;
+	string file_bf, file_hyp;
 	string path_out;
 
 
@@ -194,6 +198,7 @@ public:
 	double h_min, h_max, h_scale;			//priors for h
 	double rho_min, rho_max, rho_scale;		//priors for rho
 	double logp_min, logp_max, logp_scale;		//priors for log(pi)
+	size_t h_ngrid, rho_ngrid;
 	size_t s_min, s_max;			//minimum and maximum number of gammas
 	size_t w_step;					//number of warm up/burn in iterations
 	size_t s_step;					//number of sampling iterations
@@ -225,6 +230,7 @@ public:
 	size_t ni_subsample;            //number of subsampled individuals
 	//size_t ni_total_ref, ns_total_ref, ns_pair;//max number of individuals, number of snps and number of snp pairs in the reference panel
 	size_t n_cvt;			//number of covariates
+	size_t n_cat;			//number of continuous categories
 	size_t n_ph;			//number of phenotypes
 	size_t n_vc;			//number of variance components (including the diagonal matrix)
 	double time_total;		//record total time
@@ -262,6 +268,7 @@ public:
 	map<string, double> mapRS2cM;		//map rs# to cM
 	map<string, double> mapRS2est;			//map rs# to parameters
 	map<string, size_t> mapRS2cat;          //map rs# to category number
+	map<string, vector<double> > mapRS2catc;          //map rs# to continuous categories
 	map<string, double> mapRS2wsnp;          //map rs# to snp weights
 	map<string, vector<double> > mapRS2wcat;          //map rs# to snp cat weights
 
@@ -281,6 +288,7 @@ public:
 	void ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K);
 	void CheckCvt ();
 	void CopyCvt (gsl_matrix *W);
+	void CopyA (size_t flag, gsl_matrix *A);
 	void CopyGxe (gsl_vector *gxe);
 	void CopyWeight (gsl_vector *w);
 	void ProcessCvtPhen();
@@ -299,7 +307,6 @@ public:
 	void UpdateSNP (const map<string, double> &mapRS2wA);
 };
 
-size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt);
-  
+
 #endif
 
diff --git a/src/vc.cpp b/src/vc.cpp
index f17a9e9..c0aa40d 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -453,7 +453,7 @@ int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, g
 
 
 //read header to determine which column contains which item
-bool ReadHeader_vc (const string &line, HEADER &header)
+bool ReadHeader (const string &line, HEADER &header)
 {
   string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID"};
   set<string> rs_set(rs_ptr, rs_ptr+10);
@@ -586,7 +586,7 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps, vector<st
 
   //header
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader (line, header);
 
   if (header.n_col==0 ) {
     if (header.nobs_col==0 && header.nmis_col==0) {
@@ -700,7 +700,7 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta, const m
   //read header
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader (line, header);
 
   if (header.n_col==0 ) {
     if (header.nobs_col==0 && header.nmis_col==0) {
@@ -869,7 +869,7 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, const v
   HEADER header;
 
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader (line, header);
 
   while (!safeGetline(infile, line).eof()) {
     //do not read cor values this time; upto col_n-1
@@ -1059,6 +1059,25 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, const v
   return;
 }
 
+
+
+
+
+//copied from lmm.cpp; is used in the following function VCss
+//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
+size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
+	if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
+	size_t index;
+	size_t l, h;
+	if (b>a) {l=a; h=b;} else {l=b; h=a;}
+
+	size_t n=n_cvt+2;
+	index=(2*n-l+2)*(l-1)/2+h-l;
+
+	return index;
+}
+
+
 //use the new method to calculate variance components with summary statistics
 //first, use a function CalcS to compute S matrix (where the diagonal elements are part of V(q) ), and then use bootstrap to compute the variance for S, use a set of genotypes, phenotypes, and individual ids, and snp category label
 void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *q_vec, const gsl_vector *s_vec, const double df, vector<double> &v_pve, vector<double> &v_se_pve, double &pve_total, double &se_pve_total, vector<double> &v_sigma2, vector<double> &v_se_sigma2, vector<double> &v_enrich, vector<double> &v_se_enrich) {
@@ -1336,7 +1355,7 @@ void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y
     gsl_vector_set(q_vec, i, d);
   }
 
-  //compuate yKrKKry, which is used later for confidence interval
+  //compute yKrKKry, which is used later for confidence interval
   for (size_t i=0; i<n_vc; i++) {
     gsl_vector_const_view Kry_coli=gsl_matrix_const_column (Kry, i);
     for (size_t j=i; j<n_vc; j++) {
@@ -1842,6 +1861,277 @@ void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K, const gsl_matrix *W,
 
 
 
+//Ks are not scaled;
+void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y)
+{
+  size_t n1=K->size1, n2=K->size2;
+  size_t n_vc=n2/n1;
+
+  double d, y2_sum, tau_inv, se_tau_inv;
+
+  //new matrices/vectors
+  gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2);
+  gsl_vector *y_scale=gsl_vector_alloc (n1);
+  gsl_vector *y2=gsl_vector_alloc (n1);
+  gsl_vector *n1_vec=gsl_vector_alloc (n1);
+  gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc);
+  gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc);
+  gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1);
+  gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1);
+
+  //old matrices/vectors
+  gsl_vector *pve=gsl_vector_alloc (n_vc);
+  gsl_vector *se_pve=gsl_vector_alloc (n_vc);
+  gsl_vector *q_vec=gsl_vector_alloc (n_vc);
+  gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+
+  int sig;
+  gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+
+  //center and scale K by W
+  //and standardize K further so that all diagonal elements are 1
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_view Kscale_sub = gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+    gsl_matrix_const_view K_sub = gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1);
+    gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix);
+
+    CenterMatrix (&Kscale_sub.matrix, W);
+    StandardizeMatrix (&Kscale_sub.matrix);
+  }
+
+  //center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1)
+  gsl_vector_memcpy (y_scale, y);
+  CenterVector (y_scale, W);
+  //  StandardizeVector (y_scale);
+
+  //compute y^2 and sum(y^2), which is also the variance of y*n1
+  gsl_vector_memcpy (y2, y_scale);
+  gsl_vector_mul (y2, y_scale);
+
+  y2_sum=0;
+  for (size_t i=0; i<y2->size; i++) {
+    y2_sum+=gsl_vector_get(y2, i);
+  }
+
+  //compute the n_vc size q vector
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_const_view Kscale_sub = gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, n1_vec);
+
+    gsl_blas_ddot (n1_vec, y_scale, &d);
+    gsl_vector_set(q_vec, i, d-y2_sum);
+  }
+
+  //compute the n_vc by n_vc S1 and S2 matrix (and eventually S=S1-\tau^{-1}S2)
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_const_view Kscale_sub1 = gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+
+    for (size_t j=i; j<n_vc; j++) {
+      gsl_matrix_const_view Kscale_sub2 = gsl_matrix_const_submatrix (K_scale, 0, n1*j, n1, n1);
+
+      gsl_matrix_memcpy (K_tmp, &Kscale_sub1.matrix);
+      gsl_matrix_mul_elements (K_tmp, &Kscale_sub2.matrix);
+
+      gsl_vector_set_zero(n1_vec);
+      for (size_t t=0; t<K_tmp->size1; t++) {
+	gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t);
+	gsl_vector_add (n1_vec, &Ktmp_col.vector);
+      }
+      gsl_vector_add_constant (n1_vec, -1.0);
+
+      //compute S1
+      gsl_blas_ddot (n1_vec, y2, &d);
+      gsl_matrix_set (S1, i, j, 2*d);
+      if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);}
+
+      //compute S2
+      d=0;
+      for (size_t t=0; t<n1_vec->size; t++) {
+	d+=gsl_vector_get (n1_vec, t);
+      }
+      gsl_matrix_set (S2, i, j, d);
+      if (i!=j) {gsl_matrix_set (S2, j, i, d);}
+
+      //save information to compute J
+      gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j);
+      gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i);
+
+      gsl_vector_memcpy(&K2col1.vector, n1_vec);
+      if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);}
+    }
+  }
+
+  //iterate to solve tau and h's
+  size_t it=0;
+  double s=1;
+  while (abs(s)>1e-3 && it<100) {
+    //update tau_inv
+    gsl_blas_ddot (q_vec, pve, &d);
+    if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;}
+    tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1));
+    if (it>0) {s/=tau_inv;}
+
+    //update S
+    gsl_matrix_memcpy (S_mat, S2);
+    gsl_matrix_scale (S_mat, -1*tau_inv);
+    gsl_matrix_add (S_mat, S1);
+
+    //update h=S^{-1}q
+    int sig;
+    gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+    LUDecomp (S_mat, pmt, &sig);
+    LUInvert (S_mat, pmt, Si_mat);
+    gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+
+    //cout<<tau_inv<<endl;
+    it++;
+  }
+
+  //compute V matrix and A matrix (K_scale is destroyed, so need to compute V first)
+  gsl_matrix_set_zero (V_mat);
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_view Kscale_sub = gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+
+    //compute V
+    gsl_matrix_memcpy (K_tmp, &Kscale_sub.matrix);
+    gsl_matrix_scale (K_tmp, gsl_vector_get(pve, i));
+    gsl_matrix_add (V_mat, K_tmp);
+
+    //compute A; the corresponding Kscale is destroyed
+    gsl_matrix_const_view K2_sub = gsl_matrix_const_submatrix (K2, 0, n_vc*i, n1, n_vc);
+    gsl_blas_dgemv (CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
+
+    for (size_t t=0; t<n1; t++) {
+      gsl_matrix_set (K_scale, t, n1*i+t, gsl_vector_get(n1_vec, t) );
+    }
+
+    //compute Ay
+    gsl_vector_view Ay_col=gsl_matrix_column (Ay, i);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, &Ay_col.vector);
+  }
+  gsl_matrix_scale (V_mat, tau_inv);
+
+  //compute J matrix
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_vector_view Ay_col1=gsl_matrix_column (Ay, i);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, &Ay_col1.vector, 0.0, n1_vec);
+
+    for (size_t j=i; j<n_vc; j++) {
+      gsl_vector_view Ay_col2=gsl_matrix_column (Ay, j);
+
+      gsl_blas_ddot (&Ay_col2.vector, n1_vec, &d);
+      gsl_matrix_set (J_mat, i, j, 2.0*d);
+      if (i!=j) {gsl_matrix_set (J_mat, j, i, 2.0*d);}
+    }
+  }
+
+  //compute H^{-1}JH^{-1} as V(\hat h), where H=S2*tau_inv; this is stored in Var_mat
+  gsl_matrix_memcpy (S_mat, S2);
+  gsl_matrix_scale (S_mat, tau_inv);
+
+  LUDecomp (S_mat, pmt, &sig);
+  LUInvert (S_mat, pmt, Si_mat);
+
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, J_mat, 0.0, S_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, S_mat, Si_mat, 0.0, Var_mat);
+
+  //compute variance for tau_inv
+  gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec);
+  gsl_blas_ddot (y_scale, n1_vec, &d);
+  se_tau_inv=sqrt(2*d)/(double)n1;
+
+  //transform pve back to the original scale and save data
+  v_pve.clear(); v_se_pve.clear();
+  v_sigma2.clear(); v_se_sigma2.clear();
+
+  pve_total=0, se_pve_total=0;
+  for (size_t i=0; i<n_vc; i++) {
+    d=gsl_vector_get (pve, i);
+    pve_total+=d;
+
+    v_pve.push_back(d);
+    v_sigma2.push_back(d*tau_inv/v_traceG[i] );
+
+    d=sqrt(gsl_matrix_get (Var_mat, i, i));
+    v_se_pve.push_back(d);
+    v_se_sigma2.push_back(d*tau_inv/v_traceG[i]);
+
+    //d*=sqrt(var_y/v_traceG[i]-v_sigma2[i]);
+    //v_se_pve.push_back(d/var_y);
+
+    for (size_t j=0; j<n_vc; j++) {
+      se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+    }
+  }
+  v_sigma2.push_back( (1-pve_total)*tau_inv );
+  v_se_sigma2.push_back(sqrt(se_pve_total)*tau_inv );
+  se_pve_total=sqrt(se_pve_total);
+
+  cout<<"sigma2 = ";
+  for (size_t i=0; i<n_vc+1; i++) {
+    cout<<v_sigma2[i]<<" ";
+  }
+  cout<<endl;
+
+  cout<<"se(sigma2) = ";
+  for (size_t i=0; i<n_vc+1; i++) {
+    cout<<v_se_sigma2[i]<<" ";
+  }
+  cout<<endl;
+
+  cout<<"pve = ";
+  for (size_t i=0; i<n_vc; i++) {
+    cout<<v_pve[i]<<" ";
+  }
+  cout<<endl;
+
+  cout<<"se(pve) = ";
+  for (size_t i=0; i<n_vc; i++) {
+    cout<<v_se_pve[i]<<" ";
+  }
+  cout<<endl;
+
+  if (n_vc>1) {
+    cout<<"total pve = "<<pve_total<<endl;
+    cout<<"se(total pve) = "<<se_pve_total<<endl;
+  }
+
+  gsl_permutation_free(pmt);
+
+  gsl_matrix_free(K_scale);
+  gsl_vector_free(y_scale);
+  gsl_vector_free(y2);
+  gsl_vector_free(n1_vec);
+  gsl_matrix_free(Ay);
+  gsl_matrix_free(K2);
+  gsl_matrix_free(K_tmp);
+  gsl_matrix_free(V_mat);
+
+  gsl_vector_free(pve);
+  gsl_vector_free(se_pve);
+  gsl_vector_free(q_vec);
+  gsl_matrix_free(S1);
+  gsl_matrix_free(S2);
+  gsl_matrix_free(S_mat);
+  gsl_matrix_free(Si_mat);
+  gsl_matrix_free(J_mat);
+  gsl_matrix_free(Var_mat);
+
+  return;
+}
+
+
+
+
+
+
+
 //read bimbam mean genotype file and compute XWz
 bool BimbamXwz (const string &file_geno, const int display_pace, vector<int> &indicator_idv, vector<int> &indicator_snp, const vector<size_t> &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz)
 {
diff --git a/src/vc.h b/src/vc.h
index d4a9779..3358433 100644
--- a/src/vc.h
+++ b/src/vc.h
@@ -95,6 +95,7 @@ public:
 	void WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec, const gsl_vector *qvar_vec, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat);
 	void CalcVChe (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y);
 	void CalcVCreml (const bool noconstrain, const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y);
+	void CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y);
 };
 
 void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *q_vec, const gsl_vector *s_vec, const double df, vector<double> &v_pve, vector<double> &v_se_pve, double &pve_total, double &se_pve_total, vector<double> &v_sigma2, vector<double> &v_se_sigma2, vector<double> &v_enrich, vector<double> &v_se_enrich);
-- 
cgit v1.2.3