35 files changed, 25588 insertions, 24074 deletions
diff --git a/src/bslmm.cpp b/src/bslmm.cpp
index d579802..3305639 100644
--- a/src/bslmm.cpp
+++ b/src/bslmm.cpp
@@ -16,1360 +16,1428 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <algorithm>
 #include <cmath>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
 #include <iostream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <ctime>
-#include <cstring>
-#include <algorithm>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
 #include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
 #include "gsl/gsl_randist.h"
-#include "gsl/gsl_cdf.h"
 #include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
 
-#include "lapack.h"
-#include "param.h"
 #include "bslmm.h"
-#include "lmm.h"
+#include "lapack.h"
 #include "lm.h"
+#include "lmm.h"
 #include "mathfunc.h"
+#include "param.h"
 
 using namespace std;
 
-void BSLMM::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
-
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
-
-	l_min=cPar.h_min;
-	l_max=cPar.h_max;
-	n_region=cPar.n_region;
-	pve_null=cPar.pve_null;
-	pheno_mean=cPar.pheno_mean;
-
-	time_UtZ=0.0;
-	time_Omega=0.0;
-	n_accept=0;
-
-	h_min=cPar.h_min;
-	h_max=cPar.h_max;
-	h_scale=cPar.h_scale;
-	rho_min=cPar.rho_min;
-	rho_max=cPar.rho_max;
-	rho_scale=cPar.rho_scale;
-	logp_min=cPar.logp_min;
-	logp_max=cPar.logp_max;
-	logp_scale=cPar.logp_scale;
-
-	s_min=cPar.s_min;
-	s_max=cPar.s_max;
-	w_step=cPar.w_step;
-	s_step=cPar.s_step;
-	r_pace=cPar.r_pace;
-	w_pace=cPar.w_pace;
-	n_mh=cPar.n_mh;
-	geo_mean=cPar.geo_mean;
-	randseed=cPar.randseed;
-	trace_G=cPar.trace_G;
-
-	ni_total=cPar.ni_total;
-	ns_total=cPar.ns_total;
-	ni_test=cPar.ni_test;
-	ns_test=cPar.ns_test;
-	n_cvt=cPar.n_cvt;
-
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
-
-	return;
+void BSLMM::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
+  d_pace = cPar.d_pace;
+
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
+
+  l_min = cPar.h_min;
+  l_max = cPar.h_max;
+  n_region = cPar.n_region;
+  pve_null = cPar.pve_null;
+  pheno_mean = cPar.pheno_mean;
+
+  time_UtZ = 0.0;
+  time_Omega = 0.0;
+  n_accept = 0;
+
+  h_min = cPar.h_min;
+  h_max = cPar.h_max;
+  h_scale = cPar.h_scale;
+  rho_min = cPar.rho_min;
+  rho_max = cPar.rho_max;
+  rho_scale = cPar.rho_scale;
+  logp_min = cPar.logp_min;
+  logp_max = cPar.logp_max;
+  logp_scale = cPar.logp_scale;
+
+  s_min = cPar.s_min;
+  s_max = cPar.s_max;
+  w_step = cPar.w_step;
+  s_step = cPar.s_step;
+  r_pace = cPar.r_pace;
+  w_pace = cPar.w_pace;
+  n_mh = cPar.n_mh;
+  geo_mean = cPar.geo_mean;
+  randseed = cPar.randseed;
+  trace_G = cPar.trace_G;
+
+  ni_total = cPar.ni_total;
+  ns_total = cPar.ns_total;
+  ni_test = cPar.ni_test;
+  ns_test = cPar.ns_test;
+  n_cvt = cPar.n_cvt;
+
+  indicator_idv = cPar.indicator_idv;
+  indicator_snp = cPar.indicator_snp;
+  snpInfo = cPar.snpInfo;
+
+  return;
 }
 
-void BSLMM::CopyToParam (PARAM &cPar) {
-	cPar.time_UtZ=time_UtZ;
-	cPar.time_Omega=time_Omega;
-	cPar.time_Proposal=time_Proposal;
-	cPar.cHyp_initial=cHyp_initial;
-	cPar.n_accept=n_accept;
-	cPar.pheno_mean=pheno_mean;
-	cPar.randseed=randseed;
+void BSLMM::CopyToParam(PARAM &cPar) {
+  cPar.time_UtZ = time_UtZ;
+  cPar.time_Omega = time_Omega;
+  cPar.time_Proposal = time_Proposal;
+  cPar.cHyp_initial = cHyp_initial;
+  cPar.n_accept = n_accept;
+  cPar.pheno_mean = pheno_mean;
+  cPar.randseed = randseed;
 
-	return;
+  return;
 }
 
-void BSLMM::WriteBV (const gsl_vector *bv) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".bv.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	size_t t=0;
-	for (size_t i=0; i<ni_total; ++i) {
-		if (indicator_idv[i]==0) {
-			outfile<<"NA"<<endl;
-		}
-		else {
-			outfile<<scientific<<setprecision(6)<<
-			  gsl_vector_get(bv, t)<<endl;
-			t++;
-		}
-	}
-
-	outfile.clear();
-	outfile.close();
-	return;
+void BSLMM::WriteBV(const gsl_vector *bv) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".bv.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  size_t t = 0;
+  for (size_t i = 0; i < ni_total; ++i) {
+    if (indicator_idv[i] == 0) {
+      outfile << "NA" << endl;
+    } else {
+      outfile << scientific << setprecision(6) << gsl_vector_get(bv, t) << endl;
+      t++;
+    }
+  }
+
+  outfile.clear();
+  outfile.close();
+  return;
 }
 
-void BSLMM::WriteParam (vector<pair<double, double> > &beta_g,
-			const gsl_vector *alpha, const size_t w) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".param.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;}
-
-	outfile<<"chr"<<"\t"<<"rs"<<"\t"
-			<<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
-			<<"beta"<<"\t"<<"gamma"<<endl;
-
-	size_t t=0;
-	for (size_t i=0; i<ns_total; ++i) {
-		if (indicator_snp[i]==0) {continue;}
-
-		outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
-		<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";
-
-		outfile<<scientific<<setprecision(6)<<
-		  gsl_vector_get(alpha, t)<<"\t";
-		if (beta_g[t].second!=0) {
-			outfile<<beta_g[t].first/beta_g[t].second<<
-			  "\t"<<beta_g[t].second/(double)w<<endl;
-		}
-		else {
-			outfile<<0.0<<"\t"<<0.0<<endl;
-		}
-		t++;
-	}
-
-	outfile.clear();
-	outfile.close();
-	return;
+void BSLMM::WriteParam(vector<pair<double, double>> &beta_g,
+                       const gsl_vector *alpha, const size_t w) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".param.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  outfile << "chr"
+          << "\t"
+          << "rs"
+          << "\t"
+          << "ps"
+          << "\t"
+          << "n_miss"
+          << "\t"
+          << "alpha"
+          << "\t"
+          << "beta"
+          << "\t"
+          << "gamma" << endl;
+
+  size_t t = 0;
+  for (size_t i = 0; i < ns_total; ++i) {
+    if (indicator_snp[i] == 0) {
+      continue;
+    }
+
+    outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+            << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
+
+    outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
+            << "\t";
+    if (beta_g[t].second != 0) {
+      outfile << beta_g[t].first / beta_g[t].second << "\t"
+              << beta_g[t].second / (double)w << endl;
+    } else {
+      outfile << 0.0 << "\t" << 0.0 << endl;
+    }
+    t++;
+  }
+
+  outfile.clear();
+  outfile.close();
+  return;
 }
 
-void BSLMM::WriteParam (const gsl_vector *alpha) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".param.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	outfile<<"chr"<<"\t"<<"rs"<<"\t"
-			<<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
-			<<"beta"<<"\t"<<"gamma"<<endl;
-
-	size_t t=0;
-	for (size_t i=0; i<ns_total; ++i) {
-		if (indicator_snp[i]==0) {continue;}
-
-		outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<
-	          snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";
-		outfile<<scientific<<setprecision(6)<<
-		  gsl_vector_get(alpha, t)<<"\t";
-		outfile<<0.0<<"\t"<<0.0<<endl;
-		t++;
-	}
-
-	outfile.clear();
-	outfile.close();
-	return;
+void BSLMM::WriteParam(const gsl_vector *alpha) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".param.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  outfile << "chr"
+          << "\t"
+          << "rs"
+          << "\t"
+          << "ps"
+          << "\t"
+          << "n_miss"
+          << "\t"
+          << "alpha"
+          << "\t"
+          << "beta"
+          << "\t"
+          << "gamma" << endl;
+
+  size_t t = 0;
+  for (size_t i = 0; i < ns_total; ++i) {
+    if (indicator_snp[i] == 0) {
+      continue;
+    }
+
+    outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+            << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
+    outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
+            << "\t";
+    outfile << 0.0 << "\t" << 0.0 << endl;
+    t++;
+  }
+
+  outfile.clear();
+  outfile.close();
+  return;
 }
 
-void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
-			 const gsl_matrix *Result_gamma, const size_t w_col) {
-	string file_gamma, file_hyp;
-	file_gamma=path_out+"/"+file_out;
-	file_gamma+=".gamma.txt";
-	file_hyp=path_out+"/"+file_out;
-	file_hyp+=".hyp.txt";
-
-	ofstream outfile_gamma, outfile_hyp;
-
-	if (flag==0) {
-		outfile_gamma.open (file_gamma.c_str(), ofstream::out);
-		outfile_hyp.open (file_hyp.c_str(), ofstream::out);
-		if (!outfile_gamma) {
-		  cout<<"error writing file: "<<file_gamma<<endl;
-		  return;
-		}
-		if (!outfile_hyp) {
-		  cout<<"error writing file: "<<file_hyp<<endl;
-		  return;
-		}
-
-		outfile_hyp<<"h \t pve \t rho \t pge \t pi \t n_gamma"<<endl;
-
-		for (size_t i=0; i<s_max; ++i) {
-			outfile_gamma<<"s"<<i<<"\t";
-		}
-		outfile_gamma<<endl;
-	}
-	else {
-		outfile_gamma.open (file_gamma.c_str(), ofstream::app);
-		outfile_hyp.open (file_hyp.c_str(), ofstream::app);
-		if (!outfile_gamma) {
-		  cout<<"error writing file: "<<file_gamma<<endl;
-		  return;
-		}
-		if (!outfile_hyp) {
-		  cout<<"error writing file: "<<file_hyp<<endl;
-		  return;
-		}
-
-		size_t w;
-		if (w_col==0) {w=w_pace;}
-		else {w=w_col;}
-
-		for (size_t i=0; i<w; ++i) {
-			outfile_hyp<<scientific;
-			for (size_t j=0; j<4; ++j) {
-				outfile_hyp<<setprecision(6)<<
-				  gsl_matrix_get (Result_hyp, i, j)<<"\t";
-			}
-			outfile_hyp<<setprecision(6)<<
-			  exp(gsl_matrix_get (Result_hyp, i, 4))<<"\t";
-			outfile_hyp<<(int)gsl_matrix_get(Result_hyp,i,5)<<"\t";
-			outfile_hyp<<endl;
-		}
-
-		for (size_t i=0; i<w; ++i) {
-			for (size_t j=0; j<s_max; ++j) {
-				outfile_gamma<<
-				  (int)gsl_matrix_get(Result_gamma,i,j)<<"\t";
-			}
-			outfile_gamma<<endl;
-		}
-
-	}
-
-	outfile_hyp.close();
-	outfile_hyp.clear();
-	outfile_gamma.close();
-	outfile_gamma.clear();
-	return;
+void BSLMM::WriteResult(const int flag, const gsl_matrix *Result_hyp,
+                        const gsl_matrix *Result_gamma, const size_t w_col) {
+  string file_gamma, file_hyp;
+  file_gamma = path_out + "/" + file_out;
+  file_gamma += ".gamma.txt";
+  file_hyp = path_out + "/" + file_out;
+  file_hyp += ".hyp.txt";
+
+  ofstream outfile_gamma, outfile_hyp;
+
+  if (flag == 0) {
+    outfile_gamma.open(file_gamma.c_str(), ofstream::out);
+    outfile_hyp.open(file_hyp.c_str(), ofstream::out);
+    if (!outfile_gamma) {
+      cout << "error writing file: " << file_gamma << endl;
+      return;
+    }
+    if (!outfile_hyp) {
+      cout << "error writing file: " << file_hyp << endl;
+      return;
+    }
+
+    outfile_hyp << "h \t pve \t rho \t pge \t pi \t n_gamma" << endl;
+
+    for (size_t i = 0; i < s_max; ++i) {
+      outfile_gamma << "s" << i << "\t";
+    }
+    outfile_gamma << endl;
+  } else {
+    outfile_gamma.open(file_gamma.c_str(), ofstream::app);
+    outfile_hyp.open(file_hyp.c_str(), ofstream::app);
+    if (!outfile_gamma) {
+      cout << "error writing file: " << file_gamma << endl;
+      return;
+    }
+    if (!outfile_hyp) {
+      cout << "error writing file: " << file_hyp << endl;
+      return;
+    }
+
+    size_t w;
+    if (w_col == 0) {
+      w = w_pace;
+    } else {
+      w = w_col;
+    }
+
+    for (size_t i = 0; i < w; ++i) {
+      outfile_hyp << scientific;
+      for (size_t j = 0; j < 4; ++j) {
+        outfile_hyp << setprecision(6) << gsl_matrix_get(Result_hyp, i, j)
+                    << "\t";
+      }
+      outfile_hyp << setprecision(6) << exp(gsl_matrix_get(Result_hyp, i, 4))
+                  << "\t";
+      outfile_hyp << (int)gsl_matrix_get(Result_hyp, i, 5) << "\t";
+      outfile_hyp << endl;
+    }
+
+    for (size_t i = 0; i < w; ++i) {
+      for (size_t j = 0; j < s_max; ++j) {
+        outfile_gamma << (int)gsl_matrix_get(Result_gamma, i, j) << "\t";
+      }
+      outfile_gamma << endl;
+    }
+  }
+
+  outfile_hyp.close();
+  outfile_hyp.clear();
+  outfile_gamma.close();
+  outfile_gamma.clear();
+  return;
 }
 
-void BSLMM::CalcPgamma (double *p_gamma) {
-	double p, s=0.0;
-	for (size_t i=0; i<ns_test; ++i) {
-		p=0.7*gsl_ran_geometric_pdf (i+1, 1.0/geo_mean)+0.3/
-		  (double)ns_test;
-		p_gamma[i]=p;
-		s+=p;
-	}
-	for (size_t i=0; i<ns_test; ++i) {
-		p=p_gamma[i];
-		p_gamma[i]=p/s;
-	}
-	return;
+void BSLMM::CalcPgamma(double *p_gamma) {
+  double p, s = 0.0;
+  for (size_t i = 0; i < ns_test; ++i) {
+    p = 0.7 * gsl_ran_geometric_pdf(i + 1, 1.0 / geo_mean) +
+        0.3 / (double)ns_test;
+    p_gamma[i] = p;
+    s += p;
+  }
+  for (size_t i = 0; i < ns_test; ++i) {
+    p = p_gamma[i];
+    p_gamma[i] = p / s;
+  }
+  return;
 }
 
-void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X,
-		       vector<size_t> &rank) {
-	size_t pos;
-	for (size_t i=0; i<rank.size(); ++i) {
-		pos=mapRank2pos[rank[i]];
-		gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, i);
-		gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos);
-		gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
-	}
-
-	return;
+void BSLMM::SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X,
+                      vector<size_t> &rank) {
+  size_t pos;
+  for (size_t i = 0; i < rank.size(); ++i) {
+    pos = mapRank2pos[rank[i]];
+    gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, i);
+    gsl_vector_const_view X_col = gsl_matrix_const_column(X, pos);
+    gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector);
+  }
+
+  return;
 }
 
-double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
-			 const double sigma_a2) {
-	double pve, var_y;
+double BSLMM::CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+                        const double sigma_a2) {
+  double pve, var_y;
 
-	gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
-	gsl_vector *Xty=gsl_vector_alloc (UtXgamma->size2);
-	gsl_vector *OiXty=gsl_vector_alloc (UtXgamma->size2);
+  gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
+  gsl_vector *Xty = gsl_vector_alloc(UtXgamma->size2);
+  gsl_vector *OiXty = gsl_vector_alloc(UtXgamma->size2);
 
-	gsl_matrix_set_identity (Omega);
-	gsl_matrix_scale (Omega, 1.0/sigma_a2);
+  gsl_matrix_set_identity(Omega);
+  gsl_matrix_scale(Omega, 1.0 / sigma_a2);
 
-	lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma,
-		      1.0, Omega);
-	gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
+  lapack_dgemm((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega);
+  gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
 
-	CholeskySolve(Omega, Xty, OiXty);
+  CholeskySolve(Omega, Xty, OiXty);
 
-	gsl_blas_ddot (Xty, OiXty, &pve);
-	gsl_blas_ddot (Uty, Uty, &var_y);
+  gsl_blas_ddot(Xty, OiXty, &pve);
+  gsl_blas_ddot(Uty, Uty, &var_y);
 
-	pve/=var_y;
+  pve /= var_y;
 
-	gsl_matrix_free (Omega);
-	gsl_vector_free (Xty);
-	gsl_vector_free (OiXty);
+  gsl_matrix_free(Omega);
+  gsl_vector_free(Xty);
+  gsl_vector_free(OiXty);
 
-	return pve;
+  return pve;
 }
 
-void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
-			 vector<size_t> &rank, class HYPBSLMM &cHyp,
-			 vector<pair<size_t, double> > &pos_loglr) {
-	double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1);
-
-	cHyp.n_gamma=0;
-	for (size_t i=0; i<pos_loglr.size(); ++i) {
-		if (2.0*pos_loglr[i].second>q_genome) {cHyp.n_gamma++;}
-	}
-	if (cHyp.n_gamma<10) {cHyp.n_gamma=10;}
-
-	if (cHyp.n_gamma>s_max) {cHyp.n_gamma=s_max;}
-	if (cHyp.n_gamma<s_min) {cHyp.n_gamma=s_min;}
-
-	rank.clear();
-	for (size_t i=0; i<cHyp.n_gamma; ++i) {
-		rank.push_back(i);
-	}
-
-	cHyp.logp=log((double)cHyp.n_gamma/(double)ns_test);
-	cHyp.h=pve_null;
-
-	if (cHyp.logp==0) {cHyp.logp=-0.000001;}
-	if (cHyp.h==0) {cHyp.h=0.1;}
-
-	gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp.n_gamma);
-	SetXgamma (UtXgamma, UtX, rank);
-	double sigma_a2;
-	if (trace_G!=0) {
-	  sigma_a2=cHyp.h*1.0/
-	    (trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
-	} else {
-	  sigma_a2=cHyp.h*1.0/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
-	}
-	if (sigma_a2==0) {sigma_a2=0.025;}
-	cHyp.rho=CalcPveLM (UtXgamma, Uty, sigma_a2)/cHyp.h;
-	gsl_matrix_free (UtXgamma);
-
-	if (cHyp.rho>1.0) {cHyp.rho=1.0;}
-
-	if (cHyp.h<h_min) {cHyp.h=h_min;}
-	if (cHyp.h>h_max) {cHyp.h=h_max;}
-	if (cHyp.rho<rho_min) {cHyp.rho=rho_min;}
-	if (cHyp.rho>rho_max) {cHyp.rho=rho_max;}
-	if (cHyp.logp<logp_min) {cHyp.logp=logp_min;}
-	if (cHyp.logp>logp_max) {cHyp.logp=logp_max;}
-
-	cout<<"initial value of h = "<<cHyp.h<<endl;
-	cout<<"initial value of rho = "<<cHyp.rho<<endl;
-	cout<<"initial value of pi = "<<exp(cHyp.logp)<<endl;
-	cout<<"initial value of |gamma| = "<<cHyp.n_gamma<<endl;
-
-	return;
+void BSLMM::InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty,
+                        vector<size_t> &rank, class HYPBSLMM &cHyp,
+                        vector<pair<size_t, double>> &pos_loglr) {
+  double q_genome = gsl_cdf_chisq_Qinv(0.05 / (double)ns_test, 1);
+
+  cHyp.n_gamma = 0;
+  for (size_t i = 0; i < pos_loglr.size(); ++i) {
+    if (2.0 * pos_loglr[i].second > q_genome) {
+      cHyp.n_gamma++;
+    }
+  }
+  if (cHyp.n_gamma < 10) {
+    cHyp.n_gamma = 10;
+  }
+
+  if (cHyp.n_gamma > s_max) {
+    cHyp.n_gamma = s_max;
+  }
+  if (cHyp.n_gamma < s_min) {
+    cHyp.n_gamma = s_min;
+  }
+
+  rank.clear();
+  for (size_t i = 0; i < cHyp.n_gamma; ++i) {
+    rank.push_back(i);
+  }
+
+  cHyp.logp = log((double)cHyp.n_gamma / (double)ns_test);
+  cHyp.h = pve_null;
+
+  if (cHyp.logp == 0) {
+    cHyp.logp = -0.000001;
+  }
+  if (cHyp.h == 0) {
+    cHyp.h = 0.1;
+  }
+
+  gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp.n_gamma);
+  SetXgamma(UtXgamma, UtX, rank);
+  double sigma_a2;
+  if (trace_G != 0) {
+    sigma_a2 = cHyp.h * 1.0 /
+               (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+  } else {
+    sigma_a2 = cHyp.h * 1.0 / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+  }
+  if (sigma_a2 == 0) {
+    sigma_a2 = 0.025;
+  }
+  cHyp.rho = CalcPveLM(UtXgamma, Uty, sigma_a2) / cHyp.h;
+  gsl_matrix_free(UtXgamma);
+
+  if (cHyp.rho > 1.0) {
+    cHyp.rho = 1.0;
+  }
+
+  if (cHyp.h < h_min) {
+    cHyp.h = h_min;
+  }
+  if (cHyp.h > h_max) {
+    cHyp.h = h_max;
+  }
+  if (cHyp.rho < rho_min) {
+    cHyp.rho = rho_min;
+  }
+  if (cHyp.rho > rho_max) {
+    cHyp.rho = rho_max;
+  }
+  if (cHyp.logp < logp_min) {
+    cHyp.logp = logp_min;
+  }
+  if (cHyp.logp > logp_max) {
+    cHyp.logp = logp_max;
+  }
+
+  cout << "initial value of h = " << cHyp.h << endl;
+  cout << "initial value of rho = " << cHyp.rho << endl;
+  cout << "initial value of pi = " << exp(cHyp.logp) << endl;
+  cout << "initial value of |gamma| = " << cHyp.n_gamma << endl;
+
+  return;
 }
 
-double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval,
-			     gsl_vector *Utu, gsl_vector *alpha_prime,
-			     class HYPBSLMM &cHyp) {
-	double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h));
-
-	gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size);
-	gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
-
-	double logpost=0.0;
-	double d, ds, uy, Hi_yy=0, logdet_H=0.0;
-	for (size_t i=0; i<ni_test; ++i) {
-		d=gsl_vector_get (K_eval, i)*sigma_b2;
-		ds=d/(d+1.0);
-		d=1.0/(d+1.0);
-		gsl_vector_set (weight_Hi, i, d);
-
-		logdet_H-=log(d);
-		uy=gsl_vector_get (Uty, i);
-		Hi_yy+=d*uy*uy;
-
-		gsl_vector_set (Utu_rand, i,
-				gsl_ran_gaussian(gsl_r, 1)*sqrt(ds));
-	}
-
-	// Sample tau.
-	double tau=1.0;
-	if (a_mode==11) {
-	  tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0,  2.0/Hi_yy);
-	}
-
-	// Sample alpha.
-	gsl_vector_memcpy (alpha_prime, Uty);
-	gsl_vector_mul (alpha_prime, weight_Hi);
-	gsl_vector_scale (alpha_prime, sigma_b2);
-
-	// Sample u.
-	gsl_vector_memcpy (Utu, alpha_prime);
-	gsl_vector_mul (Utu, K_eval);
-	if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));}
-	gsl_vector_add (Utu, Utu_rand);
-
-	// For quantitative traits, calculate pve and ppe.
-	if (a_mode==11) {
-		gsl_blas_ddot (Utu, Utu, &d);
-		cHyp.pve=d/(double)ni_test;
-		cHyp.pve/=cHyp.pve+1.0/tau;
-		cHyp.pge=0.0;
-	}
-
-	// Calculate likelihood.
-	logpost=-0.5*logdet_H;
-	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(Hi_yy);}
-	else {logpost-=0.5*Hi_yy;}
-
-	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
-	  ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
-
-	gsl_vector_free (Utu_rand);
-	gsl_vector_free (weight_Hi);
-
-	return logpost;
+double BSLMM::CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval,
+                            gsl_vector *Utu, gsl_vector *alpha_prime,
+                            class HYPBSLMM &cHyp) {
+  double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
+
+  gsl_vector *Utu_rand = gsl_vector_alloc(Uty->size);
+  gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size);
+
+  double logpost = 0.0;
+  double d, ds, uy, Hi_yy = 0, logdet_H = 0.0;
+  for (size_t i = 0; i < ni_test; ++i) {
+    d = gsl_vector_get(K_eval, i) * sigma_b2;
+    ds = d / (d + 1.0);
+    d = 1.0 / (d + 1.0);
+    gsl_vector_set(weight_Hi, i, d);
+
+    logdet_H -= log(d);
+    uy = gsl_vector_get(Uty, i);
+    Hi_yy += d * uy * uy;
+
+    gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
+  }
+
+  // Sample tau.
+  double tau = 1.0;
+  if (a_mode == 11) {
+    tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / Hi_yy);
+  }
+
+  // Sample alpha.
+  gsl_vector_memcpy(alpha_prime, Uty);
+  gsl_vector_mul(alpha_prime, weight_Hi);
+  gsl_vector_scale(alpha_prime, sigma_b2);
+
+  // Sample u.
+  gsl_vector_memcpy(Utu, alpha_prime);
+  gsl_vector_mul(Utu, K_eval);
+  if (a_mode == 11) {
+    gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
+  }
+  gsl_vector_add(Utu, Utu_rand);
+
+  // For quantitative traits, calculate pve and pge.
+  if (a_mode == 11) {
+    gsl_blas_ddot(Utu, Utu, &d);
+    cHyp.pve = d / (double)ni_test;
+    cHyp.pve /= cHyp.pve + 1.0 / tau;
+    cHyp.pge = 0.0;
+  }
+
+  // Calculate likelihood.
+  logpost = -0.5 * logdet_H;
+  if (a_mode == 11) {
+    logpost -= 0.5 * (double)ni_test * log(Hi_yy);
+  } else {
+    logpost -= 0.5 * Hi_yy;
+  }
+
+  logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+             ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
+
+  gsl_vector_free(Utu_rand);
+  gsl_vector_free(weight_Hi);
+
+  return logpost;
 }
 
-double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma,
-			     const gsl_vector *Uty, const gsl_vector *K_eval,
-			     gsl_vector *UtXb, gsl_vector *Utu,
-			     gsl_vector *alpha_prime, gsl_vector *beta,
-			     class HYPBSLMM &cHyp) {
-	clock_t time_start;
-
-	double sigma_a2=cHyp.h*cHyp.rho/
-	  (trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
-	double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h));
-
-	double logpost=0.0;
-	double d, ds, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
-
-	gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
-						    UtXgamma->size2);
-	gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
-	gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
-	gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
-	gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1);
-	gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
-
-	gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
-
-	logdet_H=0.0; P_yy=0.0;
-	for (size_t i=0; i<ni_test; ++i) {
-		gsl_vector_view UtXgamma_row=
-		  gsl_matrix_row (UtXgamma_eval, i);
-		d=gsl_vector_get (K_eval, i)*sigma_b2;
-		ds=d/(d+1.0);
-		d=1.0/(d+1.0);
-		gsl_vector_set (weight_Hi, i, d);
-
-		logdet_H-=log(d);
-		uy=gsl_vector_get (Uty, i);
-		P_yy+=d*uy*uy;
-		gsl_vector_scale (&UtXgamma_row.vector, d);
-
-		gsl_vector_set(Utu_rand,i,gsl_ran_gaussian(gsl_r,1)*sqrt(ds));
-	}
-
-	// Calculate Omega.
-	gsl_matrix_set_identity (Omega);
-
-	time_start=clock();
-	lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval,
-		      UtXgamma, 1.0, Omega);
-	time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-
-	// Calculate beta_hat.
-	gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
-
-	logdet_O=CholeskySolve(Omega, XtHiy, beta_hat);
-
-	gsl_vector_scale (beta_hat, sigma_a2);
-
-	gsl_blas_ddot (XtHiy, beta_hat, &d);
-	P_yy-=d;
-
-	// Sample tau.
-	double tau=1.0;
-	if (a_mode==11) {
-	  tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0,  2.0/P_yy);
-	}
-
-	// Sample beta.
-	for (size_t i=0; i<beta->size; i++)
-	{
-		d=gsl_ran_gaussian(gsl_r, 1);
-		gsl_vector_set(beta, i, d);
-	}
-	gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
-
-	// This computes inv(L^T(Omega)) %*% beta.
-	gsl_vector_scale(beta, sqrt(sigma_a2/tau));
-	gsl_vector_add(beta, beta_hat);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
-
-	// Sample alpha.
-	gsl_vector_memcpy (alpha_prime, Uty);
-	gsl_vector_sub (alpha_prime, UtXb);
-	gsl_vector_mul (alpha_prime, weight_Hi);
-	gsl_vector_scale (alpha_prime, sigma_b2);
-
-	// Sample u.
-	gsl_vector_memcpy (Utu, alpha_prime);
-	gsl_vector_mul (Utu, K_eval);
-
-	if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));}
-	gsl_vector_add (Utu, Utu_rand);
-
-	// For quantitative traits, calculate pve and pge.
-	if (a_mode==11) {
-		gsl_blas_ddot (UtXb, UtXb, &d);
-		cHyp.pge=d/(double)ni_test;
-
-		gsl_blas_ddot (Utu, Utu, &d);
-		cHyp.pve=cHyp.pge+d/(double)ni_test;
-
-		if (cHyp.pve==0) {cHyp.pge=0.0;}
-		else {cHyp.pge/=cHyp.pve;}
-		cHyp.pve/=cHyp.pve+1.0/tau;
-	}
-
-	gsl_matrix_free (UtXgamma_eval);
-	gsl_matrix_free (Omega);
-	gsl_vector_free (XtHiy);
-	gsl_vector_free (beta_hat);
-	gsl_vector_free (Utu_rand);
-	gsl_vector_free (weight_Hi);
-
-	logpost=-0.5*logdet_H-0.5*logdet_O;
-	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
-	else {logpost-=0.5*P_yy;}
-	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
-	  ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
-
-	return logpost;
+double BSLMM::CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+                            const gsl_vector *K_eval, gsl_vector *UtXb,
+                            gsl_vector *Utu, gsl_vector *alpha_prime,
+                            gsl_vector *beta, class HYPBSLMM &cHyp) {
+  clock_t time_start;
+
+  double sigma_a2 = cHyp.h * cHyp.rho /
+                    (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+  double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
+
+  double logpost = 0.0;
+  double d, ds, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0;
+
+  gsl_matrix *UtXgamma_eval =
+      gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2);
+  gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
+  gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2);
+  gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2);
+  gsl_vector *Utu_rand = gsl_vector_alloc(UtXgamma->size1);
+  gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1);
+
+  gsl_matrix_memcpy(UtXgamma_eval, UtXgamma);
+
+  logdet_H = 0.0;
+  P_yy = 0.0;
+  for (size_t i = 0; i < ni_test; ++i) {
+    gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i);
+    d = gsl_vector_get(K_eval, i) * sigma_b2;
+    ds = d / (d + 1.0);
+    d = 1.0 / (d + 1.0);
+    gsl_vector_set(weight_Hi, i, d);
+
+    logdet_H -= log(d);
+    uy = gsl_vector_get(Uty, i);
+    P_yy += d * uy * uy;
+    gsl_vector_scale(&UtXgamma_row.vector, d);
+
+    gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
+  }
+
+  // Calculate Omega.
+  gsl_matrix_set_identity(Omega);
+
+  time_start = clock();
+  lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0,
+               Omega);
+  time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  // Calculate beta_hat.
+  gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
+
+  logdet_O = CholeskySolve(Omega, XtHiy, beta_hat);
+
+  gsl_vector_scale(beta_hat, sigma_a2);
+
+  gsl_blas_ddot(XtHiy, beta_hat, &d);
+  P_yy -= d;
+
+  // Sample tau.
+  double tau = 1.0;
+  if (a_mode == 11) {
+    tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
+  }
+
+  // Sample beta.
+  for (size_t i = 0; i < beta->size; i++) {
+    d = gsl_ran_gaussian(gsl_r, 1);
+    gsl_vector_set(beta, i, d);
+  }
+  gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
+
+  // This computes inv(L^T(Omega)) %*% beta.
+  gsl_vector_scale(beta, sqrt(sigma_a2 / tau));
+  gsl_vector_add(beta, beta_hat);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
+
+  // Sample alpha.
+  gsl_vector_memcpy(alpha_prime, Uty);
+  gsl_vector_sub(alpha_prime, UtXb);
+  gsl_vector_mul(alpha_prime, weight_Hi);
+  gsl_vector_scale(alpha_prime, sigma_b2);
+
+  // Sample u.
+  gsl_vector_memcpy(Utu, alpha_prime);
+  gsl_vector_mul(Utu, K_eval);
+
+  if (a_mode == 11) {
+    gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
+  }
+  gsl_vector_add(Utu, Utu_rand);
+
+  // For quantitative traits, calculate pve and pge.
+  if (a_mode == 11) {
+    gsl_blas_ddot(UtXb, UtXb, &d);
+    cHyp.pge = d / (double)ni_test;
+
+    gsl_blas_ddot(Utu, Utu, &d);
+    cHyp.pve = cHyp.pge + d / (double)ni_test;
+
+    if (cHyp.pve == 0) {
+      cHyp.pge = 0.0;
+    } else {
+      cHyp.pge /= cHyp.pve;
+    }
+    cHyp.pve /= cHyp.pve + 1.0 / tau;
+  }
+
+  gsl_matrix_free(UtXgamma_eval);
+  gsl_matrix_free(Omega);
+  gsl_vector_free(XtHiy);
+  gsl_vector_free(beta_hat);
+  gsl_vector_free(Utu_rand);
+  gsl_vector_free(weight_Hi);
+
+  logpost = -0.5 * logdet_H - 0.5 * logdet_O;
+  if (a_mode == 11) {
+    logpost -= 0.5 * (double)ni_test * log(P_yy);
+  } else {
+    logpost -= 0.5 * P_yy;
+  }
+  logpost +=
+      ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+      ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
+
+  return logpost;
 }
 
 // Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
-			  gsl_vector *z_hat, class HYPBSLMM &cHyp) {
-	double d;
+void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu,
+                         gsl_vector *z_hat, class HYPBSLMM &cHyp) {
+  double d;
 
-	gsl_blas_ddot (Utu, Utu, &d);
-	cHyp.pve=d/(double)ni_test;
+  gsl_blas_ddot(Utu, Utu, &d);
+  cHyp.pve = d / (double)ni_test;
 
-	gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
 
-	cHyp.pve/=cHyp.pve+1.0;
-	cHyp.pge=0.0;
+  cHyp.pve /= cHyp.pve + 1.0;
+  cHyp.pge = 0.0;
 
-	return;
+  return;
 }
 
 // Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
-			  const gsl_vector *Utu, gsl_vector *z_hat,
-			  class HYPBSLMM &cHyp) {
-	double d;
-	gsl_vector *UtXbU=gsl_vector_alloc (Utu->size);
+void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb,
+                         const gsl_vector *Utu, gsl_vector *z_hat,
+                         class HYPBSLMM &cHyp) {
+  double d; // Receives each dot product in turn.
+  gsl_vector *UtXbU = gsl_vector_alloc(Utu->size); // Scratch for Utu + UtXb.
 
-	gsl_blas_ddot (UtXb, UtXb, &d);
-	cHyp.pge=d/(double)ni_test;
+  gsl_blas_ddot(UtXb, UtXb, &d);
+  cHyp.pge = d / (double)ni_test; // UtXb'UtXb / ni_test; normalized below.
 
-	gsl_blas_ddot (Utu, Utu, &d);
-	cHyp.pve=cHyp.pge+d/(double)ni_test;
+  gsl_blas_ddot(Utu, Utu, &d);
+  cHyp.pve = cHyp.pge + d / (double)ni_test; // Add Utu'Utu / ni_test.
 
-	gsl_vector_memcpy (UtXbU, Utu);
-	gsl_vector_add (UtXbU, UtXb);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
+  gsl_vector_memcpy(UtXbU, Utu);
+  gsl_vector_add(UtXbU, UtXb); // UtXbU = Utu + UtXb.
+  gsl_blas_dgemv(CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); // z_hat = U * UtXbU.
 
-	if (cHyp.pve==0) {cHyp.pge=0.0;}
-	else {cHyp.pge/=cHyp.pve;}
+  if (cHyp.pve == 0) { // Avoid 0 / 0 when both sums of squares are zero.
+    cHyp.pge = 0.0;
+  } else {
+    cHyp.pge /= cHyp.pve; // Share of the combined total that comes from UtXb.
+  }
 
-	cHyp.pve/=cHyp.pve+1.0;
+  cHyp.pve /= cHyp.pve + 1.0; // Map x to x / (x + 1).
 
-	gsl_vector_free(UtXbU);
-	return;
+  gsl_vector_free(UtXbU);
+  return;
 }
 
-void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
-		     gsl_vector *z) {
-	double d1, d2, z_rand=0.0;
-	for (size_t i=0; i<z->size; ++i) {
-		d1=gsl_vector_get (y, i);
-		d2=gsl_vector_get (z_hat, i);
-
-		// y is centered for case control studies.
-		if (d1<=0.0) {
-
-		        // Control, right truncated.
-			do {
-				z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
-			} while (z_rand>0.0);
-		}
-		else {
-			do {
-				z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
-			} while (z_rand<0.0);
-		}
-
-		gsl_vector_set (z, i, z_rand);
-	}
-
-	return;
+void BSLMM::SampleZ(const gsl_vector *y, const gsl_vector *z_hat,
+                    gsl_vector *z) {
+  double d1, d2, z_rand = 0.0;
+  for (size_t i = 0; i < z->size; ++i) {
+    d1 = gsl_vector_get(y, i);     // Only compared against zero below.
+    d2 = gsl_vector_get(z_hat, i); // Centre of the unit-sigma Gaussian draw.
+    // Redraw z_rand = d2 + N(0, 1) until it lies on the side selected by y[i].
+    // y is centered for case control studies.
+    if (d1 <= 0.0) {
+
+      // Control, right truncated.
+      do {
+        z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
+      } while (z_rand > 0.0);
+    } else { // y[i] > 0: left truncated, only draws >= 0 are kept.
+      do {
+        z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
+      } while (z_rand < 0.0);
+    }
+    // NOTE(review): both loops are plain rejection with no iteration cap.
+    gsl_vector_set(z, i, z_rand);
+  }
+
+  return;
 }
 
-double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
-			    class HYPBSLMM &cHyp_new, const size_t &repeat) {
+double BSLMM::ProposeHnRho(const class HYPBSLMM &cHyp_old,
+                           class HYPBSLMM &cHyp_new, const size_t &repeat) {
 
-	double h=cHyp_old.h, rho=cHyp_old.rho;
+  double h = cHyp_old.h, rho = cHyp_old.rho; // Start from the current state.
 
-	double d_h=(h_max-h_min)*h_scale, d_rho=(rho_max-rho_min)*rho_scale;
+  double d_h = (h_max - h_min) * h_scale, // Each step is (u - 0.5) * width,
+         d_rho = (rho_max - rho_min) * rho_scale; // with u uniform in [0, 1).
 
-	for (size_t i=0; i<repeat; ++i) {
-		h=h+(gsl_rng_uniform(gsl_r)-0.5)*d_h;
-		if (h<h_min) {h=2*h_min-h;}
-		if (h>h_max) {h=2*h_max-h;}
+  for (size_t i = 0; i < repeat; ++i) { // Chain `repeat` random-walk steps.
+    h = h + (gsl_rng_uniform(gsl_r) - 0.5) * d_h;
+    if (h < h_min) { // Reflect back across the lower bound.
+      h = 2 * h_min - h;
+    }
+    if (h > h_max) { // Reflect back across the upper bound.
+      h = 2 * h_max - h;
+    }
 
-		rho=rho+(gsl_rng_uniform(gsl_r)-0.5)*d_rho;
-		if (rho<rho_min) {rho=2*rho_min-rho;}
-		if (rho>rho_max) {rho=2*rho_max-rho;}
-	}
-	cHyp_new.h=h;
-	cHyp_new.rho=rho;
-	return 0.0;
+    rho = rho + (gsl_rng_uniform(gsl_r) - 0.5) * d_rho;
+    if (rho < rho_min) { // Same reflection scheme as for h.
+      rho = 2 * rho_min - rho;
+    }
+    if (rho > rho_max) {
+      rho = 2 * rho_max - rho;
+    }
+  }
+  cHyp_new.h = h; // Only h and rho of cHyp_new are written here.
+  cHyp_new.rho = rho;
+  return 0.0; // Constant: adds nothing to the log ratio summed by MCMC().
 }
 
-double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old,
-			 class HYPBSLMM &cHyp_new, const size_t &repeat) {
-	double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp;
-	double log_ratio=0.0;
+double BSLMM::ProposePi(const class HYPBSLMM &cHyp_old,
+                        class HYPBSLMM &cHyp_new, const size_t &repeat) {
+  double logp_old = cHyp_old.logp, logp_new = cHyp_old.logp;
+  double log_ratio = 0.0; // Returned; MCMC() adds it to its log M-H ratio.
 
-	double d_logp=min(0.1, (logp_max-logp_min)*logp_scale);
+  double d_logp = min(0.1, (logp_max - logp_min) * logp_scale); // Capped width.
 
-	for (size_t i=0; i<repeat; ++i) {
-		logp_new=logp_old+(gsl_rng_uniform(gsl_r)-0.5)*d_logp;
-		if (logp_new<logp_min) {logp_new=2*logp_min-logp_new;}
-		if (logp_new>logp_max) {logp_new=2*logp_max-logp_new;}
-		log_ratio+=logp_new-logp_old;
-		logp_old=logp_new;
-	}
-	cHyp_new.logp=logp_new;
+  for (size_t i = 0; i < repeat; ++i) { // Chain `repeat` random-walk steps.
+    logp_new = logp_old + (gsl_rng_uniform(gsl_r) - 0.5) * d_logp;
+    if (logp_new < logp_min) { // Reflect back across the lower bound.
+      logp_new = 2 * logp_min - logp_new;
+    }
+    if (logp_new > logp_max) { // Reflect back across the upper bound.
+      logp_new = 2 * logp_max - logp_new;
+    }
+    log_ratio += logp_new - logp_old; // Sum telescopes to final - initial logp.
+    logp_old = logp_new;
+  }
+  cHyp_new.logp = logp_new; // Only logp of cHyp_new is written here.
 
-	return log_ratio;
+  return log_ratio;
 }
 
-bool comp_vec (size_t a, size_t b) {
-	return (a < b);
-}
+bool comp_vec(size_t a, size_t b) { return (a < b); } // Ascending order.
+
+double BSLMM::ProposeGamma(const vector<size_t> &rank_old,
+                           vector<size_t> &rank_new, const double *p_gamma,
+                           const class HYPBSLMM &cHyp_old,
+                           class HYPBSLMM &cHyp_new, const size_t &repeat) {
+  map<size_t, int> mapRank2in; // Keys are the ranks currently in rank_new.
+  size_t r;
+  double unif, logp = 0.0; // logp accumulates the value returned to MCMC().
+  int flag_gamma;          // 1 = add, 2 = delete, 3 = switch, 4 = no change.
+  size_t r_add, r_remove, col_id;
+  // Copy rank_old into rank_new, then apply `repeat` random moves to it.
+  rank_new.clear();
+  if (cHyp_old.n_gamma != rank_old.size()) {
+    cout << "size wrong" << endl; // Diagnostic only; execution continues.
+  }
+
+  if (cHyp_old.n_gamma != 0) {
+    for (size_t i = 0; i < rank_old.size(); ++i) {
+      r = rank_old[i];
+      rank_new.push_back(r);
+      mapRank2in[r] = 1;
+    }
+  }
+  cHyp_new.n_gamma = cHyp_old.n_gamma;
+
+  for (size_t i = 0; i < repeat; ++i) {
+    unif = gsl_rng_uniform(gsl_r);
+    // Pick the move type; a move not allowed at the current size is a no-op.
+    if (unif < 0.40 && cHyp_new.n_gamma < s_max) {
+      flag_gamma = 1;
+    } else if (unif >= 0.40 && unif < 0.80 && cHyp_new.n_gamma > s_min) {
+      flag_gamma = 2;
+    } else if (unif >= 0.80 && cHyp_new.n_gamma > 0 &&
+               cHyp_new.n_gamma < ns_test) {
+      flag_gamma = 3;
+    } else {
+      flag_gamma = 4;
+    }
+
+    if (flag_gamma == 1) {
+
+      // Add a SNP.
+      do { // Redraw from gsl_t until the rank is not already selected.
+        r_add = gsl_ran_discrete(gsl_r, gsl_t);
+      } while (mapRank2in.count(r_add) != 0);
+
+      double prob_total = 1.0; // Becomes 1 - sum of p_gamma over current set.
+      for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { // Shadows the outer i.
+        r = rank_new[i];
+        prob_total -= p_gamma[r];
+      }
+
+      mapRank2in[r_add] = 1;
+      rank_new.push_back(r_add);
+      cHyp_new.n_gamma++; // The log term below uses the incremented size.
+      logp += -log(p_gamma[r_add] / prob_total) - log((double)cHyp_new.n_gamma);
+    } else if (flag_gamma == 2) {
+
+      // Delete a SNP.
+      col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); // Uniform index.
+      r_remove = rank_new[col_id];
+
+      double prob_total = 1.0;
+      for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { // Shadows the outer i.
+        r = rank_new[i];
+        prob_total -= p_gamma[r];
+      }
+      prob_total += p_gamma[r_remove]; // Undo the subtraction for r_remove.
+
+      mapRank2in.erase(r_remove);
+      rank_new.erase(rank_new.begin() + col_id);
+      logp +=
+          log(p_gamma[r_remove] / prob_total) + log((double)cHyp_new.n_gamma);
+      cHyp_new.n_gamma--; // Decremented after the log term used the old size.
+    } else if (flag_gamma == 3) {
+
+      // Switch a SNP.
+      col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
+      r_remove = rank_new[col_id];
+
+      // Be careful with the proposal.
+      do { // r_remove is still in mapRank2in, so it cannot be redrawn here.
+        r_add = gsl_ran_discrete(gsl_r, gsl_t);
+      } while (mapRank2in.count(r_add) != 0);
+
+      double prob_total = 1.0;
+      for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { // Shadows the outer i.
+        r = rank_new[i];
+        prob_total -= p_gamma[r];
+      }
+
+      logp += log(p_gamma[r_remove] /
+                  (prob_total + p_gamma[r_remove] - p_gamma[r_add]));
+      logp -= log(p_gamma[r_add] / prob_total);
+
+      mapRank2in.erase(r_remove);
+      mapRank2in[r_add] = 1;
+      rank_new.erase(rank_new.begin() + col_id);
+      rank_new.push_back(r_add); // n_gamma is unchanged by a switch.
+    } else {
+      logp += 0;
+    } // Do not change.
+  }
+
+  stable_sort(rank_new.begin(), rank_new.end(), comp_vec); // Ascending ranks.
 
-double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
-			    vector<size_t> &rank_new,
-			    const double *p_gamma,
-			    const class HYPBSLMM &cHyp_old,
-			    class HYPBSLMM &cHyp_new,
-			    const size_t &repeat) {
-	map<size_t, int> mapRank2in;
-	size_t r;
-	double unif, logp=0.0;
-	int flag_gamma;
-	size_t r_add, r_remove, col_id;
-
-	rank_new.clear();
-	if (cHyp_old.n_gamma!=rank_old.size()) {cout<<"size wrong"<<endl;}
-
-	if (cHyp_old.n_gamma!=0) {
-		for (size_t i=0; i<rank_old.size(); ++i) {
-			r=rank_old[i];
-			rank_new.push_back(r);
-			mapRank2in[r]=1;
-		}
-	}
-	cHyp_new.n_gamma=cHyp_old.n_gamma;
-
-	for (size_t i=0; i<repeat; ++i) {
-		unif=gsl_rng_uniform(gsl_r);
-
-		if (unif < 0.40 && cHyp_new.n_gamma<s_max) {flag_gamma=1;}
-		else if (unif>=0.40 && unif < 0.80 &&
-			 cHyp_new.n_gamma>s_min) {
-		  flag_gamma=2;
-		}
-		else if (unif>=0.80 && cHyp_new.n_gamma>0 &&
-			 cHyp_new.n_gamma<ns_test) {
-		  flag_gamma=3;
-		}
-		else {flag_gamma=4;}
-
-		if(flag_gamma==1)  {
-
-		        // Add a SNP.
-			do {
-				r_add=gsl_ran_discrete (gsl_r, gsl_t);
-			} while (mapRank2in.count(r_add)!=0);
-
-			double prob_total=1.0;
-			for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
-				r=rank_new[i];
-				prob_total-=p_gamma[r];
-			}
-
-			mapRank2in[r_add]=1;
-			rank_new.push_back(r_add);
-			cHyp_new.n_gamma++;
-			logp+=-log(p_gamma[r_add]/prob_total)-
-			  log((double)cHyp_new.n_gamma);
-		}
-		else if (flag_gamma==2) {
-
-		        // Delete a SNP.
-			col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
-			r_remove=rank_new[col_id];
-
-			double prob_total=1.0;
-			for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
-				r=rank_new[i];
-				prob_total-=p_gamma[r];
-			}
-			prob_total+=p_gamma[r_remove];
-
-			mapRank2in.erase(r_remove);
-			rank_new.erase(rank_new.begin()+col_id);
-			logp+=log(p_gamma[r_remove]/prob_total)+
-			  log((double)cHyp_new.n_gamma);
-			cHyp_new.n_gamma--;
-		}
-		else if (flag_gamma==3) {
-
-		        // Switch a SNP.
-			col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
-			r_remove=rank_new[col_id];
-
-		        // Be careful with the proposal.
-			do {
-				r_add=gsl_ran_discrete (gsl_r, gsl_t);
-			} while (mapRank2in.count(r_add)!=0);
-
-			double prob_total=1.0;
-			for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
-				r=rank_new[i];
-				prob_total-=p_gamma[r];
-			}
-
-			logp+=log(p_gamma[r_remove]/
-			  (prob_total+p_gamma[r_remove]-p_gamma[r_add]));
-			logp-=log(p_gamma[r_add]/prob_total);
-
-			mapRank2in.erase(r_remove);
-			mapRank2in[r_add]=1;
-			rank_new.erase(rank_new.begin()+col_id);
-			rank_new.push_back(r_add);
-		}
-		else {logp+=0;} // Do not change.
-	}
-
-	stable_sort (rank_new.begin(), rank_new.end(), comp_vec);
-
-	mapRank2in.clear();
-	return logp;
+  mapRank2in.clear();
+  return logp;
 }
 
-bool comp_lr (pair<size_t, double> a, pair<size_t, double> b) {
-	return (a.second > b.second);
+bool comp_lr(pair<size_t, double> a, pair<size_t, double> b) {
+  return (a.second > b.second); // Orders pairs by descending .second.
 }
 
 // If a_mode==13 then Uty==y.
-void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
-		  const gsl_vector *Uty, const gsl_vector *K_eval,
-		  const gsl_vector *y) {
-	clock_t time_start;
-
-	class HYPBSLMM cHyp_old, cHyp_new;
-
-	gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6);
-	gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
-
-	gsl_vector *alpha_prime=gsl_vector_alloc (ni_test);
-	gsl_vector *alpha_new=gsl_vector_alloc (ni_test);
-	gsl_vector *alpha_old=gsl_vector_alloc (ni_test);
-	gsl_vector *Utu=gsl_vector_alloc (ni_test);
-	gsl_vector *Utu_new=gsl_vector_alloc (ni_test);
-	gsl_vector *Utu_old=gsl_vector_alloc (ni_test);
-
-	gsl_vector *UtXb_new=gsl_vector_alloc (ni_test);
-	gsl_vector *UtXb_old=gsl_vector_alloc (ni_test);
-
-	gsl_vector *z_hat=gsl_vector_alloc (ni_test);
-	gsl_vector *z=gsl_vector_alloc (ni_test);
-	gsl_vector *Utz=gsl_vector_alloc (ni_test);
-
-	gsl_vector_memcpy (Utz, Uty);
-
-	double logPost_new, logPost_old;
-	double logMHratio;
-	double mean_z=0.0;
-
-	gsl_matrix_set_zero (Result_gamma);
-	gsl_vector_set_zero (Utu);
-	gsl_vector_set_zero (alpha_prime);
-	if (a_mode==13) {
-		pheno_mean=0.0;
-	}
-
-	vector<pair<double, double> > beta_g;
-	for (size_t i=0; i<ns_test; i++) {
-		beta_g.push_back(make_pair(0.0, 0.0));
-	}
-
-	vector<size_t> rank_new, rank_old;
-	vector<double> beta_new, beta_old;
-
-	vector<pair<size_t, double> > pos_loglr;
-
-	time_start=clock();
-	MatrixCalcLR (U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr);
-	time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr);
-	for (size_t i=0; i<ns_test; ++i) {
-		mapRank2pos[i]=pos_loglr[i].first;
-	}
-
-	// Calculate proposal distribution for gamma (unnormalized),
-	// and set up gsl_r and gsl_t.
-	gsl_rng_env_setup();
-	const gsl_rng_type * gslType;
-	gslType = gsl_rng_default;
-	if (randseed<0)
-	{
-		time_t rawtime;
-		time (&rawtime);
-		tm * ptm = gmtime (&rawtime);
-
-		randseed = (unsigned) (ptm->tm_hour%24*3600+
-				       ptm->tm_min*60+ptm->tm_sec);
-	}
-	gsl_r = gsl_rng_alloc(gslType);
-	gsl_rng_set(gsl_r, randseed);
-
-	double *p_gamma = new double[ns_test];
-	CalcPgamma (p_gamma);
-
-	gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
-	// Initial parameters.
-	InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr);
-
-	cHyp_initial=cHyp_old;
-
-	if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
-		logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old,
-					  cHyp_old);
-
-		beta_old.clear();
-		for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
-		  beta_old.push_back(0);
-		}
-	}
-	else {
-		gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test,
-						       cHyp_old.n_gamma);
-		gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma);
-		SetXgamma (UtXgamma, UtX, rank_old);
-		logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old,
-					  Utu_old, alpha_old, beta, cHyp_old);
-
-		beta_old.clear();
-		for (size_t i=0; i<beta->size; ++i) {
-			beta_old.push_back(gsl_vector_get(beta, i));
-		}
-		gsl_matrix_free (UtXgamma);
-		gsl_vector_free (beta);
-	}
-
-	// Calculate centered z_hat, and pve.
-	if (a_mode==13) {
-		time_start=clock();
-		if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
-			CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old);
-		}
-		else {
-			CalcCC_PVEnZ (U, UtXb_old, Utu_old, z_hat, cHyp_old);
-		}
-		time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	}
-
-	// Start MCMC.
-	int accept;
-	size_t total_step=w_step+s_step;
-	size_t w=0, w_col, pos;
-	size_t repeat=0;
-
-	for (size_t t=0; t<total_step; ++t) {
-		if (t%d_pace==0 || t==total_step-1) {
-		  ProgressBar ("Running MCMC ", t, total_step-1,
-			       (double)n_accept/(double)(t*n_mh+1));
-		}
-
-		if (a_mode==13) {
-			SampleZ (y, z_hat, z);
-			mean_z=CenterVector (z);
-
-			time_start=clock();
-			gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz);
-			time_UtZ+=(clock()-time_start)/
-			  (double(CLOCKS_PER_SEC)*60.0);
-
-			// First proposal.
-			if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
-				logPost_old=
-				  CalcPosterior(Utz, K_eval, Utu_old,
-						alpha_old, cHyp_old);
-				beta_old.clear();
-				for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
-				  beta_old.push_back(0);
-				}
-			}
-			else {
-				gsl_matrix *UtXgamma=
-				  gsl_matrix_alloc (ni_test, cHyp_old.n_gamma);
-				gsl_vector *beta=
-				  gsl_vector_alloc (cHyp_old.n_gamma);
-				SetXgamma (UtXgamma, UtX, rank_old);
-				logPost_old=
-				  CalcPosterior(UtXgamma, Utz, K_eval,
-						UtXb_old, Utu_old, alpha_old,
-						beta, cHyp_old);
-
-				beta_old.clear();
-				for (size_t i=0; i<beta->size; ++i) {
-				  beta_old.push_back(gsl_vector_get(beta, i));
-				}
-				gsl_matrix_free (UtXgamma);
-				gsl_vector_free (beta);
-			}
-		}
-
-		// M-H steps.
-		for (size_t i=0; i<n_mh; ++i) {
-			if (gsl_rng_uniform(gsl_r)<0.33) {
-			  repeat = 1+gsl_rng_uniform_int(gsl_r, 20);
-			}
-			else {
-			  repeat=1;
-			}
-
-			logMHratio=0.0;
-			logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat);
-			logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma,
-						  cHyp_old, cHyp_new, repeat);
-			logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat);
-
-			if (cHyp_new.n_gamma==0 || cHyp_new.rho==0) {
-				logPost_new=CalcPosterior(Utz, K_eval, Utu_new,
-							  alpha_new, cHyp_new);
-				beta_new.clear();
-				for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
-				  beta_new.push_back(0);
-				}
-			}
-			else {
-				gsl_matrix *UtXgamma=
-				  gsl_matrix_alloc (ni_test, cHyp_new.n_gamma);
-				gsl_vector *beta=
-				  gsl_vector_alloc (cHyp_new.n_gamma);
-				SetXgamma (UtXgamma, UtX, rank_new);
-				logPost_new=
-				  CalcPosterior(UtXgamma, Utz, K_eval,
-						UtXb_new, Utu_new, alpha_new,
-						beta, cHyp_new);
-				beta_new.clear();
-				for (size_t i=0; i<beta->size; ++i) {
-				  beta_new.push_back(gsl_vector_get(beta, i));
-				}
-				gsl_matrix_free (UtXgamma);
-				gsl_vector_free (beta);
-			}
-
-			logMHratio+=logPost_new-logPost_old;
-
-			if (logMHratio>0 ||
-			    log(gsl_rng_uniform(gsl_r))<logMHratio) {
-			  accept=1; n_accept++;
-			}
-			else {accept=0;}
-
-			if (accept==1) {
-				logPost_old=logPost_new;
-				rank_old.clear(); beta_old.clear();
-				if (rank_new.size()!=0) {
-				  for (size_t i=0; i<rank_new.size(); ++i) {
-				    rank_old.push_back(rank_new[i]);
-				    beta_old.push_back(beta_new[i]);
-				  }
-				}
-				cHyp_old=cHyp_new;
-				gsl_vector_memcpy (alpha_old, alpha_new);
-				gsl_vector_memcpy (UtXb_old, UtXb_new);
-				gsl_vector_memcpy (Utu_old, Utu_new);
-			}
-			else {cHyp_new=cHyp_old;}
-		}
-
-		// Calculate z_hat, and pve.
-		if (a_mode==13) {
-			time_start=clock();
-			if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
-				CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old);
-			}
-			else {
-				CalcCC_PVEnZ (U, UtXb_old, Utu_old,
-					      z_hat, cHyp_old);
-			}
-
-			// Sample mu and update z_hat.
-			gsl_vector_sub (z, z_hat);
-			mean_z+=CenterVector(z);
-			mean_z+=
-			  gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test));
-			gsl_vector_add_constant (z_hat, mean_z);
-
-			time_UtZ+=(clock()-time_start)/
-			  (double(CLOCKS_PER_SEC)*60.0);
-		}
-
-		// Save data.
-		if (t<w_step) {continue;}
-		else {
-			if (t%r_pace==0) {
-				w_col=w%w_pace;
-				if (w_col==0) {
-					if (w==0) {
-					  WriteResult (0, Result_hyp,
-						       Result_gamma, w_col);
-					}
-					else {
-					  WriteResult (1, Result_hyp,
-						       Result_gamma, w_col);
-					  gsl_matrix_set_zero (Result_hyp);
-					  gsl_matrix_set_zero (Result_gamma);
-					}
-				}
-
-				gsl_matrix_set(Result_hyp,w_col,0,cHyp_old.h);
-				gsl_matrix_set(Result_hyp,w_col,1,cHyp_old.pve);
-				gsl_matrix_set(Result_hyp,w_col,2,cHyp_old.rho);
-				gsl_matrix_set(Result_hyp,w_col,3,cHyp_old.pge);
-				gsl_matrix_set(Result_hyp,w_col,4,cHyp_old.logp);
-				gsl_matrix_set(Result_hyp,w_col,5,cHyp_old.n_gamma);
-
-				for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
-					pos=mapRank2pos[rank_old[i]]+1;
-
-					gsl_matrix_set(Result_gamma,w_col,i,
-						       pos);
-
-					beta_g[pos-1].first+=beta_old[i];
-					beta_g[pos-1].second+=1.0;
-				}
-
-				gsl_vector_add (alpha_prime, alpha_old);
-				gsl_vector_add (Utu, Utu_old);
-
-				if (a_mode==13) {
-					pheno_mean+=mean_z;
-				}
-
-				w++;
-
-			}
-
-		}
-	}
-	cout<<endl;
-
-	w_col=w%w_pace;
-	WriteResult (1, Result_hyp, Result_gamma, w_col);
-
-	gsl_matrix_free(Result_hyp);
-	gsl_matrix_free(Result_gamma);
-
-	gsl_vector_free(z_hat);
-	gsl_vector_free(z);
-	gsl_vector_free(Utz);
-	gsl_vector_free(UtXb_new);
-	gsl_vector_free(UtXb_old);
-	gsl_vector_free(alpha_new);
-	gsl_vector_free(alpha_old);
-	gsl_vector_free(Utu_new);
-	gsl_vector_free(Utu_old);
-
-	gsl_vector_scale (alpha_prime, 1.0/(double)w);
-	gsl_vector_scale (Utu, 1.0/(double)w);
-	if (a_mode==13) {
-		pheno_mean/=(double)w;
-	}
-
-	gsl_vector *alpha=gsl_vector_alloc (ns_test);
-	gsl_blas_dgemv (CblasTrans, 1.0/(double)ns_test, UtX,
-			alpha_prime, 0.0, alpha);
-	WriteParam (beta_g, alpha, w);
-	gsl_vector_free(alpha);
-
-	gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime);
-	WriteBV(alpha_prime);
-
-	gsl_vector_free(alpha_prime);
-	gsl_vector_free(Utu);
-
-	delete [] p_gamma;
-	beta_g.clear();
-
-	return;
+void BSLMM::MCMC(const gsl_matrix *U, const gsl_matrix *UtX,
+                 const gsl_vector *Uty, const gsl_vector *K_eval,
+                 const gsl_vector *y) {
+  clock_t time_start;
+  // Metropolis-Hastings sampler over (h, rho, logp, gamma); results are
+  class HYPBSLMM cHyp_old, cHyp_new; // written via WriteResult/Param/BV.
+
+  gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6); // One row per sample.
+  gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
+
+  gsl_vector *alpha_prime = gsl_vector_alloc(ni_test); // Sum of alpha_old.
+  gsl_vector *alpha_new = gsl_vector_alloc(ni_test);
+  gsl_vector *alpha_old = gsl_vector_alloc(ni_test);
+  gsl_vector *Utu = gsl_vector_alloc(ni_test); // Sum of saved Utu_old.
+  gsl_vector *Utu_new = gsl_vector_alloc(ni_test);
+  gsl_vector *Utu_old = gsl_vector_alloc(ni_test);
+
+  gsl_vector *UtXb_new = gsl_vector_alloc(ni_test);
+  gsl_vector *UtXb_old = gsl_vector_alloc(ni_test);
+
+  gsl_vector *z_hat = gsl_vector_alloc(ni_test);
+  gsl_vector *z = gsl_vector_alloc(ni_test);
+  gsl_vector *Utz = gsl_vector_alloc(ni_test);
+
+  gsl_vector_memcpy(Utz, Uty); // Utz starts as Uty; rewritten if a_mode == 13.
+
+  double logPost_new, logPost_old;
+  double logMHratio;
+  double mean_z = 0.0;
+
+  gsl_matrix_set_zero(Result_gamma);
+  gsl_vector_set_zero(Utu);
+  gsl_vector_set_zero(alpha_prime);
+  if (a_mode == 13) {
+    pheno_mean = 0.0; // Re-estimated below as the average of mean_z.
+  }
+
+  vector<pair<double, double>> beta_g; // Per SNP: (sum of beta, times saved).
+  for (size_t i = 0; i < ns_test; i++) {
+    beta_g.push_back(make_pair(0.0, 0.0));
+  }
+
+  vector<size_t> rank_new, rank_old;
+  vector<double> beta_new, beta_old;
+
+  vector<pair<size_t, double>> pos_loglr;
+
+  time_start = clock();
+  MatrixCalcLR(U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr);
+  time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr); // Descending.
+  for (size_t i = 0; i < ns_test; ++i) {
+    mapRank2pos[i] = pos_loglr[i].first; // Rank i -> .first of i-th entry.
+  }
+
+  // Calculate proposal distribution for gamma (unnormalized),
+  // and set up gsl_r and gsl_t.
+  gsl_rng_env_setup();
+  const gsl_rng_type *gslType;
+  gslType = gsl_rng_default;
+  if (randseed < 0) { // Negative seed: derive one from the UTC time of day.
+    time_t rawtime;
+    time(&rawtime);
+    tm *ptm = gmtime(&rawtime);
+
+    randseed =
+        (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec);
+  }
+  gsl_r = gsl_rng_alloc(gslType);
+  gsl_rng_set(gsl_r, randseed);
+
+  double *p_gamma = new double[ns_test]; // Released at the end of MCMC().
+  CalcPgamma(p_gamma);
+
+  gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
+  // NOTE(review): gsl_r and gsl_t are not released anywhere in this function.
+  // Initial parameters.
+  InitialMCMC(UtX, Utz, rank_old, cHyp_old, pos_loglr);
+
+  cHyp_initial = cHyp_old;
+  // With no selected SNPs or rho == 0, use the overload without UtXgamma.
+  if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+    logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
+
+    beta_old.clear();
+    for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+      beta_old.push_back(0); // Keep beta_old the same length as rank_old.
+    }
+  } else {
+    gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
+    gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
+    SetXgamma(UtXgamma, UtX, rank_old);
+    logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
+                                alpha_old, beta, cHyp_old);
+
+    beta_old.clear();
+    for (size_t i = 0; i < beta->size; ++i) {
+      beta_old.push_back(gsl_vector_get(beta, i));
+    }
+    gsl_matrix_free(UtXgamma);
+    gsl_vector_free(beta);
+  }
+
+  // Calculate centered z_hat, and pve.
+  if (a_mode == 13) {
+    time_start = clock();
+    if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+      CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
+    } else {
+      CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
+    }
+    time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+  }
+
+  // Start MCMC.
+  int accept;
+  size_t total_step = w_step + s_step;
+  size_t w = 0, w_col, pos; // w counts the samples saved so far.
+  size_t repeat = 0;
+
+  for (size_t t = 0; t < total_step; ++t) {
+    if (t % d_pace == 0 || t == total_step - 1) {
+      ProgressBar("Running MCMC ", t, total_step - 1,
+                  (double)n_accept / (double)(t * n_mh + 1));
+    }
+
+    if (a_mode == 13) {
+      SampleZ(y, z_hat, z);
+      mean_z = CenterVector(z);
+
+      time_start = clock();
+      gsl_blas_dgemv(CblasTrans, 1.0, U, z, 0.0, Utz); // Utz = U' * z.
+      time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      // Utz changed, so recompute the current state's posterior against it.
+      if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+        logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
+        beta_old.clear();
+        for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+          beta_old.push_back(0);
+        }
+      } else {
+        gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
+        gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
+        SetXgamma(UtXgamma, UtX, rank_old);
+        logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
+                                    alpha_old, beta, cHyp_old);
+
+        beta_old.clear();
+        for (size_t i = 0; i < beta->size; ++i) {
+          beta_old.push_back(gsl_vector_get(beta, i));
+        }
+        gsl_matrix_free(UtXgamma);
+        gsl_vector_free(beta);
+      }
+    }
+
+    // M-H steps.
+    for (size_t i = 0; i < n_mh; ++i) {
+      if (gsl_rng_uniform(gsl_r) < 0.33) { // About 1 in 3: compound proposal.
+        repeat = 1 + gsl_rng_uniform_int(gsl_r, 20); // 1..20 chained moves.
+      } else {
+        repeat = 1;
+      }
+
+      logMHratio = 0.0; // Proposal terms first, posterior difference after.
+      logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
+      logMHratio +=
+          ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
+      logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
+
+      if (cHyp_new.n_gamma == 0 || cHyp_new.rho == 0) {
+        logPost_new = CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new);
+        beta_new.clear();
+        for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
+          beta_new.push_back(0);
+        }
+      } else {
+        gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_new.n_gamma);
+        gsl_vector *beta = gsl_vector_alloc(cHyp_new.n_gamma);
+        SetXgamma(UtXgamma, UtX, rank_new);
+        logPost_new = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new,
+                                    alpha_new, beta, cHyp_new);
+        beta_new.clear();
+        for (size_t i = 0; i < beta->size; ++i) {
+          beta_new.push_back(gsl_vector_get(beta, i));
+        }
+        gsl_matrix_free(UtXgamma);
+        gsl_vector_free(beta);
+      }
+
+      logMHratio += logPost_new - logPost_old;
+      // Accept outright when the ratio is positive, else with prob. exp(ratio).
+      if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
+        accept = 1;
+        n_accept++;
+      } else {
+        accept = 0;
+      }
+
+      if (accept == 1) { // Promote the proposal to the current state.
+        logPost_old = logPost_new;
+        rank_old.clear();
+        beta_old.clear();
+        if (rank_new.size() != 0) {
+          for (size_t i = 0; i < rank_new.size(); ++i) {
+            rank_old.push_back(rank_new[i]);
+            beta_old.push_back(beta_new[i]);
+          }
+        }
+        cHyp_old = cHyp_new;
+        gsl_vector_memcpy(alpha_old, alpha_new);
+        gsl_vector_memcpy(UtXb_old, UtXb_new);
+        gsl_vector_memcpy(Utu_old, Utu_new);
+      } else {
+        cHyp_new = cHyp_old; // Rejected: discard the proposed hyperparameters.
+      }
+    }
+
+    // Calculate z_hat, and pve.
+    if (a_mode == 13) {
+      time_start = clock();
+      if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+        CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
+      } else {
+        CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
+      }
+
+      // Sample mu and update z_hat.
+      gsl_vector_sub(z, z_hat); // z now holds z - z_hat.
+      mean_z += CenterVector(z);
+      mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
+      gsl_vector_add_constant(z_hat, mean_z);
+
+      time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+    }
+
+    // Save data.
+    if (t < w_step) { // Nothing is saved during the first w_step iterations.
+      continue;
+    } else {
+      if (t % r_pace == 0) { // Save only iterations divisible by r_pace.
+        w_col = w % w_pace;  // Row of the buffers that receives this sample.
+        if (w_col == 0) {    // Buffer wrapped: hand it to WriteResult first.
+          if (w == 0) {
+            WriteResult(0, Result_hyp, Result_gamma, w_col);
+          } else {
+            WriteResult(1, Result_hyp, Result_gamma, w_col);
+            gsl_matrix_set_zero(Result_hyp);
+            gsl_matrix_set_zero(Result_gamma);
+          }
+        }
+
+        gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
+        gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
+        gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
+        gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
+        gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
+        gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
+
+        for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+          pos = mapRank2pos[rank_old[i]] + 1; // Stored 1-based.
+
+          gsl_matrix_set(Result_gamma, w_col, i, pos);
+
+          beta_g[pos - 1].first += beta_old[i]; // Running sum of beta.
+          beta_g[pos - 1].second += 1.0;        // Times this SNP was saved.
+        }
+
+        gsl_vector_add(alpha_prime, alpha_old);
+        gsl_vector_add(Utu, Utu_old);
+
+        if (a_mode == 13) {
+          pheno_mean += mean_z;
+        }
+
+        w++;
+      }
+    }
+  }
+  cout << endl;
+
+  w_col = w % w_pace; // Rows still buffered since the last WriteResult call.
+  WriteResult(1, Result_hyp, Result_gamma, w_col);
+
+  gsl_matrix_free(Result_hyp);
+  gsl_matrix_free(Result_gamma);
+
+  gsl_vector_free(z_hat);
+  gsl_vector_free(z);
+  gsl_vector_free(Utz);
+  gsl_vector_free(UtXb_new);
+  gsl_vector_free(UtXb_old);
+  gsl_vector_free(alpha_new);
+  gsl_vector_free(alpha_old);
+  gsl_vector_free(Utu_new);
+  gsl_vector_free(Utu_old);
+  // NOTE(review): if no sample was saved (w == 0) these scalings divide by 0.
+  gsl_vector_scale(alpha_prime, 1.0 / (double)w); // Sums -> averages.
+  gsl_vector_scale(Utu, 1.0 / (double)w);
+  if (a_mode == 13) {
+    pheno_mean /= (double)w;
+  }
+
+  gsl_vector *alpha = gsl_vector_alloc(ns_test); // UtX' * alpha_prime / ns_test.
+  gsl_blas_dgemv(CblasTrans, 1.0 / (double)ns_test, UtX, alpha_prime, 0.0,
+                 alpha);
+  WriteParam(beta_g, alpha, w);
+  gsl_vector_free(alpha);
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime); // Reused buffer.
+  WriteBV(alpha_prime); // alpha_prime now holds U * (averaged Utu).
+
+  gsl_vector_free(alpha_prime);
+  gsl_vector_free(Utu);
+
+  delete[] p_gamma;
+  beta_g.clear();
+
+  return;
 }
 
 void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
-		   const gsl_vector *Uty, const gsl_vector *eval,
-		   const double lambda) {
-	gsl_vector *beta=gsl_vector_alloc (UtX->size2);
-	gsl_vector *H_eval=gsl_vector_alloc (Uty->size);
-	gsl_vector *bv=gsl_vector_alloc (Uty->size);
+                   const gsl_vector *Uty, const gsl_vector *eval,
+                   const double lambda) {
+  gsl_vector *beta = gsl_vector_alloc(UtX->size2); // One entry per UtX column.
+  gsl_vector *H_eval = gsl_vector_alloc(Uty->size);
+  gsl_vector *bv = gsl_vector_alloc(Uty->size);
 
-	gsl_vector_memcpy (H_eval, eval);
-	gsl_vector_scale (H_eval, lambda);
-	gsl_vector_add_constant (H_eval, 1.0);
+  gsl_vector_memcpy(H_eval, eval);
+  gsl_vector_scale(H_eval, lambda);
+  gsl_vector_add_constant(H_eval, 1.0); // H_eval = lambda * eval + 1.
 
-	gsl_vector_memcpy (bv, Uty);
-	gsl_vector_div (bv, H_eval);
+  gsl_vector_memcpy(bv, Uty);
+  gsl_vector_div(bv, H_eval); // bv = Uty / (lambda * eval + 1), elementwise.
 
-	gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2,
-			UtX, bv, 0.0, beta);
-	gsl_vector_add_constant (H_eval, -1.0);
-	gsl_vector_mul (H_eval, bv);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, U, H_eval, 0.0, bv);
+  gsl_blas_dgemv(CblasTrans, lambda / (double)UtX->size2, UtX, bv, 0.0, beta);
+  gsl_vector_add_constant(H_eval, -1.0); // Back to lambda * eval.
+  gsl_vector_mul(H_eval, bv);            // H_eval = lambda * eval * bv.
+  gsl_blas_dgemv(CblasNoTrans, 1.0, U, H_eval, 0.0, bv); // bv = U * H_eval.
 
-	WriteParam (beta);
-	WriteBV(bv);
+  WriteParam(beta); // beta = (lambda / UtX->size2) * UtX' * (scaled Uty).
+  WriteBV(bv);
 
-	gsl_vector_free (H_eval);
-	gsl_vector_free (beta);
-	gsl_vector_free (bv);
+  gsl_vector_free(H_eval);
+  gsl_vector_free(beta);
+  gsl_vector_free(bv);
 
-	return;
+  return;
 }
 
 // Below fits MCMC for rho=1.
-void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y,
-		     const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
-  time_t time_start=clock();
-  gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1,
-							 s_size);
-  gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
-  gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size);
-
-  lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix,
-		&X_sub.matrix, 0.0, &XtX_sub.matrix);
+void BSLMM::CalcXtX(const gsl_matrix *X, const gsl_vector *y,
+                    const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
+  time_t time_start = clock();
+  gsl_matrix_const_view X_sub =
+      gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size);
+  gsl_matrix_view XtX_sub = gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
+  gsl_vector_view Xty_sub = gsl_vector_subvector(Xty, 0, s_size);
+
+  lapack_dgemm((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0,
+               &XtX_sub.matrix);
   gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector);
 
-  time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+  time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
   return;
 }
 
-void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
-		       const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
-		       const gsl_vector *y, const vector<size_t> &rank_old,
-		       const vector<size_t> &rank_new, gsl_matrix *X_new,
-		       gsl_matrix *XtX_new, gsl_vector *Xty_new) {
+void BSLMM::SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old,
+                      const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
+                      const gsl_vector *y, const vector<size_t> &rank_old,
+                      const vector<size_t> &rank_new, gsl_matrix *X_new,
+                      gsl_matrix *XtX_new, gsl_vector *Xty_new) {
   double d;
 
   // rank_old and rank_new are sorted already inside PorposeGamma
   // calculate vectors rank_remove and rank_add.
   // make sure that v_size is larger than repeat.
-  size_t v_size=20;
+  size_t v_size = 20;
   vector<size_t> rank_remove(v_size), rank_add(v_size),
-    rank_union(s_max+v_size);
+      rank_union(s_max + v_size);
   vector<size_t>::iterator it;
 
-  it=set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(),
-		    rank_new.end(), rank_remove.begin());
-  rank_remove.resize(it-rank_remove.begin());
+  it = set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(),
+                      rank_new.end(), rank_remove.begin());
+  rank_remove.resize(it - rank_remove.begin());
 
-  it=set_difference (rank_new.begin(), rank_new.end(), rank_old.begin(),
-		     rank_old.end(), rank_add.begin());
-  rank_add.resize(it-rank_add.begin());
+  it = set_difference(rank_new.begin(), rank_new.end(), rank_old.begin(),
+                      rank_old.end(), rank_add.begin());
+  rank_add.resize(it - rank_add.begin());
 
-  it=set_union (rank_new.begin(), rank_new.end(), rank_old.begin(),
-		rank_old.end(), rank_union.begin());
-  rank_union.resize(it-rank_union.begin());
+  it = set_union(rank_new.begin(), rank_new.end(), rank_old.begin(),
+                 rank_old.end(), rank_union.begin());
+  rank_union.resize(it - rank_union.begin());
 
   // Map rank_remove and rank_add.
   map<size_t, int> mapRank2in_remove, mapRank2in_add;
-  for (size_t i=0; i<rank_remove.size(); i++) {
-    mapRank2in_remove[rank_remove[i]]=1;
+  for (size_t i = 0; i < rank_remove.size(); i++) {
+    mapRank2in_remove[rank_remove[i]] = 1;
   }
-  for (size_t i=0; i<rank_add.size(); i++) {
-    mapRank2in_add[rank_add[i]]=1;
+  for (size_t i = 0; i < rank_add.size(); i++) {
+    mapRank2in_add[rank_add[i]] = 1;
   }
 
   // Obtain the subset of matrix/vector.
-  gsl_matrix_const_view Xold_sub=
-    gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size());
-  gsl_matrix_const_view XtXold_sub=
-    gsl_matrix_const_submatrix(XtX_old, 0, 0, rank_old.size(),
-			       rank_old.size());
-  gsl_vector_const_view Xtyold_sub=
-    gsl_vector_const_subvector(Xty_old, 0, rank_old.size());
-
-  gsl_matrix_view Xnew_sub=
-    gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size());
-  gsl_matrix_view XtXnew_sub=
-    gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
-  gsl_vector_view Xtynew_sub=
-    gsl_vector_subvector(Xty_new, 0, rank_new.size());
+  gsl_matrix_const_view Xold_sub =
+      gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size());
+  gsl_matrix_const_view XtXold_sub = gsl_matrix_const_submatrix(
+      XtX_old, 0, 0, rank_old.size(), rank_old.size());
+  gsl_vector_const_view Xtyold_sub =
+      gsl_vector_const_subvector(Xty_old, 0, rank_old.size());
+
+  gsl_matrix_view Xnew_sub =
+      gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size());
+  gsl_matrix_view XtXnew_sub =
+      gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
+  gsl_vector_view Xtynew_sub =
+      gsl_vector_subvector(Xty_new, 0, rank_new.size());
 
   // Get X_new and calculate XtX_new.
-  if (rank_remove.size()==0 && rank_add.size()==0) {
+  if (rank_remove.size() == 0 && rank_add.size() == 0) {
     gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix);
     gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix);
     gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector);
   } else {
     size_t i_old, j_old, i_new, j_new, i_add, j_add, i_flag, j_flag;
-    if (rank_add.size()==0) {
-      i_old=0; i_new=0;
-      for (size_t i=0; i<rank_union.size(); i++) {
-	if (mapRank2in_remove.count(rank_old[i_old])!=0) {i_old++; continue;}
+    if (rank_add.size() == 0) {
+      i_old = 0;
+      i_new = 0;
+      for (size_t i = 0; i < rank_union.size(); i++) {
+        if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
+          i_old++;
+          continue;
+        }
 
-	gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
-	gsl_vector_const_view Xcopy_col=gsl_matrix_const_column(X_old, i_old);
-	gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
+        gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
+        gsl_vector_const_view Xcopy_col = gsl_matrix_const_column(X_old, i_old);
+        gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
 
-	d=gsl_vector_get (Xty_old, i_old);
-	gsl_vector_set (Xty_new, i_new, d);
+        d = gsl_vector_get(Xty_old, i_old);
+        gsl_vector_set(Xty_new, i_new, d);
 
-	j_old=i_old; j_new=i_new;
-	for (size_t j=i; j<rank_union.size(); j++) {
-          if (mapRank2in_remove.count(rank_old[j_old])!=0) {j_old++; continue;}
+        j_old = i_old;
+        j_new = i_new;
+        for (size_t j = i; j < rank_union.size(); j++) {
+          if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
+            j_old++;
+            continue;
+          }
 
-	  d=gsl_matrix_get(XtX_old, i_old, j_old);
+          d = gsl_matrix_get(XtX_old, i_old, j_old);
 
-	  gsl_matrix_set (XtX_new, i_new, j_new, d);
-	  if (i_new!=j_new) {gsl_matrix_set (XtX_new, j_new, i_new, d);}
+          gsl_matrix_set(XtX_new, i_new, j_new, d);
+          if (i_new != j_new) {
+            gsl_matrix_set(XtX_new, j_new, i_new, d);
+          }
 
-	  j_old++; j_new++;
+          j_old++;
+          j_new++;
         }
-	i_old++; i_new++;
+        i_old++;
+        i_new++;
       }
     } else {
-      gsl_matrix *X_add=gsl_matrix_alloc(X_old->size1, rank_add.size() );
-      gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2);
-      gsl_matrix *XtX_ao=gsl_matrix_alloc(X_add->size2, X_old->size2);
-      gsl_vector *Xty_add=gsl_vector_alloc(X_add->size2);
+      gsl_matrix *X_add = gsl_matrix_alloc(X_old->size1, rank_add.size());
+      gsl_matrix *XtX_aa = gsl_matrix_alloc(X_add->size2, X_add->size2);
+      gsl_matrix *XtX_ao = gsl_matrix_alloc(X_add->size2, X_old->size2);
+      gsl_vector *Xty_add = gsl_vector_alloc(X_add->size2);
 
       // Get X_add.
-      SetXgamma (X_add, X, rank_add);
+      SetXgamma(X_add, X, rank_add);
 
       // Get t(X_add)X_add and t(X_add)X_temp.
-      clock_t time_start=clock();
+      clock_t time_start = clock();
 
       // Somehow the lapack_dgemm does not work here.
-      gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add,
-		      0.0, XtX_aa);
-      gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_old,
-		      0.0, XtX_ao);
+      gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa);
+      gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao);
       gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add);
 
-      time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+      time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
       // Save to X_new, XtX_new and Xty_new.
-      i_old=0; i_new=0; i_add=0;
-      for (size_t i=0; i<rank_union.size(); i++) {
-	if (mapRank2in_remove.count(rank_old[i_old])!=0) {
-	  i_old++;
-	  continue;
-	}
-	if (mapRank2in_add.count(rank_new[i_new])!=0) {
-	  i_flag=1;
-	} else {
-	  i_flag=0;
-	}
-
-	gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
-	if (i_flag==1) {
-	  gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i_add);
-	  gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
-	} else {
-	  gsl_vector_const_view Xcopy_col=
-	    gsl_matrix_const_column(X_old, i_old);
-	  gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
-	}
-
-	if (i_flag==1) {
-          d=gsl_vector_get (Xty_add, i_add);
+      i_old = 0;
+      i_new = 0;
+      i_add = 0;
+      for (size_t i = 0; i < rank_union.size(); i++) {
+        if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
+          i_old++;
+          continue;
+        }
+        if (mapRank2in_add.count(rank_new[i_new]) != 0) {
+          i_flag = 1;
+        } else {
+          i_flag = 0;
+        }
+
+        gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
+        if (i_flag == 1) {
+          gsl_vector_view Xcopy_col = gsl_matrix_column(X_add, i_add);
+          gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
+        } else {
+          gsl_vector_const_view Xcopy_col =
+              gsl_matrix_const_column(X_old, i_old);
+          gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
+        }
+
+        if (i_flag == 1) {
+          d = gsl_vector_get(Xty_add, i_add);
         } else {
-          d=gsl_vector_get (Xty_old, i_old);
+          d = gsl_vector_get(Xty_old, i_old);
+        }
+        gsl_vector_set(Xty_new, i_new, d);
+
+        j_old = i_old;
+        j_new = i_new;
+        j_add = i_add;
+        for (size_t j = i; j < rank_union.size(); j++) {
+          if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
+            j_old++;
+            continue;
+          }
+          if (mapRank2in_add.count(rank_new[j_new]) != 0) {
+            j_flag = 1;
+          } else {
+            j_flag = 0;
+          }
+
+          if (i_flag == 1 && j_flag == 1) {
+            d = gsl_matrix_get(XtX_aa, i_add, j_add);
+          } else if (i_flag == 1) {
+            d = gsl_matrix_get(XtX_ao, i_add, j_old);
+          } else if (j_flag == 1) {
+            d = gsl_matrix_get(XtX_ao, j_add, i_old);
+          } else {
+            d = gsl_matrix_get(XtX_old, i_old, j_old);
+          }
+
+          gsl_matrix_set(XtX_new, i_new, j_new, d);
+          if (i_new != j_new) {
+            gsl_matrix_set(XtX_new, j_new, i_new, d);
+          }
+
+          j_new++;
+          if (j_flag == 1) {
+            j_add++;
+          } else {
+            j_old++;
+          }
         }
-	gsl_vector_set (Xty_new, i_new, d);
-
-	j_old=i_old; j_new=i_new; j_add=i_add;
-	for (size_t j=i; j<rank_union.size(); j++) {
-	  if (mapRank2in_remove.count(rank_old[j_old])!=0) {
-	    j_old++;
-	    continue;
-	  }
-	  if (mapRank2in_add.count(rank_new[j_new])!=0) {
-	    j_flag=1;
-	  } else {
-	    j_flag=0;
-	  }
-
-	  if (i_flag==1 && j_flag==1) {
-            d=gsl_matrix_get(XtX_aa, i_add, j_add);
-	  } else if (i_flag==1) {
-	    d=gsl_matrix_get(XtX_ao, i_add, j_old);
-	  } else if (j_flag==1) {
-	    d=gsl_matrix_get(XtX_ao, j_add, i_old);
-	  } else {
-	    d=gsl_matrix_get(XtX_old, i_old, j_old);
-	  }
-
-	  gsl_matrix_set (XtX_new, i_new, j_new, d);
-	  if (i_new!=j_new) {gsl_matrix_set (XtX_new, j_new, i_new, d);}
-
-	  j_new++; if (j_flag==1) {j_add++;} else {j_old++;}
+        i_new++;
+        if (i_flag == 1) {
+          i_add++;
+        } else {
+          i_old++;
         }
-	i_new++; if (i_flag==1) {i_add++;} else {i_old++;}
       }
 
       gsl_matrix_free(X_add);
@@ -1377,7 +1445,6 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
       gsl_matrix_free(XtX_ao);
       gsl_vector_free(Xty_add);
     }
-
   }
 
   rank_remove.clear();
@@ -1389,462 +1456,442 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
   return;
 }
 
-double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) {
-	double logpost=0.0;
+double BSLMM::CalcPosterior(const double yty, class HYPBSLMM &cHyp) {
+  double logpost = 0.0;
 
-	// For quantitative traits, calculate pve and pge.
-	// Pve and pge for case/control data are calculted in CalcCC_PVEnZ.
-	if (a_mode==11) {
-		cHyp.pve=0.0;
-		cHyp.pge=1.0;
-	}
+  // For quantitative traits, calculate pve and pge.
+  // Pve and pge for case/control data are calculated in CalcCC_PVEnZ.
+  if (a_mode == 11) {
+    cHyp.pve = 0.0;
+    cHyp.pge = 1.0;
+  }
 
-	// Calculate likelihood.
-	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);}
-	else {logpost-=0.5*yty;}
+  // Calculate likelihood.
+  if (a_mode == 11) {
+    logpost -= 0.5 * (double)ni_test * log(yty);
+  } else {
+    logpost -= 0.5 * yty;
+  }
 
-	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
-	  ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
+  logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+             ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
 
-	return logpost;
+  return logpost;
 }
 
-double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
-			     const gsl_vector *Xty, const double yty,
-			     const size_t s_size, gsl_vector *Xb,
-			     gsl_vector *beta, class HYPBSLMM &cHyp) {
-	double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
-	double logpost=0.0;
-	double d, P_yy=yty, logdet_O=0.0;
-
-	gsl_matrix_const_view Xgamma_sub=
-	  gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size);
-	gsl_matrix_const_view XtX_sub=
-	  gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size);
-	gsl_vector_const_view Xty_sub=
-	  gsl_vector_const_subvector (Xty, 0, s_size);
-
-	gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size);
-	gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size);
-	gsl_vector *beta_hat=gsl_vector_alloc (s_size);
-	gsl_vector *Xty_temp=gsl_vector_alloc (s_size);
-
-	gsl_vector_memcpy (Xty_temp, &Xty_sub.vector);
-
-	// Calculate Omega.
-	gsl_matrix_memcpy (Omega, &XtX_sub.matrix);
-	gsl_matrix_scale (Omega, sigma_a2);
-	gsl_matrix_set_identity (M_temp);
-	gsl_matrix_add (Omega, M_temp);
-
-	// Calculate beta_hat.
-	logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
-	gsl_vector_scale (beta_hat, sigma_a2);
-
-	gsl_blas_ddot (Xty_temp, beta_hat, &d);
-	P_yy-=d;
-
-	// Sample tau.
-	double tau=1.0;
-	if (a_mode==11) {
-	  tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0,  2.0/P_yy);
-	}
-
-	// Sample beta.
-	for (size_t i=0; i<s_size; i++)
-	{
-		d=gsl_ran_gaussian(gsl_r, 1);
-		gsl_vector_set(beta, i, d);
-	}
-	gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size);
-	gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega,
-		       &beta_sub.vector);
-
-	// This computes inv(L^T(Omega)) %*% beta.
-	gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau));
-	gsl_vector_add(&beta_sub.vector, beta_hat);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix,
-			&beta_sub.vector, 0.0, Xb);
-
-	// For quantitative traits, calculate pve and pge.
-	if (a_mode==11) {
-		gsl_blas_ddot (Xb, Xb, &d);
-		cHyp.pve=d/(double)ni_test;
-		cHyp.pve/=cHyp.pve+1.0/tau;
-		cHyp.pge=1.0;
-	}
-
-	logpost=-0.5*logdet_O;
-	if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
-	else {logpost-=0.5*P_yy;}
-
-	logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
-	  ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
-
-	gsl_matrix_free (Omega);
-	gsl_matrix_free (M_temp);
-	gsl_vector_free (beta_hat);
-	gsl_vector_free (Xty_temp);
-
-	return logpost;
+double BSLMM::CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX,
+                            const gsl_vector *Xty, const double yty,
+                            const size_t s_size, gsl_vector *Xb,
+                            gsl_vector *beta, class HYPBSLMM &cHyp) {
+  double sigma_a2 = cHyp.h / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+  double logpost = 0.0;
+  double d, P_yy = yty, logdet_O = 0.0;
+
+  gsl_matrix_const_view Xgamma_sub =
+      gsl_matrix_const_submatrix(Xgamma, 0, 0, Xgamma->size1, s_size);
+  gsl_matrix_const_view XtX_sub =
+      gsl_matrix_const_submatrix(XtX, 0, 0, s_size, s_size);
+  gsl_vector_const_view Xty_sub = gsl_vector_const_subvector(Xty, 0, s_size);
+
+  gsl_matrix *Omega = gsl_matrix_alloc(s_size, s_size);
+  gsl_matrix *M_temp = gsl_matrix_alloc(s_size, s_size);
+  gsl_vector *beta_hat = gsl_vector_alloc(s_size);
+  gsl_vector *Xty_temp = gsl_vector_alloc(s_size);
+
+  gsl_vector_memcpy(Xty_temp, &Xty_sub.vector);
+
+  // Calculate Omega.
+  gsl_matrix_memcpy(Omega, &XtX_sub.matrix);
+  gsl_matrix_scale(Omega, sigma_a2);
+  gsl_matrix_set_identity(M_temp);
+  gsl_matrix_add(Omega, M_temp);
+
+  // Calculate beta_hat.
+  logdet_O = CholeskySolve(Omega, Xty_temp, beta_hat);
+  gsl_vector_scale(beta_hat, sigma_a2);
+
+  gsl_blas_ddot(Xty_temp, beta_hat, &d);
+  P_yy -= d;
+
+  // Sample tau.
+  double tau = 1.0;
+  if (a_mode == 11) {
+    tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
+  }
+
+  // Sample beta.
+  for (size_t i = 0; i < s_size; i++) {
+    d = gsl_ran_gaussian(gsl_r, 1);
+    gsl_vector_set(beta, i, d);
+  }
+  gsl_vector_view beta_sub = gsl_vector_subvector(beta, 0, s_size);
+  gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega,
+                 &beta_sub.vector);
+
+  // The triangular solve above computes inv(L^T(Omega)) %*% beta.
+  gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2 / tau));
+  gsl_vector_add(&beta_sub.vector, beta_hat);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0,
+                 Xb);
+
+  // For quantitative traits, calculate pve and pge.
+  if (a_mode == 11) {
+    gsl_blas_ddot(Xb, Xb, &d);
+    cHyp.pve = d / (double)ni_test;
+    cHyp.pve /= cHyp.pve + 1.0 / tau;
+    cHyp.pge = 1.0;
+  }
+
+  logpost = -0.5 * logdet_O;
+  if (a_mode == 11) {
+    logpost -= 0.5 * (double)ni_test * log(P_yy);
+  } else {
+    logpost -= 0.5 * P_yy;
+  }
+
+  logpost +=
+      ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+      ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
+
+  gsl_matrix_free(Omega);
+  gsl_matrix_free(M_temp);
+  gsl_vector_free(beta_hat);
+  gsl_vector_free(Xty_temp);
+
+  return logpost;
 }
 
 // Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp)
-{
+void BSLMM::CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp) {
   gsl_vector_set_zero(z_hat);
-  cHyp.pve=0.0;
-  cHyp.pge=1.0;
+  cHyp.pve = 0.0;
+  cHyp.pge = 1.0;
   return;
 }
 
 // Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat,
-			  class HYPBSLMM &cHyp) {
-	double d;
+void BSLMM::CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat,
+                         class HYPBSLMM &cHyp) {
+  double d;
 
-	gsl_blas_ddot (Xb, Xb, &d);
-	cHyp.pve=d/(double)ni_test;
-	cHyp.pve/=cHyp.pve+1.0;
-	cHyp.pge=1.0;
+  gsl_blas_ddot(Xb, Xb, &d);
+  cHyp.pve = d / (double)ni_test;
+  cHyp.pve /= cHyp.pve + 1.0;
+  cHyp.pge = 1.0;
 
-	gsl_vector_memcpy (z_hat, Xb);
+  gsl_vector_memcpy(z_hat, Xb);
 
-	return;
+  return;
 }
 
 // If a_mode==13, then run probit model.
-void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
-	clock_t time_start;
-	double time_set=0, time_post=0;
-
-	class HYPBSLMM cHyp_old, cHyp_new;
-
-	gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6);
-	gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
-
-	gsl_vector *Xb_new=gsl_vector_alloc (ni_test);
-	gsl_vector *Xb_old=gsl_vector_alloc (ni_test);
-	gsl_vector *z_hat=gsl_vector_alloc (ni_test);
-	gsl_vector *z=gsl_vector_alloc (ni_test);
-
-	gsl_matrix *Xgamma_old=gsl_matrix_alloc (ni_test, s_max);
-	gsl_matrix *XtX_old=gsl_matrix_alloc (s_max, s_max);
-	gsl_vector *Xtz_old=gsl_vector_alloc (s_max);
-	gsl_vector *beta_old=gsl_vector_alloc (s_max);
-
-	gsl_matrix *Xgamma_new=gsl_matrix_alloc (ni_test, s_max);
-	gsl_matrix *XtX_new=gsl_matrix_alloc (s_max, s_max);
-	gsl_vector *Xtz_new=gsl_vector_alloc (s_max);
-	gsl_vector *beta_new=gsl_vector_alloc (s_max);
-
-	double ztz=0.0;
-	gsl_vector_memcpy (z, y);
-
-	// For quantitative traits, y is centered already in
-	// gemma.cpp, but just in case.
-	double mean_z=CenterVector (z);
-	gsl_blas_ddot(z, z, &ztz);
-
-	double logPost_new, logPost_old;
-	double logMHratio;
-
-	gsl_matrix_set_zero (Result_gamma);
-	if (a_mode==13) {
-		pheno_mean=0.0;
-	}
-
-	vector<pair<double, double> > beta_g;
-	for (size_t i=0; i<ns_test; i++) {
-		beta_g.push_back(make_pair(0.0, 0.0));
-	}
-
-	vector<size_t> rank_new, rank_old;
-	vector<pair<size_t, double> > pos_loglr;
-
-	time_start=clock();
-	MatrixCalcLmLR (X, z, pos_loglr);
-	time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr);
-	for (size_t i=0; i<ns_test; ++i) {
-		mapRank2pos[i]=pos_loglr[i].first;
-	}
-
-	// Calculate proposal distribution for gamma (unnormalized),
-	// and set up gsl_r and gsl_t.
-	gsl_rng_env_setup();
-	const gsl_rng_type * gslType;
-	gslType = gsl_rng_default;
-	if (randseed<0)
-	{
-		time_t rawtime;
-		time (&rawtime);
-		tm * ptm = gmtime (&rawtime);
-
-		randseed = (unsigned) (ptm->tm_hour%24*3600+
-				       ptm->tm_min*60+ptm->tm_sec);
-	}
-	gsl_r = gsl_rng_alloc(gslType);
-	gsl_rng_set(gsl_r, randseed);
-
-	double *p_gamma = new double[ns_test];
-	CalcPgamma (p_gamma);
-
-	gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
-	// Initial parameters.
-	InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr);
-
-	cHyp_initial=cHyp_old;
-
-	if (cHyp_old.n_gamma==0) {
-	    logPost_old=CalcPosterior (ztz, cHyp_old);
-	}
-	else {
-	  SetXgamma (Xgamma_old, X, rank_old);
-	  CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
-	  logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz,
-				     rank_old.size(), Xb_old, beta_old,
-				     cHyp_old);
-	}
-
-	// Calculate centered z_hat, and pve.
-	if (a_mode==13) {
-		if (cHyp_old.n_gamma==0) {
-			CalcCC_PVEnZ (z_hat, cHyp_old);
-		}
-		else {
-			CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old);
-		}
-	}
-
-	// Start MCMC.
-	int accept;
-	size_t total_step=w_step+s_step;
-	size_t w=0, w_col, pos;
-	size_t repeat=0;
-
-	for (size_t t=0; t<total_step; ++t) {
-		if (t%d_pace==0 || t==total_step-1) {
-		  ProgressBar ("Running MCMC ", t, total_step-1,
-			       (double)n_accept/(double)(t*n_mh+1));
-		}
-
-		if (a_mode==13) {
-			SampleZ (y, z_hat, z);
-			mean_z=CenterVector (z);
-			gsl_blas_ddot(z,z,&ztz);
-
-			// First proposal.
-			if (cHyp_old.n_gamma==0) {
-			  logPost_old=CalcPosterior (ztz, cHyp_old);
-			} else {
-			  gsl_matrix_view Xold_sub=
-			    gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test,
-						 rank_old.size());
-			  gsl_vector_view Xtz_sub=
-			    gsl_vector_subvector(Xtz_old, 0, rank_old.size());
-			  gsl_blas_dgemv (CblasTrans, 1.0, &Xold_sub.matrix,
-					  z, 0.0, &Xtz_sub.vector);
-			  logPost_old=
-			    CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz,
-					   rank_old.size(), Xb_old, beta_old,
-					   cHyp_old);
-			}
-		}
-
-		// M-H steps.
-		for (size_t i=0; i<n_mh; ++i) {
-			if (gsl_rng_uniform(gsl_r)<0.33) {
-			  repeat = 1+gsl_rng_uniform_int(gsl_r, 20);
-			}
-			else {repeat=1;}
-
-			logMHratio=0.0;
-			logMHratio+=
-			  ProposeHnRho(cHyp_old, cHyp_new, repeat);
-			logMHratio+=
-			  ProposeGamma (rank_old, rank_new, p_gamma,
-					cHyp_old, cHyp_new, repeat);
-			logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat);
-
-			if (cHyp_new.n_gamma==0) {
-				logPost_new=CalcPosterior (ztz, cHyp_new);
-			} else {
-
-			  // This makes sure that rank_old.size() ==
-			  // rank_remove.size() does not happen.
-			  if (cHyp_new.n_gamma<=20 || cHyp_old.n_gamma<=20) {
-			    time_start=clock();
-			    SetXgamma (Xgamma_new, X, rank_new);
-			    CalcXtX (Xgamma_new, z, rank_new.size(),
-				     XtX_new, Xtz_new);
-			    time_set+=(clock()-time_start)/
-			      (double(CLOCKS_PER_SEC)*60.0);
-			  } else {
-			    time_start=clock();
-			    SetXgamma (X, Xgamma_old, XtX_old, Xtz_old, z,
-				       rank_old, rank_new, Xgamma_new,
-				       XtX_new, Xtz_new);
-			    time_set+=(clock()-time_start)/
-			      (double(CLOCKS_PER_SEC)*60.0);
-			  }
-			  time_start=clock();
-			  logPost_new=
-			    CalcPosterior (Xgamma_new, XtX_new, Xtz_new, ztz,
-					   rank_new.size(), Xb_new, beta_new,
-					   cHyp_new);
-			  time_post+=(clock()-time_start)/
-			    (double(CLOCKS_PER_SEC)*60.0);
-			}
-			logMHratio+=logPost_new-logPost_old;
-
-			if (logMHratio>0 ||
-			    log(gsl_rng_uniform(gsl_r))<logMHratio) {
-			  accept=1;
-			  n_accept++;
-			}
-			else {accept=0;}
-
-			if (accept==1) {
-				logPost_old=logPost_new;
-				cHyp_old=cHyp_new;
-				gsl_vector_memcpy (Xb_old, Xb_new);
-
-				rank_old.clear();
-				if (rank_new.size()!=0) {
-					for (size_t i=0;
-					     i<rank_new.size();
-					     ++i) {
-					  rank_old.push_back(rank_new[i]);
-					}
-
-					gsl_matrix_view Xold_sub=gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size());
-					gsl_matrix_view XtXold_sub=gsl_matrix_submatrix(XtX_old, 0, 0, rank_new.size(), rank_new.size());
-					gsl_vector_view Xtzold_sub=gsl_vector_subvector(Xtz_old, 0, rank_new.size());
-					gsl_vector_view betaold_sub=gsl_vector_subvector(beta_old, 0, rank_new.size());
-
-					gsl_matrix_view Xnew_sub=gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size());
-					gsl_matrix_view XtXnew_sub=gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
-					gsl_vector_view Xtznew_sub=gsl_vector_subvector(Xtz_new, 0, rank_new.size());
-					gsl_vector_view betanew_sub=gsl_vector_subvector(beta_new, 0, rank_new.size());
-
-					gsl_matrix_memcpy(&Xold_sub.matrix,
-							  &Xnew_sub.matrix);
-					gsl_matrix_memcpy(&XtXold_sub.matrix,
-							  &XtXnew_sub.matrix);
-					gsl_vector_memcpy(&Xtzold_sub.vector,
-							  &Xtznew_sub.vector);
-					gsl_vector_memcpy(&betaold_sub.vector,
-							  &betanew_sub.vector);
-				}
-			} else {
-			  cHyp_new=cHyp_old;
-			}
-
-		}
-
-		// Calculate z_hat, and pve.
-		if (a_mode==13) {
-			if (cHyp_old.n_gamma==0) {
-				CalcCC_PVEnZ (z_hat, cHyp_old);
-			}
-			else {
-				CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old);
-			}
-
-			// Sample mu and update z_hat.
-			gsl_vector_sub (z, z_hat);
-			mean_z+=CenterVector(z);
-			mean_z+=gsl_ran_gaussian(gsl_r,
-						 sqrt(1.0/(double) ni_test));
-
-			gsl_vector_add_constant (z_hat, mean_z);
-		}
-
-		// Save data.
-		if (t<w_step) {continue;}
-		else {
-			if (t%r_pace==0) {
-				w_col=w%w_pace;
-				if (w_col==0) {
-					if (w==0) {
-					  WriteResult(0,Result_hyp,
-						      Result_gamma,w_col);
-					}
-					else {
-					  WriteResult(1,Result_hyp,
-						      Result_gamma,w_col);
-					  gsl_matrix_set_zero (Result_hyp);
-					  gsl_matrix_set_zero (Result_gamma);
-					}
-				}
-
-				gsl_matrix_set(Result_hyp,w_col,0,
-					       cHyp_old.h);
-				gsl_matrix_set(Result_hyp,w_col,1,
-					       cHyp_old.pve);
-				gsl_matrix_set(Result_hyp,w_col,2,
-					       cHyp_old.rho);
-				gsl_matrix_set(Result_hyp,w_col,3,
-					       cHyp_old.pge);
-				gsl_matrix_set(Result_hyp,w_col,4,
-					       cHyp_old.logp);
-				gsl_matrix_set(Result_hyp,w_col,5,
-					       cHyp_old.n_gamma);
-
-				for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
-					pos=mapRank2pos[rank_old[i]]+1;
-					gsl_matrix_set(Result_gamma,w_col,
-						       i,pos);
-
-					beta_g[pos-1].first+=
-					  gsl_vector_get(beta_old, i);
-					beta_g[pos-1].second+=1.0;
-				}
-
-				if (a_mode==13) {
-					pheno_mean+=mean_z;
-				}
-
-				w++;
-			}
-		}
-	}
-	cout<<endl;
-
-	cout<<"time on selecting Xgamma: "<<time_set<<endl;
-	cout<<"time on calculating posterior: "<<time_post<<endl;
-
-	w_col=w%w_pace;
-	WriteResult (1, Result_hyp, Result_gamma, w_col);
-
-	gsl_vector *alpha=gsl_vector_alloc (ns_test);
-	gsl_vector_set_zero (alpha);
-	WriteParam (beta_g, alpha, w);
-	gsl_vector_free(alpha);
-
-	gsl_matrix_free(Result_hyp);
-	gsl_matrix_free(Result_gamma);
-
-	gsl_vector_free(z_hat);
-	gsl_vector_free(z);
-	gsl_vector_free(Xb_new);
-	gsl_vector_free(Xb_old);
-
-	gsl_matrix_free(Xgamma_old);
-	gsl_matrix_free(XtX_old);
-	gsl_vector_free(Xtz_old);
-	gsl_vector_free(beta_old);
-
-	gsl_matrix_free(Xgamma_new);
-	gsl_matrix_free(XtX_new);
-	gsl_vector_free(Xtz_new);
-	gsl_vector_free(beta_new);
-
-	delete [] p_gamma;
-	beta_g.clear();
-
-	return;
+void BSLMM::MCMC(const gsl_matrix *X, const gsl_vector *y) {
+  clock_t time_start;
+  double time_set = 0, time_post = 0;
+
+  class HYPBSLMM cHyp_old, cHyp_new;
+
+  gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6);
+  gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
+
+  gsl_vector *Xb_new = gsl_vector_alloc(ni_test);
+  gsl_vector *Xb_old = gsl_vector_alloc(ni_test);
+  gsl_vector *z_hat = gsl_vector_alloc(ni_test);
+  gsl_vector *z = gsl_vector_alloc(ni_test);
+
+  gsl_matrix *Xgamma_old = gsl_matrix_alloc(ni_test, s_max);
+  gsl_matrix *XtX_old = gsl_matrix_alloc(s_max, s_max);
+  gsl_vector *Xtz_old = gsl_vector_alloc(s_max);
+  gsl_vector *beta_old = gsl_vector_alloc(s_max);
+
+  gsl_matrix *Xgamma_new = gsl_matrix_alloc(ni_test, s_max);
+  gsl_matrix *XtX_new = gsl_matrix_alloc(s_max, s_max);
+  gsl_vector *Xtz_new = gsl_vector_alloc(s_max);
+  gsl_vector *beta_new = gsl_vector_alloc(s_max);
+
+  double ztz = 0.0;
+  gsl_vector_memcpy(z, y);
+
+  // For quantitative traits, y is centered already in
+  // gemma.cpp, but re-center it here just in case.
+  double mean_z = CenterVector(z);
+  gsl_blas_ddot(z, z, &ztz);
+
+  double logPost_new, logPost_old;
+  double logMHratio;
+
+  gsl_matrix_set_zero(Result_gamma);
+  if (a_mode == 13) {
+    pheno_mean = 0.0;
+  }
+
+  vector<pair<double, double>> beta_g;
+  for (size_t i = 0; i < ns_test; i++) {
+    beta_g.push_back(make_pair(0.0, 0.0));
+  }
+
+  vector<size_t> rank_new, rank_old;
+  vector<pair<size_t, double>> pos_loglr;
+
+  time_start = clock();
+  MatrixCalcLmLR(X, z, pos_loglr);
+  time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr);
+  for (size_t i = 0; i < ns_test; ++i) {
+    mapRank2pos[i] = pos_loglr[i].first;
+  }
+
+  // Calculate proposal distribution for gamma (unnormalized),
+  // and set up gsl_r and gsl_t.
+  gsl_rng_env_setup();
+  const gsl_rng_type *gslType;
+  gslType = gsl_rng_default;
+  if (randseed < 0) {
+    time_t rawtime;
+    time(&rawtime);
+    tm *ptm = gmtime(&rawtime);
+
+    randseed =
+        (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec);
+  }
+  gsl_r = gsl_rng_alloc(gslType);
+  gsl_rng_set(gsl_r, randseed);
+
+  double *p_gamma = new double[ns_test];
+  CalcPgamma(p_gamma);
+
+  gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
+
+  // Initial parameters.
+  InitialMCMC(X, z, rank_old, cHyp_old, pos_loglr);
+
+  cHyp_initial = cHyp_old;
+
+  if (cHyp_old.n_gamma == 0) {
+    logPost_old = CalcPosterior(ztz, cHyp_old);
+  } else {
+    SetXgamma(Xgamma_old, X, rank_old);
+    CalcXtX(Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
+    logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz,
+                                rank_old.size(), Xb_old, beta_old, cHyp_old);
+  }
+
+  // Calculate centered z_hat, and pve.
+  if (a_mode == 13) {
+    if (cHyp_old.n_gamma == 0) {
+      CalcCC_PVEnZ(z_hat, cHyp_old);
+    } else {
+      CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
+    }
+  }
+
+  // Start MCMC.
+  int accept;
+  size_t total_step = w_step + s_step;
+  size_t w = 0, w_col, pos;
+  size_t repeat = 0;
+
+  for (size_t t = 0; t < total_step; ++t) {
+    if (t % d_pace == 0 || t == total_step - 1) {
+      ProgressBar("Running MCMC ", t, total_step - 1,
+                  (double)n_accept / (double)(t * n_mh + 1));
+    }
+
+    if (a_mode == 13) {
+      SampleZ(y, z_hat, z);
+      mean_z = CenterVector(z);
+      gsl_blas_ddot(z, z, &ztz);
+
+      // First proposal.
+      if (cHyp_old.n_gamma == 0) {
+        logPost_old = CalcPosterior(ztz, cHyp_old);
+      } else {
+        gsl_matrix_view Xold_sub =
+            gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size());
+        gsl_vector_view Xtz_sub =
+            gsl_vector_subvector(Xtz_old, 0, rank_old.size());
+        gsl_blas_dgemv(CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0,
+                       &Xtz_sub.vector);
+        logPost_old =
+            CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(),
+                          Xb_old, beta_old, cHyp_old);
+      }
+    }
+
+    // M-H steps.
+    for (size_t i = 0; i < n_mh; ++i) {
+      if (gsl_rng_uniform(gsl_r) < 0.33) {
+        repeat = 1 + gsl_rng_uniform_int(gsl_r, 20);
+      } else {
+        repeat = 1;
+      }
+
+      logMHratio = 0.0;
+      logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
+      logMHratio +=
+          ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
+      logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
+
+      if (cHyp_new.n_gamma == 0) {
+        logPost_new = CalcPosterior(ztz, cHyp_new);
+      } else {
+
+        // This makes sure that rank_old.size() ==
+        // rank_remove.size() does not happen.
+        if (cHyp_new.n_gamma <= 20 || cHyp_old.n_gamma <= 20) {
+          time_start = clock();
+          SetXgamma(Xgamma_new, X, rank_new);
+          CalcXtX(Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new);
+          time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+        } else {
+          time_start = clock();
+          SetXgamma(X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new,
+                    Xgamma_new, XtX_new, Xtz_new);
+          time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+        }
+        time_start = clock();
+        logPost_new =
+            CalcPosterior(Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(),
+                          Xb_new, beta_new, cHyp_new);
+        time_post += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+      }
+      logMHratio += logPost_new - logPost_old;
+
+      if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
+        accept = 1;
+        n_accept++;
+      } else {
+        accept = 0;
+      }
+
+      if (accept == 1) {
+        logPost_old = logPost_new;
+        cHyp_old = cHyp_new;
+        gsl_vector_memcpy(Xb_old, Xb_new);
+
+        rank_old.clear();
+        if (rank_new.size() != 0) {
+          for (size_t i = 0; i < rank_new.size(); ++i) {
+            rank_old.push_back(rank_new[i]);
+          }
+
+          gsl_matrix_view Xold_sub =
+              gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size());
+          gsl_matrix_view XtXold_sub = gsl_matrix_submatrix(
+              XtX_old, 0, 0, rank_new.size(), rank_new.size());
+          gsl_vector_view Xtzold_sub =
+              gsl_vector_subvector(Xtz_old, 0, rank_new.size());
+          gsl_vector_view betaold_sub =
+              gsl_vector_subvector(beta_old, 0, rank_new.size());
+
+          gsl_matrix_view Xnew_sub =
+              gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size());
+          gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix(
+              XtX_new, 0, 0, rank_new.size(), rank_new.size());
+          gsl_vector_view Xtznew_sub =
+              gsl_vector_subvector(Xtz_new, 0, rank_new.size());
+          gsl_vector_view betanew_sub =
+              gsl_vector_subvector(beta_new, 0, rank_new.size());
+
+          gsl_matrix_memcpy(&Xold_sub.matrix, &Xnew_sub.matrix);
+          gsl_matrix_memcpy(&XtXold_sub.matrix, &XtXnew_sub.matrix);
+          gsl_vector_memcpy(&Xtzold_sub.vector, &Xtznew_sub.vector);
+          gsl_vector_memcpy(&betaold_sub.vector, &betanew_sub.vector);
+        }
+      } else {
+        cHyp_new = cHyp_old;
+      }
+    }
+
+    // Calculate z_hat, and pve.
+    if (a_mode == 13) {
+      if (cHyp_old.n_gamma == 0) {
+        CalcCC_PVEnZ(z_hat, cHyp_old);
+      } else {
+        CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
+      }
+
+      // Sample mu and update z_hat.
+      gsl_vector_sub(z, z_hat);
+      mean_z += CenterVector(z);
+      mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
+
+      gsl_vector_add_constant(z_hat, mean_z);
+    }
+
+    // Save data.
+    if (t < w_step) {
+      continue;
+    } else {
+      if (t % r_pace == 0) {
+        w_col = w % w_pace;
+        if (w_col == 0) {
+          if (w == 0) {
+            WriteResult(0, Result_hyp, Result_gamma, w_col);
+          } else {
+            WriteResult(1, Result_hyp, Result_gamma, w_col);
+            gsl_matrix_set_zero(Result_hyp);
+            gsl_matrix_set_zero(Result_gamma);
+          }
+        }
+
+        gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
+        gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
+        gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
+        gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
+        gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
+        gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
+
+        for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+          pos = mapRank2pos[rank_old[i]] + 1;
+          gsl_matrix_set(Result_gamma, w_col, i, pos);
+
+          beta_g[pos - 1].first += gsl_vector_get(beta_old, i);
+          beta_g[pos - 1].second += 1.0;
+        }
+
+        if (a_mode == 13) {
+          pheno_mean += mean_z;
+        }
+
+        w++;
+      }
+    }
+  }
+  cout << endl;
+
+  cout << "time on selecting Xgamma: " << time_set << endl;
+  cout << "time on calculating posterior: " << time_post << endl;
+
+  w_col = w % w_pace;
+  WriteResult(1, Result_hyp, Result_gamma, w_col);
+
+  gsl_vector *alpha = gsl_vector_alloc(ns_test);
+  gsl_vector_set_zero(alpha);
+  WriteParam(beta_g, alpha, w);
+  gsl_vector_free(alpha);
+
+  gsl_matrix_free(Result_hyp);
+  gsl_matrix_free(Result_gamma);
+
+  gsl_vector_free(z_hat);
+  gsl_vector_free(z);
+  gsl_vector_free(Xb_new);
+  gsl_vector_free(Xb_old);
+
+  gsl_matrix_free(Xgamma_old);
+  gsl_matrix_free(XtX_old);
+  gsl_vector_free(Xtz_old);
+  gsl_vector_free(beta_old);
+
+  gsl_matrix_free(Xgamma_new);
+  gsl_matrix_free(XtX_new);
+  gsl_vector_free(Xtz_new);
+  gsl_vector_free(beta_new);
+
+  delete[] p_gamma;
+  beta_g.clear();
+
+  return;
 }
diff --git a/src/bslmm.h b/src/bslmm.h
index c7768a2..d2dadbf 100644
--- a/src/bslmm.h
+++ b/src/bslmm.h
@@ -19,10 +19,10 @@
 #ifndef __BSLMM_H__
 #define __BSLMM_H__
 
-#include <vector>
-#include <map>
-#include <gsl/gsl_rng.h>
 #include <gsl/gsl_randist.h>
+#include <gsl/gsl_rng.h>
+#include <map>
+#include <vector>
 
 #include "param.h"
 
@@ -31,149 +31,139 @@ using namespace std;
 class BSLMM {
 
 public:
-	// IO-related parameters.
-	int a_mode;
-	size_t d_pace;
-
-	string file_bfile;
-	string file_geno;
-	string file_out;
-	string path_out;
-
-	// LMM-related parameters.
-	double l_min;
-	double l_max;
-	size_t n_region;
-	double pve_null;
-	double pheno_mean;
-
-	// BSLMM MCMC-related parameters
-	double h_min, h_max, h_scale;	       // Priors for h.
-	double rho_min, rho_max, rho_scale;    // Priors for rho.
-	double logp_min, logp_max, logp_scale; // Priors for log(pi).
-	size_t s_min, s_max;		       // Min. & max. number of gammas.
-	size_t w_step;			       // Number of warm up/burn in
-                                               // iterations.
-	size_t s_step;			       // Num. sampling iterations.
-	size_t r_pace;			       // Record pace.
-	size_t w_pace;			       // Write pace.
-	size_t n_accept;		       // Number of acceptances.
-	size_t n_mh;			       // Number of MH steps per iter.
-	double geo_mean;		       // Mean of geometric dist.
-	long int randseed;
-	double trace_G;
-
-	HYPBSLMM cHyp_initial;
-
-	// Summary statistics.
-	size_t ni_total, ns_total; // Number of total individuals and SNPs
-	size_t ni_test, ns_test;   // Num. individuals & SNPs used in analysis.
-	size_t n_cvt;		   // Number of covariates.
-	double time_UtZ;
-	double time_Omega;	   // Time spent on optimization iterations.
-
-        // Time spent on constructing the proposal distribution for
-        // gamma (i.e. lmm or lm analysis).
-	double time_Proposal;
-
-        // Indicator for individuals (phenotypes): 0 missing, 1
-        // available for analysis.
-	vector<int> indicator_idv;
-
-	// Sequence indicator for SNPs: 0 ignored because of (a) maf,
-	// (b) miss, (c) non-poly; 1 available for analysis.
-	vector<int> indicator_snp;
-
-        // Record SNP information.
-	vector<SNPINFO> snpInfo;
-
-	// Not included in PARAM.
-	gsl_rng *gsl_r;
-	gsl_ran_discrete_t *gsl_t;
-	map<size_t, size_t> mapRank2pos;
-
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-
-	void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
-		    const gsl_vector *Uty, const gsl_vector *eval,
-		    const double lambda);
-
-	void MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
-		   const gsl_vector *Uty, const gsl_vector *K_eval,
-		   const gsl_vector *y);
-	void WriteLog ();
-	void WriteLR ();
-	void WriteBV (const gsl_vector *bv);
-	void WriteParam (vector<pair<double, double> > &beta_g,
-			 const gsl_vector *alpha, const size_t w);
-	void WriteParam (const gsl_vector *alpha);
-	void WriteResult (const int flag, const gsl_matrix *Result_hyp,
-			  const gsl_matrix *Result_gamma, const size_t w_col);
-
-	// Subfunctions inside MCMC.
-	void CalcPgamma (double *p_gammar);
-
-	double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
-			  const double sigma_a2);
-	void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
-			  vector<size_t> &rank_old, class HYPBSLMM &cHyp,
-			  vector<pair<size_t, double> > &pos_loglr);
-	double CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval,
-			      gsl_vector *Utu, gsl_vector *alpha_prime,
-			      class HYPBSLMM &cHyp);
-	double CalcPosterior (const gsl_matrix *UtXgamma,
-			      const gsl_vector *Uty, const gsl_vector *K_eval,
-			      gsl_vector *UtXb, gsl_vector *Utu,
-			      gsl_vector *alpha_prime, gsl_vector *beta,
-			      class HYPBSLMM &cHyp);
-	void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
-			   gsl_vector *z_hat, class HYPBSLMM &cHyp);
-	void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
-			   const gsl_vector *Utu, gsl_vector *z_hat,
-			   class HYPBSLMM &cHyp);
-	double CalcREMLE (const gsl_matrix *Utw, const gsl_vector *Uty,
-			  const gsl_vector *K_eval);
-
-        // Calculate the maximum marginal likelihood ratio for each
-        // analyzed SNPs with gemma, use it to rank SNPs.
-	double CalcLR (const gsl_matrix *U, const gsl_matrix *UtX,
-		       const gsl_vector *Uty, const gsl_vector *K_eval,
-		       vector<pair<size_t, double> > &loglr_sort);
-	void SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
-		      gsl_vector *z);
-	double ProposeHnRho (const class HYPBSLMM &cHyp_old,
-			     class HYPBSLMM &cHyp_new, const size_t &repeat);
-	double ProposePi (const class HYPBSLMM &cHyp_old,
-			  class HYPBSLMM &cHyp_new,
-			  const size_t &repeat);
-	double ProposeGamma (const vector<size_t> &rank_old,
-			     vector<size_t> &rank_new, const double *p_gamma,
-			     const class HYPBSLMM &cHyp_old,
-			     class HYPBSLMM &cHyp_new, const size_t &repeat);
-	void SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X,
-			vector<size_t> &rank);
-
-	void CalcXtX (const gsl_matrix *X_new, const gsl_vector *y,
-		      const size_t s_size, gsl_matrix *XtX_new,
-		      gsl_vector *Xty_new);
-	void SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
-			const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
-			const gsl_vector *y, const vector<size_t> &rank_old,
-			const vector<size_t> &rank_new, gsl_matrix *X_new,
-			gsl_matrix *XtX_new, gsl_vector *Xty_new);
-	double CalcPosterior (const double yty, class HYPBSLMM &cHyp);
-	double CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
-			      const gsl_vector *Xty, const double yty,
-			      const size_t s_size, gsl_vector *Xb,
-			      gsl_vector *beta, class HYPBSLMM &cHyp);
-	void CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp);
-	void CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat,
-			   class HYPBSLMM &cHyp);
-	void MCMC (const gsl_matrix *X, const gsl_vector *y);
+  // IO-related parameters.
+  int a_mode;    // Analysis mode; MCMC(X, y) takes its latent-z path when 13.
+  size_t d_pace; // MCMC(X, y) refreshes the progress bar every d_pace steps.
+
+  string file_bfile;
+  string file_geno;
+  string file_out;
+  string path_out;
+
+  // LMM-related parameters.
+  double l_min;
+  double l_max;
+  size_t n_region;
+  double pve_null;
+  double pheno_mean; // a_mode 13: MCMC(X, y) zeroes it, then adds mean_z per record.
+
+  // BSLMM MCMC-related parameters.
+  double h_min, h_max, h_scale;          // Priors for h.
+  double rho_min, rho_max, rho_scale;    // Priors for rho.
+  double logp_min, logp_max, logp_scale; // Priors for log(pi).
+  size_t s_min, s_max;                   // Min. & max. number of gammas.
+  size_t w_step;                         // Number of warm up/burn in
+                                         // iterations.
+  size_t s_step;                         // Num. sampling iterations.
+  size_t r_pace;                         // Record pace.
+  size_t w_pace;                         // Write pace.
+  size_t n_accept;                       // Number of acceptances.
+  size_t n_mh;                           // Number of MH steps per iter.
+  double geo_mean;                       // Mean of geometric dist.
+  long int randseed; // Negative: MCMC(X, y) seeds from the UTC time of day.
+  double trace_G;
+
+  HYPBSLMM cHyp_initial; // MCMC(X, y) stores InitialMCMC's hyperparameters here.
+
+  // Summary statistics.
+  size_t ni_total, ns_total; // Total number of individuals and SNPs.
+  size_t ni_test, ns_test;   // Num. individuals & SNPs used in analysis.
+  size_t n_cvt;              // Number of covariates.
+  double time_UtZ;
+  double time_Omega; // Time spent on optimization iterations.
+
+  // Time spent on constructing the proposal distribution for gamma
+  // (i.e. lmm or lm analysis); MCMC(X, y) records it in minutes.
+  double time_Proposal;
+
+  // Indicator for individuals (phenotypes): 0 missing, 1
+  // available for analysis.
+  vector<int> indicator_idv;
+
+  // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+  // (b) miss, (c) non-poly; 1 available for analysis.
+  vector<int> indicator_snp;
+
+  // Record SNP information.
+  vector<SNPINFO> snpInfo;
+
+  // Not included in PARAM. NOTE(review): MCMC(X, y) allocates gsl_r and gsl_t but frees neither.
+  gsl_rng *gsl_r;            // RNG; allocated and seeded with randseed in MCMC(X, y).
+  gsl_ran_discrete_t *gsl_t; // Discrete sampling table preprocessed from p_gamma.
+  map<size_t, size_t> mapRank2pos; // Rank in LR-sorted pos_loglr -> that entry's .first.
+
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+
+  void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty,
+              const gsl_vector *eval, const double lambda);
+
+  void MCMC(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty,
+            const gsl_vector *K_eval, const gsl_vector *y);
+  void WriteLog();
+  void WriteLR();
+  void WriteBV(const gsl_vector *bv);
+  void WriteParam(vector<pair<double, double>> &beta_g, const gsl_vector *alpha,
+                  const size_t w);
+  void WriteParam(const gsl_vector *alpha);
+  void WriteResult(const int flag, const gsl_matrix *Result_hyp,
+                   const gsl_matrix *Result_gamma, const size_t w_col); // MCMC(X, y): flag 0 on first write, 1 after.
+
+  // Subfunctions inside MCMC.
+  void CalcPgamma(double *p_gammar); // MCMC(X, y) passes an ns_test-long array, presumably filled here.
+
+  double CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+                   const double sigma_a2);
+  void InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty,
+                   vector<size_t> &rank_old, class HYPBSLMM &cHyp,
+                   vector<pair<size_t, double>> &pos_loglr);
+  double CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval,
+                       gsl_vector *Utu, gsl_vector *alpha_prime,
+                       class HYPBSLMM &cHyp);
+  double CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+                       const gsl_vector *K_eval, gsl_vector *UtXb,
+                       gsl_vector *Utu, gsl_vector *alpha_prime,
+                       gsl_vector *beta, class HYPBSLMM &cHyp);
+  void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu,
+                    gsl_vector *z_hat, class HYPBSLMM &cHyp);
+  void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb,
+                    const gsl_vector *Utu, gsl_vector *z_hat,
+                    class HYPBSLMM &cHyp);
+  double CalcREMLE(const gsl_matrix *Utw, const gsl_vector *Uty,
+                   const gsl_vector *K_eval);
+
+  // Calculate the maximum marginal likelihood ratio for each
+  // analyzed SNP with gemma, and use it to rank the SNPs.
+  double CalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
+                const gsl_vector *Uty, const gsl_vector *K_eval,
+                vector<pair<size_t, double>> &loglr_sort);
+  void SampleZ(const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z); // MCMC(X, y) re-centers z after this call.
+  double ProposeHnRho(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new,
+                      const size_t &repeat);
+  double ProposePi(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new,
+                   const size_t &repeat);
+  double ProposeGamma(const vector<size_t> &rank_old, vector<size_t> &rank_new,
+                      const double *p_gamma, const class HYPBSLMM &cHyp_old,
+                      class HYPBSLMM &cHyp_new, const size_t &repeat);
+  void SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank); // MCMC(X, y): used when old or new n_gamma <= 20.
+
+  void CalcXtX(const gsl_matrix *X_new, const gsl_vector *y,
+               const size_t s_size, gsl_matrix *XtX_new, gsl_vector *Xty_new);
+  void SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old,
+                 const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
+                 const gsl_vector *y, const vector<size_t> &rank_old,
+                 const vector<size_t> &rank_new, gsl_matrix *X_new,
+                 gsl_matrix *XtX_new, gsl_vector *Xty_new); // Also receives the old X/XtX/Xty; MCMC(X, y) uses it otherwise.
+  double CalcPosterior(const double yty, class HYPBSLMM &cHyp); // MCMC(X, y) calls this when n_gamma == 0.
+  double CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX,
+                       const gsl_vector *Xty, const double yty,
+                       const size_t s_size, gsl_vector *Xb, gsl_vector *beta,
+                       class HYPBSLMM &cHyp);
+  void CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp); // MCMC(X, y) calls this when n_gamma == 0.
+  void CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat,
+                    class HYPBSLMM &cHyp);
+  void MCMC(const gsl_matrix *X, const gsl_vector *y); // Overload taking X and y only (no U / K_eval inputs).
 };
 
 #endif
-
-
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
index e1a53a6..7aac1d4 100644
--- a/src/bslmmdap.cpp
+++ b/src/bslmmdap.cpp
@@ -16,89 +16,97 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <algorithm>
 #include <cmath>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
 #include <iostream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <ctime>
-#include <cstring>
-#include <algorithm>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
 #include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
 #include "gsl/gsl_randist.h"
-#include "gsl/gsl_cdf.h"
 #include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
 
-#include "logistic.h"
-#include "lapack.h"
-#include "io.h"
-#include "param.h"
 #include "bslmmdap.h"
-#include "lmm.h"
+#include "io.h"
+#include "lapack.h"
 #include "lm.h"
+#include "lmm.h"
+#include "logistic.h"
 #include "mathfunc.h"
+#include "param.h"
 
 using namespace std;
 
-void BSLMMDAP::CopyFromParam (PARAM &cPar) {
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
+void BSLMMDAP::CopyFromParam(PARAM &cPar) { // Load this object's settings from cPar.
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
 
-	time_UtZ=0.0;
-	time_Omega=0.0;
+  time_UtZ = 0.0; // Timing fields start at zero; CopyToParam writes them back.
+  time_Omega = 0.0;
 
-	h_min=cPar.h_min;
-	h_max=cPar.h_max;
-	h_ngrid=cPar.h_ngrid;
-	rho_min=cPar.rho_min;
-	rho_max=cPar.rho_max;
-	rho_ngrid=cPar.rho_ngrid;
+  h_min = cPar.h_min; // Range and grid size for h and rho, as given in cPar.
+  h_max = cPar.h_max;
+  h_ngrid = cPar.h_ngrid;
+  rho_min = cPar.rho_min;
+  rho_max = cPar.rho_max;
+  rho_ngrid = cPar.rho_ngrid;
 
-	if (h_min<=0) {h_min=0.01;}
-	if (h_max>=1) {h_max=0.99;}
-	if (rho_min<=0) {rho_min=0.01;}
-	if (rho_max>=1) {rho_max=0.99;}
+  if (h_min <= 0) { // A non-positive lower bound is replaced by 0.01.
+    h_min = 0.01;
+  }
+  if (h_max >= 1) { // An upper bound of 1 or more is replaced by 0.99.
+    h_max = 0.99;
+  }
+  if (rho_min <= 0) { // Same replacement rules for rho.
+    rho_min = 0.01;
+  }
+  if (rho_max >= 1) {
+    rho_max = 0.99;
+  }
 
-	trace_G=cPar.trace_G;
+  trace_G = cPar.trace_G;
 
-	ni_total=cPar.ni_total;
-	ns_total=cPar.ns_total;
-	ni_test=cPar.ni_test;
-	ns_test=cPar.ns_test;
+  ni_total = cPar.ni_total; // Individual and SNP counts.
+  ns_total = cPar.ns_total;
+  ni_test = cPar.ni_test;
+  ns_test = cPar.ns_test;
 
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
+  indicator_idv = cPar.indicator_idv; // Copied by value from cPar.
+  indicator_snp = cPar.indicator_snp;
+  snpInfo = cPar.snpInfo;
 
-	return;
+  return;
 }
 
-void BSLMMDAP::CopyToParam (PARAM &cPar) {
-	cPar.time_UtZ=time_UtZ;
-	cPar.time_Omega=time_Omega;
+void BSLMMDAP::CopyToParam(PARAM &cPar) { // Write the two timing fields back into cPar.
+  cPar.time_UtZ = time_UtZ;
+  cPar.time_Omega = time_Omega;
 
-	return;
+  return;
 }
 
-
-
 // Read hyp file.
-void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
-		   vector<double> &vec_sb2, vector<double> &vec_wab) {
-  vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();
+void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2,
+                  vector<double> &vec_sb2, vector<double> &vec_wab) {
+  vec_sa2.clear();
+  vec_sb2.clear();
+  vec_wab.clear();
 
-  igzstream infile (file_hyp.c_str(), igzstream::in);
+  igzstream infile(file_hyp.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open hyp file: "<<file_hyp<<endl;
+    cout << "error! fail to open hyp file: " << file_hyp << endl;
     return;
   }
 
@@ -108,16 +116,16 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
   getline(infile, line);
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    ch_ptr=strtok (NULL, " , \t");
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
 
-    ch_ptr=strtok (NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
     vec_sa2.push_back(atof(ch_ptr));
 
-    ch_ptr=strtok (NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
     vec_sb2.push_back(atof(ch_ptr));
 
-    ch_ptr=strtok (NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
     vec_wab.push_back(atof(ch_ptr));
   }
 
@@ -128,55 +136,59 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
 }
 
 // Read bf file.
-void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
-		  vector<vector<vector<double> > > &BF) {
-  BF.clear(); vec_rs.clear();
+void ReadFile_bf(const string &file_bf, vector<string> &vec_rs,
+                 vector<vector<vector<double>>> &BF) {
+  BF.clear();
+  vec_rs.clear();
 
-  igzstream infile (file_bf.c_str(), igzstream::in);
-  if (!infile) {cout<<"error! fail to open bf file: "<<file_bf<<endl; return;}
+  igzstream infile(file_bf.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open bf file: " << file_bf << endl;
+    return;
+  }
 
   string line, rs, block;
   vector<double> vec_bf;
-  vector<vector<double> > mat_bf;
+  vector<vector<double>> mat_bf;
   char *ch_ptr;
 
   size_t bf_size, flag_block;
 
   getline(infile, line);
 
-  size_t t=0;
+  size_t t = 0;
   while (!safeGetline(infile, line).eof()) {
-    flag_block=0;
+    flag_block = 0;
 
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    rs=ch_ptr;
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    rs = ch_ptr;
     vec_rs.push_back(rs);
 
-    ch_ptr=strtok (NULL, " , \t");
-    if (t==0) {
-      block=ch_ptr;
+    ch_ptr = strtok(NULL, " , \t");
+    if (t == 0) {
+      block = ch_ptr;
     } else {
-      if (strcmp(ch_ptr, block.c_str() )!=0) {
-	flag_block=1;
-	block=ch_ptr;
+      if (strcmp(ch_ptr, block.c_str()) != 0) {
+        flag_block = 1;
+        block = ch_ptr;
       }
     }
 
-    ch_ptr=strtok (NULL, " , \t");
-    while (ch_ptr!=NULL) {
+    ch_ptr = strtok(NULL, " , \t");
+    while (ch_ptr != NULL) {
       vec_bf.push_back(atof(ch_ptr));
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
 
-    if (t==0) {
-      bf_size=vec_bf.size();
+    if (t == 0) {
+      bf_size = vec_bf.size();
     } else {
-      if (bf_size!=vec_bf.size()) {
-	cout<<"error! unequal row size in bf file."<<endl;
+      if (bf_size != vec_bf.size()) {
+        cout << "error! unequal row size in bf file." << endl;
       }
     }
 
-    if (flag_block==0) {
+    if (flag_block == 0) {
       mat_bf.push_back(vec_bf);
     } else {
       BF.push_back(mat_bf);
@@ -193,15 +205,14 @@ void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
   return;
 }
 
-
 // Read category files.
 // Read both continuous and discrete category file, record mapRS2catc.
-void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
-		   gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
-		   size_t &kc, size_t &kd) {
-  igzstream infile (file_cat.c_str(), igzstream::in);
+void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
+                  gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
+                  size_t &kc, size_t &kd) {
+  igzstream infile(file_cat.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open category file: "<<file_cat<<endl;
+    cout << "error! fail to open category file: " << file_cat << endl;
     return;
   }
 
@@ -213,94 +224,103 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
   // Read header.
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader_io(line, header);
 
   // Use the header to determine the number of categories.
-  kc=header.catc_col.size(); kd=header.catd_col.size();
+  kc = header.catc_col.size();
+  kd = header.catd_col.size();
 
-  //set up storage and mapper
-  map<string, vector<double> > mapRS2catc;
-  map<string, vector<int> > mapRS2catd;
+  // set up storage and mapper
+  map<string, vector<double>> mapRS2catc;
+  map<string, vector<int>> mapRS2catd;
   vector<double> catc;
   vector<int> catd;
 
   // Read the following lines to record mapRS2cat.
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
 
-    if (header.rs_col==0) {
-      rs=chr+":"+pos;
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
     }
 
-    catc.clear(); catd.clear();
-
-    for (size_t i=0; i<header.coln; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {
-	rs=ch_ptr;
-      } else if (header.chr_col!=0 && header.chr_col==i+1) {
-	chr=ch_ptr;
-      } else if (header.pos_col!=0 && header.pos_col==i+1) {
-	pos=ch_ptr;
-      } else if (header.cm_col!=0 && header.cm_col==i+1) {
-	cm=ch_ptr;
-      } else if (header.a1_col!=0 && header.a1_col==i+1) {
-	a1=ch_ptr;
-      } else if (header.a0_col!=0 && header.a0_col==i+1) {
-	a0=ch_ptr;
-      } else if (header.catc_col.size()!=0 && header.catc_col.count(i+1)!=0 ) {
-	catc.push_back(atof(ch_ptr));
-      } else if (header.catd_col.size()!=0 && header.catd_col.count(i+1)!=0 ) {
-	catd.push_back(atoi(ch_ptr));
-      } else {}
-
-      ch_ptr=strtok (NULL, " , \t");
+    catc.clear();
+    catd.clear();
+
+    for (size_t i = 0; i < header.coln; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      } else if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      } else if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+      } else if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+      } else if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      } else if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
+      } else if (header.catc_col.size() != 0 &&
+                 header.catc_col.count(i + 1) != 0) {
+        catc.push_back(atof(ch_ptr));
+      } else if (header.catd_col.size() != 0 &&
+                 header.catd_col.count(i + 1) != 0) {
+        catd.push_back(atoi(ch_ptr));
+      } else {
+      }
+
+      ch_ptr = strtok(NULL, " , \t");
     }
 
-    if (mapRS2catc.count(rs)==0 && kc>0) {mapRS2catc[rs]=catc;}
-    if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;}
+    if (mapRS2catc.count(rs) == 0 && kc > 0) {
+      mapRS2catc[rs] = catc;
+    }
+    if (mapRS2catd.count(rs) == 0 && kd > 0) {
+      mapRS2catd[rs] = catd;
+    }
   }
 
   // Load into Ad and Ac.
-  if (kc>0) {
-    Ac=gsl_matrix_alloc(vec_rs.size(), kc);
-    for (size_t i=0; i<vec_rs.size(); i++) {
-      if (mapRS2catc.count(vec_rs[i])!=0) {
-	for (size_t j=0; j<kc; j++) {
-	  gsl_matrix_set(Ac, i, j, mapRS2catc[vec_rs[i]][j]);
-	}
+  if (kc > 0) {
+    Ac = gsl_matrix_alloc(vec_rs.size(), kc);
+    for (size_t i = 0; i < vec_rs.size(); i++) {
+      if (mapRS2catc.count(vec_rs[i]) != 0) {
+        for (size_t j = 0; j < kc; j++) {
+          gsl_matrix_set(Ac, i, j, mapRS2catc[vec_rs[i]][j]);
+        }
       } else {
-	for (size_t j=0; j<kc; j++) {
-	  gsl_matrix_set(Ac, i, j, 0);
-	}
+        for (size_t j = 0; j < kc; j++) {
+          gsl_matrix_set(Ac, i, j, 0);
+        }
       }
     }
   }
 
-  if (kd>0) {
-    Ad=gsl_matrix_int_alloc(vec_rs.size(), kd);
+  if (kd > 0) {
+    Ad = gsl_matrix_int_alloc(vec_rs.size(), kd);
 
-    for (size_t i=0; i<vec_rs.size(); i++) {
-      if (mapRS2catd.count(vec_rs[i])!=0) {
-	for (size_t j=0; j<kd; j++) {
-	  gsl_matrix_int_set(Ad, i, j, mapRS2catd[vec_rs[i]][j]);
-	}
+    for (size_t i = 0; i < vec_rs.size(); i++) {
+      if (mapRS2catd.count(vec_rs[i]) != 0) {
+        for (size_t j = 0; j < kd; j++) {
+          gsl_matrix_int_set(Ad, i, j, mapRS2catd[vec_rs[i]][j]);
+        }
       } else {
-	for (size_t j=0; j<kd; j++) {
-	  gsl_matrix_int_set(Ad, i, j, 0);
-	}
+        for (size_t j = 0; j < kd; j++) {
+          gsl_matrix_int_set(Ad, i, j, 0);
+        }
       }
     }
 
-    dlevel=gsl_vector_int_alloc(kd);
+    dlevel = gsl_vector_int_alloc(kd);
     map<int, int> rcd;
     int val;
-    for (size_t j=0; j<kd; j++) {
+    for (size_t j = 0; j < kd; j++) {
       rcd.clear();
-      for (size_t i=0; i<Ad->size1; i++) {
-	val = gsl_matrix_int_get(Ad, i, j);
-	rcd[val] = 1;
+      for (size_t i = 0; i < Ad->size1; i++) {
+        val = gsl_matrix_int_get(Ad, i, j);
+        rcd[val] = 1;
       }
-      gsl_vector_int_set (dlevel, j, rcd.size());
+      gsl_vector_int_set(dlevel, j, rcd.size());
     }
   }
 
@@ -310,509 +330,531 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
   return;
 }
 
-void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) {
+void BSLMMDAP::WriteResult(const gsl_matrix *Hyper, const gsl_matrix *BF) {
   string file_bf, file_hyp;
-	file_bf=path_out+"/"+file_out;
-	file_bf+=".bf.txt";
-	file_hyp=path_out+"/"+file_out;
-	file_hyp+=".hyp.txt";
-
-	ofstream outfile_bf, outfile_hyp;
-
-	outfile_bf.open (file_bf.c_str(), ofstream::out);
-	outfile_hyp.open (file_hyp.c_str(), ofstream::out);
-
-	if (!outfile_bf) {
-	  cout<<"error writing file: "<<file_bf<<endl;
-	  return;
-	}
-	if (!outfile_hyp) {
-	  cout<<"error writing file: "<<file_hyp<<endl;
-	  return;
-	}
-
-	outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<
-	  "weight"<<endl;
-	outfile_hyp<<scientific;
-	for (size_t i=0; i<Hyper->size1; i++) {
-	  for (size_t j=0; j<Hyper->size2; j++) {
-	    outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
-	  }
-	  outfile_hyp<<endl;
-	}
-
-	outfile_bf<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss";
-	for (size_t i=0; i<BF->size2; i++) {
-	  outfile_bf<<"\t"<<"BF"<<i+1;
-	}
-	outfile_bf<<endl;
-
-	size_t t=0;
-	for (size_t i=0; i<ns_total; ++i) {
-	  if (indicator_snp[i]==0) {continue;}
-
-	  outfile_bf<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
-		    <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss;
-
-	  outfile_bf<<scientific;
-	  for (size_t j=0; j<BF->size2; j++) {
-	    outfile_bf<<"\t"<<setprecision(6)<<gsl_matrix_get (BF, t, j);
-	  }
-	  outfile_bf<<endl;
-
-	  t++;
-	}
-
-	outfile_hyp.close();
-	outfile_hyp.clear();
-	outfile_bf.close();
-	outfile_bf.clear();
-	return;
+  file_bf = path_out + "/" + file_out;
+  file_bf += ".bf.txt";
+  file_hyp = path_out + "/" + file_out;
+  file_hyp += ".hyp.txt";
+
+  ofstream outfile_bf, outfile_hyp;
+
+  outfile_bf.open(file_bf.c_str(), ofstream::out);
+  outfile_hyp.open(file_hyp.c_str(), ofstream::out);
+
+  if (!outfile_bf) {
+    cout << "error writing file: " << file_bf << endl;
+    return;
+  }
+  if (!outfile_hyp) {
+    cout << "error writing file: " << file_hyp << endl;
+    return;
+  }
+
+  outfile_hyp << "h"
+              << "\t"
+              << "rho"
+              << "\t"
+              << "sa2"
+              << "\t"
+              << "sb2"
+              << "\t"
+              << "weight" << endl;
+  outfile_hyp << scientific;
+  for (size_t i = 0; i < Hyper->size1; i++) {
+    for (size_t j = 0; j < Hyper->size2; j++) {
+      outfile_hyp << setprecision(6) << gsl_matrix_get(Hyper, i, j) << "\t";
+    }
+    outfile_hyp << endl;
+  }
+
+  outfile_bf << "chr"
+             << "\t"
+             << "rs"
+             << "\t"
+             << "ps"
+             << "\t"
+             << "n_miss";
+  for (size_t i = 0; i < BF->size2; i++) {
+    outfile_bf << "\t"
+               << "BF" << i + 1;
+  }
+  outfile_bf << endl;
+
+  size_t t = 0;
+  for (size_t i = 0; i < ns_total; ++i) {
+    if (indicator_snp[i] == 0) {
+      continue;
+    }
+
+    outfile_bf << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+               << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss;
+
+    outfile_bf << scientific;
+    for (size_t j = 0; j < BF->size2; j++) {
+      outfile_bf << "\t" << setprecision(6) << gsl_matrix_get(BF, t, j);
+    }
+    outfile_bf << endl;
+
+    t++;
+  }
+
+  outfile_hyp.close();
+  outfile_hyp.clear();
+  outfile_bf.close();
+  outfile_bf.clear();
+  return;
 }
 
-void BSLMMDAP::WriteResult (const vector<string> &vec_rs,
-			    const gsl_matrix *Hyper, const gsl_vector *pip,
-			    const gsl_vector *coef) {
+void BSLMMDAP::WriteResult(const vector<string> &vec_rs,
+                           const gsl_matrix *Hyper, const gsl_vector *pip,
+                           const gsl_vector *coef) {
   string file_gamma, file_hyp, file_coef;
-	file_gamma=path_out+"/"+file_out;
-	file_gamma+=".gamma.txt";
-	file_hyp=path_out+"/"+file_out;
-	file_hyp+=".hyp.txt";
-	file_coef=path_out+"/"+file_out;
-	file_coef+=".coef.txt";
-
-	ofstream outfile_gamma, outfile_hyp, outfile_coef;
-
-	outfile_gamma.open (file_gamma.c_str(), ofstream::out);
-	outfile_hyp.open (file_hyp.c_str(), ofstream::out);
-	outfile_coef.open (file_coef.c_str(), ofstream::out);
-
-	if (!outfile_gamma) {
-	  cout<<"error writing file: "<<file_gamma<<endl;
-	  return;
-	}
-	if (!outfile_hyp) {
-	  cout<<"error writing file: "<<file_hyp<<endl;
-	  return;
-	}
-	if (!outfile_coef) {
-	  cout<<"error writing file: "<<file_coef<<endl;
-	  return;
-	}
-
-	outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<
-	  "weight"<<endl;
-	outfile_hyp<<scientific;
-	for (size_t i=0; i<Hyper->size1; i++) {
-	  for (size_t j=0; j<Hyper->size2; j++) {
-	    outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
-	  }
-	  outfile_hyp<<endl;
-	}
-
-	outfile_gamma<<"rs"<<"\t"<<"gamma"<<endl;
-	for (size_t i=0; i<vec_rs.size(); ++i) {
-	  outfile_gamma<<vec_rs[i]<<"\t"<<scientific<<setprecision(6)<<
-	    gsl_vector_get(pip, i)<<endl;
-	}
-
-	outfile_coef<<"coef"<<endl;
-	outfile_coef<<scientific;
-	for (size_t i=0; i<coef->size; i++) {
-	  outfile_coef<<setprecision(6)<<gsl_vector_get (coef, i)<<endl;
-	}
-
-	outfile_coef.close();
-	outfile_coef.clear();
-	outfile_hyp.close();
-	outfile_hyp.clear();
-	outfile_gamma.close();
-	outfile_gamma.clear();
-	return;
-}
+  file_gamma = path_out + "/" + file_out;
+  file_gamma += ".gamma.txt";
+  file_hyp = path_out + "/" + file_out;
+  file_hyp += ".hyp.txt";
+  file_coef = path_out + "/" + file_out;
+  file_coef += ".coef.txt";
 
+  ofstream outfile_gamma, outfile_hyp, outfile_coef;
 
-double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
-			       const gsl_vector *K_eval,
-			       const double sigma_b2, const double tau) {
-	gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
+  outfile_gamma.open(file_gamma.c_str(), ofstream::out);
+  outfile_hyp.open(file_hyp.c_str(), ofstream::out);
+  outfile_coef.open(file_coef.c_str(), ofstream::out);
 
-	double logm=0.0;
-	double d, uy, Hi_yy=0, logdet_H=0.0;
-	for (size_t i=0; i<ni_test; ++i) {
-		d=gsl_vector_get (K_eval, i)*sigma_b2;
-		d=1.0/(d+1.0);
-		gsl_vector_set (weight_Hi, i, d);
+  if (!outfile_gamma) {
+    cout << "error writing file: " << file_gamma << endl;
+    return;
+  }
+  if (!outfile_hyp) {
+    cout << "error writing file: " << file_hyp << endl;
+    return;
+  }
+  if (!outfile_coef) {
+    cout << "error writing file: " << file_coef << endl;
+    return;
+  }
 
-		logdet_H-=log(d);
-		uy=gsl_vector_get (Uty, i);
-		Hi_yy+=d*uy*uy;
-	}
+  outfile_hyp << "h"
+              << "\t"
+              << "rho"
+              << "\t"
+              << "sa2"
+              << "\t"
+              << "sb2"
+              << "\t"
+              << "weight" << endl;
+  outfile_hyp << scientific;
+  for (size_t i = 0; i < Hyper->size1; i++) {
+    for (size_t j = 0; j < Hyper->size2; j++) {
+      outfile_hyp << setprecision(6) << gsl_matrix_get(Hyper, i, j) << "\t";
+    }
+    outfile_hyp << endl;
+  }
 
-	// Calculate likelihood.
-	logm=-0.5*logdet_H-0.5*tau*Hi_yy+0.5*log(tau)*(double)ni_test;
+  outfile_gamma << "rs"
+                << "\t"
+                << "gamma" << endl;
+  for (size_t i = 0; i < vec_rs.size(); ++i) {
+    outfile_gamma << vec_rs[i] << "\t" << scientific << setprecision(6)
+                  << gsl_vector_get(pip, i) << endl;
+  }
 
-	gsl_vector_free (weight_Hi);
+  outfile_coef << "coef" << endl;
+  outfile_coef << scientific;
+  for (size_t i = 0; i < coef->size; i++) {
+    outfile_coef << setprecision(6) << gsl_vector_get(coef, i) << endl;
+  }
 
-	return logm;
+  outfile_coef.close();
+  outfile_coef.clear();
+  outfile_hyp.close();
+  outfile_hyp.clear();
+  outfile_gamma.close();
+  outfile_gamma.clear();
+  return;
 }
 
-double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma,
-			       const gsl_vector *Uty,
-			       const gsl_vector *K_eval,
-			       const double sigma_a2,
-			       const double sigma_b2, const double tau) {
-  clock_t  time_start;
-	double logm=0.0;
-	double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
-
-	gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
-						    UtXgamma->size2);
-	gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
-	gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
-	gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
-	gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
-
-	gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
-
-	logdet_H=0.0; P_yy=0.0;
-	for (size_t i=0; i<ni_test; ++i) {
-		gsl_vector_view UtXgamma_row=gsl_matrix_row(UtXgamma_eval,i);
-		d=gsl_vector_get (K_eval, i)*sigma_b2;
-		d=1.0/(d+1.0);
-		gsl_vector_set (weight_Hi, i, d);
-
-		logdet_H-=log(d);
-		uy=gsl_vector_get (Uty, i);
-		P_yy+=d*uy*uy;
-		gsl_vector_scale (&UtXgamma_row.vector, d);
-	}
-
-	// Calculate Omega.
-	gsl_matrix_set_identity (Omega);
-
-	time_start=clock();
-	lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval,
-		      UtXgamma, 1.0, Omega);
-	time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	// Calculate beta_hat.
-	gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
-
-	logdet_O=CholeskySolve(Omega, XtHiy, beta_hat);
-
-	gsl_vector_scale (beta_hat, sigma_a2);
-
-	gsl_blas_ddot (XtHiy, beta_hat, &d);
-	P_yy-=d;
-
-	gsl_matrix_free (UtXgamma_eval);
-	gsl_matrix_free (Omega);
-	gsl_vector_free (XtHiy);
-	gsl_vector_free (beta_hat);
-	gsl_vector_free (weight_Hi);
-
-	logm=-0.5*logdet_H-0.5*logdet_O-0.5*tau*P_yy+0.5*log(tau)*
-	  (double)ni_test;
-
-	return logm;
+double BSLMMDAP::CalcMarginal(const gsl_vector *Uty, const gsl_vector *K_eval,
+                              const double sigma_b2, const double tau) {
+  gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size);
+
+  double logm = 0.0;
+  double d, uy, Hi_yy = 0, logdet_H = 0.0;
+  for (size_t i = 0; i < ni_test; ++i) {
+    d = gsl_vector_get(K_eval, i) * sigma_b2;
+    d = 1.0 / (d + 1.0);
+    gsl_vector_set(weight_Hi, i, d);
+
+    logdet_H -= log(d);
+    uy = gsl_vector_get(Uty, i);
+    Hi_yy += d * uy * uy;
+  }
+
+  // Calculate likelihood.
+  logm = -0.5 * logdet_H - 0.5 * tau * Hi_yy + 0.5 * log(tau) * (double)ni_test;
+
+  gsl_vector_free(weight_Hi);
+
+  return logm;
 }
 
-double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) {
-  double logprior=0;
-  logprior=((double)cHyp.n_gamma-1.0)*cHyp.logp+
-    ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
+double BSLMMDAP::CalcMarginal(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+                              const gsl_vector *K_eval, const double sigma_a2,
+                              const double sigma_b2, const double tau) {
+  clock_t time_start;
+  double logm = 0.0;
+  double d, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0;
+
+  gsl_matrix *UtXgamma_eval =
+      gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2);
+  gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
+  gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2);
+  gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2);
+  gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1);
+
+  gsl_matrix_memcpy(UtXgamma_eval, UtXgamma);
+
+  logdet_H = 0.0;
+  P_yy = 0.0;
+  for (size_t i = 0; i < ni_test; ++i) {
+    gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i);
+    d = gsl_vector_get(K_eval, i) * sigma_b2;
+    d = 1.0 / (d + 1.0);
+    gsl_vector_set(weight_Hi, i, d);
+
+    logdet_H -= log(d);
+    uy = gsl_vector_get(Uty, i);
+    P_yy += d * uy * uy;
+    gsl_vector_scale(&UtXgamma_row.vector, d);
+  }
+
+  // Calculate Omega.
+  gsl_matrix_set_identity(Omega);
+
+  time_start = clock();
+  lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0,
+               Omega);
+  time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  // Calculate beta_hat.
+  gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
+
+  logdet_O = CholeskySolve(Omega, XtHiy, beta_hat);
+
+  gsl_vector_scale(beta_hat, sigma_a2);
+
+  gsl_blas_ddot(XtHiy, beta_hat, &d);
+  P_yy -= d;
+
+  gsl_matrix_free(UtXgamma_eval);
+  gsl_matrix_free(Omega);
+  gsl_vector_free(XtHiy);
+  gsl_vector_free(beta_hat);
+  gsl_vector_free(weight_Hi);
+
+  logm = -0.5 * logdet_H - 0.5 * logdet_O - 0.5 * tau * P_yy +
+         0.5 * log(tau) * (double)ni_test;
+
+  return logm;
+}
+
+double BSLMMDAP::CalcPrior(class HYPBSLMM &cHyp) {
+  double logprior = 0;
+  logprior =
+      ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+      ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
   return logprior;
 }
 
 // Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
-			   const gsl_vector *Uty, const gsl_vector *K_eval,
-			   const gsl_vector *y) {
-	clock_t time_start;
-
-	// Set up BF.
-	double tau, h, rho, sigma_a2, sigma_b2, d;
-	size_t ns_causal=10;
-	size_t n_grid=h_ngrid*rho_ngrid;
-	vector<double> vec_sa2, vec_sb2, logm_null;
-
-	gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid);
-	gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1);
-	gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
-
-	// Compute tau by using yty.
-	gsl_blas_ddot (Uty, Uty, &tau);
-	tau=(double)ni_test/tau;
-
-	// Set up grid values for sigma_a2 and sigma_b2 based on an
-	// approximately even grid for h and rho, and a fixed number
-	// of causals.
-	size_t ij=0;
-	for (size_t i=0; i<h_ngrid; i++) {
-	  h=h_min+(h_max-h_min)*(double)i/((double)h_ngrid-1);
-	  for (size_t j=0; j<rho_ngrid; j++) {
-	    rho=rho_min+(rho_max-rho_min)*(double)j/((double)rho_ngrid-1);
-
-	    sigma_a2=h*rho/((1-h)*(double)ns_causal);
-	    sigma_b2=h*(1.0-rho)/(trace_G*(1-h));
-
-	    vec_sa2.push_back(sigma_a2);
-	    vec_sb2.push_back(sigma_b2);
-	    logm_null.push_back(CalcMarginal (Uty, K_eval, 0.0, tau));
-
-	    gsl_matrix_set (Hyper, ij, 0, h);
-	    gsl_matrix_set (Hyper, ij, 1, rho);
-	    gsl_matrix_set (Hyper, ij, 2, sigma_a2);
-	    gsl_matrix_set (Hyper, ij, 3, sigma_b2);
-	    gsl_matrix_set (Hyper, ij, 4, 1/(double)n_grid);
-	    ij++;
-	  }
-	}
-
-	// Compute BF factors.
-	time_start=clock();
-	cout<<"Calculating BF..."<<endl;
-	for (size_t t=0; t<ns_test; t++) {
-	  gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, 0);
-	  gsl_vector_const_view X_col=gsl_matrix_const_column (UtX, t);
-	  gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
-
-	  for (size_t ij=0; ij<n_grid; ij++) {
-	    sigma_a2=vec_sa2[ij];
-	    sigma_b2=vec_sb2[ij];
-
-	    d=CalcMarginal (Xgamma, Uty, K_eval, sigma_a2, sigma_b2, tau);
-	    d-=logm_null[ij];
-	    d=exp(d);
-
-	    gsl_matrix_set(BF, t, ij, d);
-	  }
-	}
-	time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	// Save results.
-	WriteResult (Hyper, BF);
-
-	// Free matrices and vectors.
-	gsl_matrix_free(BF);
-	gsl_matrix_free(Xgamma);
-	gsl_matrix_free(Hyper);
-	return;
+void BSLMMDAP::DAP_CalcBF(const gsl_matrix *U, const gsl_matrix *UtX,
+                          const gsl_vector *Uty, const gsl_vector *K_eval,
+                          const gsl_vector *y) {
+  clock_t time_start;
+
+  // Set up BF.
+  double tau, h, rho, sigma_a2, sigma_b2, d;
+  size_t ns_causal = 10;
+  size_t n_grid = h_ngrid * rho_ngrid;
+  vector<double> vec_sa2, vec_sb2, logm_null;
+
+  gsl_matrix *BF = gsl_matrix_alloc(ns_test, n_grid);
+  gsl_matrix *Xgamma = gsl_matrix_alloc(ni_test, 1);
+  gsl_matrix *Hyper = gsl_matrix_alloc(n_grid, 5);
+
+  // Compute tau by using yty.
+  gsl_blas_ddot(Uty, Uty, &tau);
+  tau = (double)ni_test / tau;
+
+  // Set up grid values for sigma_a2 and sigma_b2 based on an
+  // approximately even grid for h and rho, and a fixed number
+  // of causals.
+  size_t ij = 0;
+  for (size_t i = 0; i < h_ngrid; i++) {
+    h = h_min + (h_max - h_min) * (double)i / ((double)h_ngrid - 1);
+    for (size_t j = 0; j < rho_ngrid; j++) {
+      rho = rho_min + (rho_max - rho_min) * (double)j / ((double)rho_ngrid - 1);
+
+      sigma_a2 = h * rho / ((1 - h) * (double)ns_causal);
+      sigma_b2 = h * (1.0 - rho) / (trace_G * (1 - h));
+
+      vec_sa2.push_back(sigma_a2);
+      vec_sb2.push_back(sigma_b2);
+      logm_null.push_back(CalcMarginal(Uty, K_eval, 0.0, tau));
+
+      gsl_matrix_set(Hyper, ij, 0, h);
+      gsl_matrix_set(Hyper, ij, 1, rho);
+      gsl_matrix_set(Hyper, ij, 2, sigma_a2);
+      gsl_matrix_set(Hyper, ij, 3, sigma_b2);
+      gsl_matrix_set(Hyper, ij, 4, 1 / (double)n_grid);
+      ij++;
+    }
+  }
+
+  // Compute BF factors.
+  time_start = clock();
+  cout << "Calculating BF..." << endl;
+  for (size_t t = 0; t < ns_test; t++) {
+    gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, 0);
+    gsl_vector_const_view X_col = gsl_matrix_const_column(UtX, t);
+    gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector);
+
+    for (size_t ij = 0; ij < n_grid; ij++) {
+      sigma_a2 = vec_sa2[ij];
+      sigma_b2 = vec_sb2[ij];
+
+      d = CalcMarginal(Xgamma, Uty, K_eval, sigma_a2, sigma_b2, tau);
+      d -= logm_null[ij];
+      d = exp(d);
+
+      gsl_matrix_set(BF, t, ij, d);
+    }
+  }
+  time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  // Save results.
+  WriteResult(Hyper, BF);
+
+  // Free matrices and vectors.
+  gsl_matrix_free(BF);
+  gsl_matrix_free(Xgamma);
+  gsl_matrix_free(Hyper);
+  return;
 }
 
 void single_ct_regression(const gsl_matrix_int *Xd,
-			  const gsl_vector_int *dlevel,
-			  const gsl_vector *pip_vec,
-			  gsl_vector *coef, gsl_vector *prior_vec) {
+                          const gsl_vector_int *dlevel,
+                          const gsl_vector *pip_vec, gsl_vector *coef,
+                          gsl_vector *prior_vec) {
 
-  map<int,double> sum_pip;
-  map<int,double> sum;
+  map<int, double> sum_pip;
+  map<int, double> sum;
 
-  int levels = gsl_vector_int_get(dlevel,0);
+  int levels = gsl_vector_int_get(dlevel, 0);
 
-  for(int i=0;i<levels;i++){
+  for (int i = 0; i < levels; i++) {
     sum_pip[i] = sum[i] = 0;
   }
 
-  for(int i=0;i<Xd->size1;i++){
-    int cat = gsl_matrix_int_get(Xd,i,0);
-    sum_pip[cat] += gsl_vector_get(pip_vec,i);
+  for (int i = 0; i < Xd->size1; i++) {
+    int cat = gsl_matrix_int_get(Xd, i, 0);
+    sum_pip[cat] += gsl_vector_get(pip_vec, i);
     sum[cat] += 1;
   }
 
-  for(int i=0;i<Xd->size1;i++){
-    int cat = gsl_matrix_int_get(Xd,i,0);
-    gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]);
+  for (int i = 0; i < Xd->size1; i++) {
+    int cat = gsl_matrix_int_get(Xd, i, 0);
+    gsl_vector_set(prior_vec, i, sum_pip[cat] / sum[cat]);
   }
 
-  for(int i=0;i<levels;i++){
-    double new_prior = sum_pip[i]/sum[i];
-    gsl_vector_set(coef, i, log(new_prior/(1-new_prior)) );
+  for (int i = 0; i < levels; i++) {
+    double new_prior = sum_pip[i] / sum[i];
+    gsl_vector_set(coef, i, log(new_prior / (1 - new_prior)));
   }
 
   return;
 }
 
 // Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd,
-				  const vector<string> &vec_rs,
-				  const vector<double> &vec_sa2,
-				  const vector<double> &vec_sb2,
-				  const vector<double> &wab,
-				  const vector<vector<vector<double> > > &BF,
-				  gsl_matrix *Ac, gsl_matrix_int *Ad,
-				  gsl_vector_int *dlevel) {
-	clock_t time_start;
-
-	// Set up BF.
-	double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save;
-	size_t t1, t2;
-	size_t n_grid=wab.size(), ns_test=vec_rs.size();
-
-	gsl_vector *prior_vec=gsl_vector_alloc(ns_test);
-	gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
-	gsl_vector *pip=gsl_vector_alloc(ns_test);
-	gsl_vector *coef=gsl_vector_alloc(kc+kd+1);
-
-	// Perform the EM algorithm.
-	vector<double> vec_wab, vec_wab_new;
-
-	// Initial values.
-	for (size_t t=0; t<ns_test; t++) {
-	  gsl_vector_set (prior_vec, t, (double)BF.size()/(double)ns_test);
-	}
-	for (size_t ij=0; ij<n_grid; ij++) {
-	  vec_wab.push_back(wab[ij]);
-	  vec_wab_new.push_back(wab[ij]);
-	}
-
-	// EM iteration.
-	size_t it=0;
-	double dif=1;
-	while (it<100 && dif>1e-3) {
-
-	  // Update E_gamma.
-	  t1=0, t2=0;
-	  for (size_t b=0; b<BF.size(); b++) {
-	    s=1;
-	    for (size_t m=0; m<BF[b].size(); m++) {
-	      d=0;
-	      for (size_t ij=0; ij<n_grid; ij++) {
-		d+=vec_wab_new[ij]*BF[b][m][ij];
-	      }
-	      d*=gsl_vector_get(prior_vec,t1)/(1-gsl_vector_get(prior_vec,t1));
-
-	      gsl_vector_set(pip, t1, d);
-	      s+=d;
-	      t1++;
-	    }
-
-	    for (size_t m=0; m<BF[b].size(); m++) {
-	      d=gsl_vector_get(pip, t2)/s;
-	      gsl_vector_set(pip, t2, d);
-	      t2++;
-	    }
-	  }
-
-	  // Update E_wab.
-	  s=0;
-	  for (size_t ij=0; ij<n_grid; ij++) {
-	    vec_wab_new[ij]=0;
-
-	    t1=0;
-	    for (size_t b=0; b<BF.size(); b++) {
-	      d=1;
-	      for (size_t m=0; m<BF[b].size(); m++) {
-		d+=gsl_vector_get(prior_vec, t1)/
-		  (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
-		t1++;
-	      }
-	      vec_wab_new[ij]+=log(d);
-	    }
-
-	    s=max(s, vec_wab_new[ij]);
-	  }
-
-	  d=0;
-	  for (size_t ij=0; ij<n_grid; ij++) {
-	    vec_wab_new[ij]=exp(vec_wab_new[ij]-s);
-	    d+=vec_wab_new[ij];
-	  }
-
-	  for (size_t ij=0; ij<n_grid; ij++) {
-	    vec_wab_new[ij]/=d;
-	  }
-
-	  // Update coef, and pi.
-	  if(kc==0 && kd==0){
-
-	    // No annotation.
-	    s=0;
-	    for (size_t t=0; t<pip->size; t++) {
-	      s+=gsl_vector_get(pip, t);
-	    }
-	    s=s/(double)pip->size;
-	    for (size_t t=0; t<pip->size; t++) {
-	      gsl_vector_set(prior_vec, t, s);
-	    }
-
-	    gsl_vector_set (coef, 0, log(s/(1-s)));
-	  } else if(kc==0 && kd!=0){
-
-	    // Only discrete annotations.
-	    if(kd == 1){
-	      single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
-	    }else{
-	      logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
-	      logistic_cat_pred(coef, Ad, dlevel, prior_vec);
-	    }
-	  } else if (kc!=0 && kd==0) {
-
-	    // Only continuous annotations.
-	    logistic_cont_fit(coef, Ac, pip, 0, 0);
-	    logistic_cont_pred(coef, Ac, prior_vec);
-	  } else if (kc!=0 && kd!=0) {
-
-	    // Both continuous and categorical annotations.
-	    logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
-	    logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
-	  }
-
-	  // Compute marginal likelihood.
-	  logm=0;
-
-	  t1=0;
-	  for (size_t b=0; b<BF.size(); b++) {
-	    d=1; s=0;
-	    for (size_t m=0; m<BF[b].size(); m++) {
-	      s+=log(1-gsl_vector_get(prior_vec, t1));
-	      for (size_t ij=0; ij<n_grid; ij++) {
-		d+=gsl_vector_get(prior_vec, t1)/
-		  (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
-	      }
-	    }
-	    logm+=log(d)+s;
-	    t1++;
-	  }
-
-	  if (it>0) {
-	    dif=logm-logm_save;
-	  }
-	  logm_save=logm;
-	  it++;
-
-	  cout<<"iteration = "<<it<<"; marginal likelihood = "<<logm<<endl;
-	}
-
-	// Update h and rho that correspond to w_ab.
-	for (size_t ij=0; ij<n_grid; ij++) {
-	  sigma_a2=vec_sa2[ij];
-	  sigma_b2=vec_sb2[ij];
-
-	  d=exp(gsl_vector_get(coef, coef->size-1))/
-	    (1+exp(gsl_vector_get(coef, coef->size-1)));
-	  h=(d*(double)ns_test*sigma_a2+1*sigma_b2)/
-	    (1+d*(double)ns_test*sigma_a2+1*sigma_b2);
-	  rho=d*(double)ns_test*sigma_a2/
-	    (d*(double)ns_test*sigma_a2+1*sigma_b2);
-
-	  gsl_matrix_set (Hyper, ij, 0, h);
-	  gsl_matrix_set (Hyper, ij, 1, rho);
-	  gsl_matrix_set (Hyper, ij, 2, sigma_a2);
-	  gsl_matrix_set (Hyper, ij, 3, sigma_b2);
-	  gsl_matrix_set (Hyper, ij, 4, vec_wab_new[ij]);
-	}
-
-	// Obtain beta and alpha parameters.
-
-	// Save results.
-	WriteResult (vec_rs, Hyper, pip, coef);
-
-	// Free matrices and vectors.
-	gsl_vector_free(prior_vec);
-	gsl_matrix_free(Hyper);
-	gsl_vector_free(pip);
-	gsl_vector_free(coef);
-	return;
+void BSLMMDAP::DAP_EstimateHyper(
+    const size_t kc, const size_t kd, const vector<string> &vec_rs,
+    const vector<double> &vec_sa2, const vector<double> &vec_sb2,
+    const vector<double> &wab, const vector<vector<vector<double>>> &BF,
+    gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) {
+  clock_t time_start;
+
+  // Set up BF.
+  double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save;
+  size_t t1, t2;
+  size_t n_grid = wab.size(), ns_test = vec_rs.size();
+
+  gsl_vector *prior_vec = gsl_vector_alloc(ns_test);
+  gsl_matrix *Hyper = gsl_matrix_alloc(n_grid, 5);
+  gsl_vector *pip = gsl_vector_alloc(ns_test);
+  gsl_vector *coef = gsl_vector_alloc(kc + kd + 1);
+
+  // Perform the EM algorithm.
+  vector<double> vec_wab, vec_wab_new;
+
+  // Initial values.
+  for (size_t t = 0; t < ns_test; t++) {
+    gsl_vector_set(prior_vec, t, (double)BF.size() / (double)ns_test);
+  }
+  for (size_t ij = 0; ij < n_grid; ij++) {
+    vec_wab.push_back(wab[ij]);
+    vec_wab_new.push_back(wab[ij]);
+  }
+
+  // EM iteration.
+  size_t it = 0;
+  double dif = 1;
+  while (it < 100 && dif > 1e-3) {
+
+    // Update E_gamma.
+    t1 = 0, t2 = 0;
+    for (size_t b = 0; b < BF.size(); b++) {
+      s = 1;
+      for (size_t m = 0; m < BF[b].size(); m++) {
+        d = 0;
+        for (size_t ij = 0; ij < n_grid; ij++) {
+          d += vec_wab_new[ij] * BF[b][m][ij];
+        }
+        d *=
+            gsl_vector_get(prior_vec, t1) / (1 - gsl_vector_get(prior_vec, t1));
+
+        gsl_vector_set(pip, t1, d);
+        s += d;
+        t1++;
+      }
+
+      for (size_t m = 0; m < BF[b].size(); m++) {
+        d = gsl_vector_get(pip, t2) / s;
+        gsl_vector_set(pip, t2, d);
+        t2++;
+      }
+    }
+
+    // Update E_wab.
+    s = 0;
+    for (size_t ij = 0; ij < n_grid; ij++) {
+      vec_wab_new[ij] = 0;
+
+      t1 = 0;
+      for (size_t b = 0; b < BF.size(); b++) {
+        d = 1;
+        for (size_t m = 0; m < BF[b].size(); m++) {
+          d += gsl_vector_get(prior_vec, t1) /
+               (1 - gsl_vector_get(prior_vec, t1)) * vec_wab[ij] * BF[b][m][ij];
+          t1++;
+        }
+        vec_wab_new[ij] += log(d);
+      }
+
+      s = max(s, vec_wab_new[ij]);
+    }
+
+    d = 0;
+    for (size_t ij = 0; ij < n_grid; ij++) {
+      vec_wab_new[ij] = exp(vec_wab_new[ij] - s);
+      d += vec_wab_new[ij];
+    }
+
+    for (size_t ij = 0; ij < n_grid; ij++) {
+      vec_wab_new[ij] /= d;
+    }
+
+    // Update coef, and pi.
+    if (kc == 0 && kd == 0) {
+
+      // No annotation.
+      s = 0;
+      for (size_t t = 0; t < pip->size; t++) {
+        s += gsl_vector_get(pip, t);
+      }
+      s = s / (double)pip->size;
+      for (size_t t = 0; t < pip->size; t++) {
+        gsl_vector_set(prior_vec, t, s);
+      }
+
+      gsl_vector_set(coef, 0, log(s / (1 - s)));
+    } else if (kc == 0 && kd != 0) {
+
+      // Only discrete annotations.
+      if (kd == 1) {
+        single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
+      } else {
+        logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
+        logistic_cat_pred(coef, Ad, dlevel, prior_vec);
+      }
+    } else if (kc != 0 && kd == 0) {
+
+      // Only continuous annotations.
+      logistic_cont_fit(coef, Ac, pip, 0, 0);
+      logistic_cont_pred(coef, Ac, prior_vec);
+    } else if (kc != 0 && kd != 0) {
+
+      // Both continuous and categorical annotations.
+      logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
+      logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
+    }
+
+    // Compute marginal likelihood.
+    logm = 0;
+
+    t1 = 0;
+    for (size_t b = 0; b < BF.size(); b++) {
+      d = 1;
+      s = 0;
+      for (size_t m = 0; m < BF[b].size(); m++) {
+        s += log(1 - gsl_vector_get(prior_vec, t1));
+        for (size_t ij = 0; ij < n_grid; ij++) {
+          d += gsl_vector_get(prior_vec, t1) /
+               (1 - gsl_vector_get(prior_vec, t1)) * vec_wab[ij] * BF[b][m][ij];
+        }
+      }
+      logm += log(d) + s;
+      t1++;
+    }
+
+    if (it > 0) {
+      dif = logm - logm_save;
+    }
+    logm_save = logm;
+    it++;
+
+    cout << "iteration = " << it << "; marginal likelihood = " << logm << endl;
+  }
+
+  // Update h and rho that correspond to w_ab.
+  for (size_t ij = 0; ij < n_grid; ij++) {
+    sigma_a2 = vec_sa2[ij];
+    sigma_b2 = vec_sb2[ij];
+
+    d = exp(gsl_vector_get(coef, coef->size - 1)) /
+        (1 + exp(gsl_vector_get(coef, coef->size - 1)));
+    h = (d * (double)ns_test * sigma_a2 + 1 * sigma_b2) /
+        (1 + d * (double)ns_test * sigma_a2 + 1 * sigma_b2);
+    rho = d * (double)ns_test * sigma_a2 /
+          (d * (double)ns_test * sigma_a2 + 1 * sigma_b2);
+
+    gsl_matrix_set(Hyper, ij, 0, h);
+    gsl_matrix_set(Hyper, ij, 1, rho);
+    gsl_matrix_set(Hyper, ij, 2, sigma_a2);
+    gsl_matrix_set(Hyper, ij, 3, sigma_b2);
+    gsl_matrix_set(Hyper, ij, 4, vec_wab_new[ij]);
+  }
+
+  // Obtain beta and alpha parameters.
+
+  // Save results.
+  WriteResult(vec_rs, Hyper, pip, coef);
+
+  // Free matrices and vectors.
+  gsl_vector_free(prior_vec);
+  gsl_matrix_free(Hyper);
+  gsl_vector_free(pip);
+  gsl_vector_free(coef);
+  return;
 }
diff --git a/src/bslmmdap.h b/src/bslmmdap.h
index db5774b..dc05e34 100644
--- a/src/bslmmdap.h
+++ b/src/bslmmdap.h
@@ -19,97 +19,91 @@
 #ifndef __BSLMMDAP_H__
 #define __BSLMMDAP_H__
 
-#include <vector>
-#include <map>
-#include <gsl/gsl_rng.h>
-#include <gsl/gsl_randist.h>
 #include "param.h"
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_rng.h>
+#include <map>
+#include <vector>
 
 using namespace std;
 
 class BSLMMDAP {
 
 public:
-	// IO-related parameters.
-	int a_mode;
-	size_t d_pace;
-
-	string file_bfile;
-	string file_geno;
-	string file_out;
-	string path_out;
-
-	// LMM related parameters
-	double pve_null;
-	double pheno_mean;
-
-	// BSLMM MCMC related parameters
-	long int randseed;
-	double trace_G;
-
-	HYPBSLMM cHyp_initial;
-
-	// Summary statistics
-	size_t ni_total, ns_total; // Number of total individuals and SNPs.
-	size_t ni_test, ns_test;   // Number of individuals and SNPs
-				   // used for analysis.
-
-	double h_min, h_max, rho_min, rho_max;
-	size_t h_ngrid, rho_ngrid;
-
-	double time_UtZ;
-	double time_Omega;	// Time spent on optimization iterations.
-	double time_Proposal;   // Time spent on constructing the
-				// proposal distribution for gamma
-				// (i.e., lmm or lm analysis).
-
-        // Indicator for individuals (phenotypes): 0 missing, 1
-        // available for analysis.
-	vector<int> indicator_idv;
-
- 	// Sequence indicator for SNPs: 0 ignored because of (a) maf,
- 	// (b) miss, (c) non-poly; 1 available for analysis.
-	vector<int> indicator_snp;
-
-	vector<SNPINFO> snpInfo;  // Record SNP information.
-
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-
-	void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF);
-	void WriteResult (const vector<string> &vec_rs,
-			  const gsl_matrix *Hyper, const gsl_vector *pip,
-			  const gsl_vector *coef);
-	double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval,
-			     const double sigma_b2, const double tau);
-	double CalcMarginal (const gsl_matrix *UtXgamma,
-			     const gsl_vector *Uty, const gsl_vector *K_eval,
-			     const double sigma_a2, const double sigma_b2,
-			     const double tau);
-	double CalcPrior (class HYPBSLMM &cHyp);
-
-	void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
-			 const gsl_vector *Uty, const gsl_vector *K_eval,
-			 const gsl_vector *y);
-	void DAP_EstimateHyper (const size_t kc, const size_t kd,
-				const vector<string> &vec_rs,
-				const vector<double> &vec_sa2,
-				const vector<double> &vec_sb2,
-				const vector<double> &wab,
-				const vector<vector<vector<double> > > &BF,
-				gsl_matrix *Ac, gsl_matrix_int *Ad,
-				gsl_vector_int *dlevel);
+  // IO-related parameters.
+  int a_mode;
+  size_t d_pace;
+
+  string file_bfile;
+  string file_geno;
+  string file_out;
+  string path_out;
+
+  // LMM related parameters
+  double pve_null;
+  double pheno_mean;
+
+  // BSLMM MCMC related parameters
+  long int randseed;
+  double trace_G;
+
+  HYPBSLMM cHyp_initial;
+
+  // Summary statistics
+  size_t ni_total, ns_total; // Number of total individuals and SNPs.
+  size_t ni_test, ns_test;   // Number of individuals and SNPs
+                             // used for analysis.
+
+  double h_min, h_max, rho_min, rho_max;
+  size_t h_ngrid, rho_ngrid;
+
+  double time_UtZ;
+  double time_Omega;    // Time spent on optimization iterations.
+  double time_Proposal; // Time spent on constructing the
+                        // proposal distribution for gamma
+                        // (i.e., lmm or lm analysis).
+
+  // Indicator for individuals (phenotypes): 0 missing, 1
+  // available for analysis.
+  vector<int> indicator_idv;
+
+  // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+  // (b) miss, (c) non-poly; 1 available for analysis.
+  vector<int> indicator_snp;
+
+  vector<SNPINFO> snpInfo; // Record SNP information.
+
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+
+  void WriteResult(const gsl_matrix *Hyper, const gsl_matrix *BF);
+  void WriteResult(const vector<string> &vec_rs, const gsl_matrix *Hyper,
+                   const gsl_vector *pip, const gsl_vector *coef);
+  double CalcMarginal(const gsl_vector *Uty, const gsl_vector *K_eval,
+                      const double sigma_b2, const double tau);
+  double CalcMarginal(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+                      const gsl_vector *K_eval, const double sigma_a2,
+                      const double sigma_b2, const double tau);
+  double CalcPrior(class HYPBSLMM &cHyp);
+
+  void DAP_CalcBF(const gsl_matrix *U, const gsl_matrix *UtX,
+                  const gsl_vector *Uty, const gsl_vector *K_eval,
+                  const gsl_vector *y);
+  void
+  DAP_EstimateHyper(const size_t kc, const size_t kd,
+                    const vector<string> &vec_rs, const vector<double> &vec_sa2,
+                    const vector<double> &vec_sb2, const vector<double> &wab,
+                    const vector<vector<vector<double>>> &BF, gsl_matrix *Ac,
+                    gsl_matrix_int *Ad, gsl_vector_int *dlevel);
 };
 
-void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
-		   vector<double> &vec_sb2, vector<double> &vec_wab);
-void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
-		  vector<vector<vector<double> > > &BF);
-void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
-		   gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
-		   size_t &kc, size_t &kd);
+void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2,
+                  vector<double> &vec_sb2, vector<double> &vec_wab);
+void ReadFile_bf(const string &file_bf, vector<string> &vec_rs,
+                 vector<vector<vector<double>>> &BF);
+void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
+                  gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
+                  size_t &kc, size_t &kd);
 
 #endif
-
-
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp
index 733dae1..a8c545c 100644
--- a/src/eigenlib.cpp
+++ b/src/eigenlib.cpp
@@ -16,13 +16,13 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include "Eigen/Dense"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 #include <cmath>
+#include <iostream>
 #include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
-#include "Eigen/Dense"
 
 using namespace std;
 using namespace Eigen;
@@ -34,82 +34,79 @@ using namespace Eigen;
 // eigen, 1x or 0.3x slower than lapack
 // invert, 20x or 10x faster than lapack
 //
-void eigenlib_dgemm (const char *TransA, const char *TransB,
-		     const double alpha, const gsl_matrix *A,
-		     const gsl_matrix *B, const double beta,
-		     gsl_matrix *C) {
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
-    A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
-    B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda) );
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
-    C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda) );
+void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
+                    const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                    gsl_matrix *C) { // C <- alpha * op(A) * op(B) + beta * C, written into C's buffer
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda)); // row step is A->tda
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda)); // row step is B->tda
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda)); // row step is C->tda
 
-  if (*TransA=='N' || *TransA=='n') {
-    if (*TransB=='N' || *TransB=='n') {
-      C_mat=alpha*A_mat*B_mat+beta*C_mat;
+  if (*TransA == 'N' || *TransA == 'n') { // first char 'N'/'n': use A as is
+    if (*TransB == 'N' || *TransB == 'n') { // first char 'N'/'n': use B as is
+      C_mat = alpha * A_mat * B_mat + beta * C_mat;
     } else {
-      C_mat=alpha*A_mat*B_mat.transpose()+beta*C_mat;
+      C_mat = alpha * A_mat * B_mat.transpose() + beta * C_mat; // any other TransB: B transposed
     }
   } else {
-    if (*TransB=='N' || *TransB=='n') {
-      C_mat=alpha*A_mat.transpose()*B_mat+beta*C_mat;
+    if (*TransB == 'N' || *TransB == 'n') { // any other TransA: A transposed
+      C_mat = alpha * A_mat.transpose() * B_mat + beta * C_mat;
     } else {
-      C_mat=alpha*A_mat.transpose()*B_mat.transpose()+beta*C_mat;
+      C_mat = alpha * A_mat.transpose() * B_mat.transpose() + beta * C_mat;
     }
   }
 
   return;
 }
 
-void eigenlib_dgemv (const char *TransA, const double alpha,
-		     const gsl_matrix *A, const gsl_vector *x,
-		     const double beta, gsl_vector *y) {
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
-    A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
-  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> >
-    x_vec(x->data, x->size, InnerStride<Dynamic>(x->stride) );
-  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> >
-    y_vec(y->data, y->size, InnerStride<Dynamic>(y->stride) );
+void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
+                    const gsl_vector *x, const double beta, gsl_vector *y) { // y <- alpha * op(A) * x + beta * y
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda)); // row step is A->tda
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> x_vec(
+      x->data, x->size, InnerStride<Dynamic>(x->stride)); // element step is x->stride
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> y_vec(
+      y->data, y->size, InnerStride<Dynamic>(y->stride)); // element step is y->stride
 
-  if (*TransA=='N' || *TransA=='n') {
-    y_vec=alpha*A_mat*x_vec+beta*y_vec;
+  if (*TransA == 'N' || *TransA == 'n') { // first char 'N'/'n': use A as is
+    y_vec = alpha * A_mat * x_vec + beta * y_vec;
   } else {
-    y_vec=alpha*A_mat.transpose()*x_vec+beta*y_vec;
+    y_vec = alpha * A_mat.transpose() * x_vec + beta * y_vec; // any other TransA: A transposed
   }
 
   return;
 }
 
 void eigenlib_invert(gsl_matrix *A) {
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor> >
-    A_mat(A->data, A->size1, A->size2);
-  A_mat=A_mat.inverse();
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda)); // row step is A->tda, which may differ from size2
+  A_mat = A_mat.inverse(); // overwrites A's buffer with the inverse of A
   return;
 }
 
-void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A) {
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor> >
-    A_mat(A->data, A->size1, A->size2);
-  Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> >
-    b_vec(b->data, b->size, OuterStride<Dynamic>(b->stride) );
-  A_mat=alpha*b_vec*b_vec.transpose()+A_mat;
+void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) { // rank-1 update in place: A <- alpha * b * b^T + A
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda)); // row step is A->tda, which may differ from size2
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> b_vec(
+      b->data, b->size, InnerStride<Dynamic>(b->stride)); // element step is b->stride (inner, as in eigenlib_dgemv)
+  A_mat = alpha * b_vec * b_vec.transpose() + A_mat;
   return;
 }
 
-void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U,
-			 gsl_vector *eval) {
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
-    G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda) );
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
-    U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda) );
-  Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> >
-    eval_vec(eval->data, eval->size, OuterStride<Dynamic>(eval->stride) );
+void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) { // eigen-decomposes G: eigenvalues -> eval, eigenvectors -> U
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda));
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda));
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> eval_vec(
+      eval->data, eval->size, InnerStride<Dynamic>(eval->stride)); // element step is eval->stride (inner, as in eigenlib_dgemv)
 
   SelfAdjointEigenSolver<MatrixXd> es(G_mat);
   if (es.info() != Success)
     abort();
-  eval_vec=es.eigenvalues();
-  U_mat=es.eigenvectors();
+  eval_vec = es.eigenvalues(); // written through the map into eval's buffer
+  U_mat = es.eigenvectors(); // written through the map into U's buffer
   return;
 }
diff --git a/src/eigenlib.h b/src/eigenlib.h
index 3659dc1..b29fa63 100644
--- a/src/eigenlib.h
+++ b/src/eigenlib.h
@@ -23,15 +23,13 @@
 
 using namespace std;
 
-void eigenlib_dgemm (const char *TransA, const char *TransB,
-		     const double alpha, const gsl_matrix *A,
-		     const gsl_matrix *B, const double beta,
-		     gsl_matrix *C);
-void eigenlib_dgemv (const char *TransA, const double alpha,
-		     const gsl_matrix *A, const gsl_vector *x,
-		     const double beta, gsl_vector *y);
+void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
+                    const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                    gsl_matrix *C);
+void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
+                    const gsl_vector *x, const double beta, gsl_vector *y);
 void eigenlib_invert(gsl_matrix *A);
-void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A);
-void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval);
+void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A);
+void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval);
 
 #endif
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 1a9ca9b..c72475b 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -16,427 +16,670 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include <cmath>
+#include <cstring>
+#include <ctime>
 #include <fstream>
+#include <iostream>
 #include <string>
-#include <cstring>
 #include <sys/stat.h>
-#include <ctime>
-#include <cmath>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
-#include "gsl/gsl_eigen.h"
 #include "gsl/gsl_cdf.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 
-#include "lapack.h"
-#include "io.h"
-#include "gemma.h"
-#include "vc.h"
-#include "lm.h"
 #include "bslmm.h"
 #include "bslmmdap.h"
+#include "gemma.h"
+#include "io.h"
+#include "lapack.h"
 #include "ldr.h"
+#include "lm.h"
 #include "lmm.h"
+#include "mathfunc.h"
 #include "mvlmm.h"
 #include "prdt.h"
 #include "varcov.h"
-#include "mathfunc.h"
+#include "vc.h"
 
 using namespace std;
 
-GEMMA::GEMMA(void):
-version("0.97"), date("07/27/2017"), year("2017")
-{}
-
-void GEMMA::PrintHeader (void) {
-  cout<<endl;
-  cout<<"*********************************************************"<<endl;
-  cout<<"  Genome-wide Efficient Mixed Model Association (GEMMA)  "<<endl;
-  cout<<"  Version "<<version<<", "<<date<<"                              "<<
-    endl;
-  cout<<"  Visit http://www.xzlab.org/software.html For Updates   "<<endl;
-  cout<<"  (C) "<<year<<" Xiang Zhou                                   "<<endl;
-  cout<<"  GNU General Public License                             "<<endl;
-  cout<<"  For Help, Type ./gemma -h                              "<<endl;
-  cout<<"*********************************************************"<<endl;
-  cout<<endl;
+GEMMA::GEMMA(void) : version("0.97"), date("07/27/2017"), year("2017") {} // release strings printed by PrintHeader and PrintHelp
+
+void GEMMA::PrintHeader(void) { // prints the banner (program name, version, date, URL, copyright year, license, help hint) to cout
+  cout << endl;
+  cout << "*********************************************************" << endl;
+  cout << "  Genome-wide Efficient Mixed Model Association (GEMMA)  " << endl;
+  cout << "  Version " << version << ", " << date
+       << "                              " << endl;
+  cout << "  Visit http://www.xzlab.org/software.html For Updates   " << endl;
+  cout << "  (C) " << year << " Xiang Zhou                                   "
+       << endl;
+  cout << "  GNU General Public License                             " << endl;
+  cout << "  For Help, Type ./gemma -h                              " << endl;
+  cout << "*********************************************************" << endl;
+  cout << endl;
 
   return;
 }
 
-void GEMMA::PrintLicense (void) {
-	cout<<endl;
-	cout<<"The Software Is Distributed Under GNU General Public "<<
-	  "License, But May Also Require The Following Notifications."<<endl;
-	cout<<endl;
-
-	cout<<"Including Lapack Routines In The Software May Require"<<
-	  " The Following Notification:"<<endl;
-	cout<<"Copyright (c) 1992-2010 The University of Tennessee and "<<
-	  "The University of Tennessee Research Foundation.  All rights "<<
-	  "reserved."<<endl;
-	cout<<"Copyright (c) 2000-2010 The University of California "<<
-	  "Berkeley. All rights reserved."<<endl;
-	cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. "<<
-	  "All rights reserved."<<endl;
-	cout<<endl;
-
-	cout<<"$COPYRIGHT$"<<endl;
-	cout<<"Additional copyrights may follow"<<endl;
-	cout<<"$HEADER$"<<endl;
-	cout<<"Redistribution and use in source and binary forms, with or "<<
-	  "without modification, are permitted provided that the following "<<
-	  " conditions are met:"<<endl;
-	cout<<"- Redistributions of source code must retain the above "<<
-	  "copyright notice, this list of conditions and the following "<<
-	  "disclaimer."<<endl;
-	cout<<"- Redistributions in binary form must reproduce the above "<<
-	  "copyright notice, this list of conditions and the following "<<
-	  "disclaimer listed in this license in the documentation and/or "<<
-	  "other materials provided with the distribution."<<endl;
-	cout<<"- Neither the name of the copyright holders nor the names "<<
-	  "of its contributors may be used to endorse or promote products "<<
-	  "derived from this software without specific prior written "<<
-	  "permission."<<endl;
-	cout<<"The copyright holders provide no reassurances that the "<<
-	  "source code provided does not infringe any patent, copyright, "<<
-	  "or any other "<<
-	  "intellectual property rights of third parties. "<<
-	  "The copyright holders disclaim any liability to any recipient "<<
-	  "for claims brought against "<<
-	  "recipient by any third party for infringement of that parties "<<
-	  "intellectual property rights. "<<endl;
-	cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "<<
-	  "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "<<
-	  "INCLUDING, BUT NOT "<<
-	  "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "<<
-	  "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "<<
-	  "SHALL THE COPYRIGHT "<<
-	  "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "<<
-	  "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "<<
-	  "(INCLUDING, BUT NOT "<<
-	  "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "<<
-	  "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "<<
-	  "HOWEVER CAUSED AND ON ANY "<<
-	  "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "<<
-	  "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "<<
-	  "OUT OF THE USE "<<
-	  "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "<<
-	  "SUCH DAMAGE."<<endl;
-	cout<<endl;
-
-	return;
+void GEMMA::PrintLicense(void) { // prints the GPL note, the LAPACK copyright lines and the redistribution/disclaimer text to cout
+  cout << endl;
+  cout << "The Software Is Distributed Under GNU General Public "
+       << "License, But May Also Require The Following Notifications." << endl;
+  cout << endl;
+
+  cout << "Including Lapack Routines In The Software May Require"
+       << " The Following Notification:" << endl;
+  cout << "Copyright (c) 1992-2010 The University of Tennessee and "
+       << "The University of Tennessee Research Foundation.  All rights "
+       << "reserved." << endl;
+  cout << "Copyright (c) 2000-2010 The University of California "
+       << "Berkeley. All rights reserved." << endl;
+  cout << "Copyright (c) 2006-2010 The University of Colorado Denver. "
+       << "All rights reserved." << endl;
+  cout << endl;
+
+  cout << "$COPYRIGHT$" << endl;
+  cout << "Additional copyrights may follow" << endl;
+  cout << "$HEADER$" << endl;
+  cout << "Redistribution and use in source and binary forms, with or "
+       << "without modification, are permitted provided that the following "
+       << " conditions are met:" << endl;
+  cout << "- Redistributions of source code must retain the above "
+       << "copyright notice, this list of conditions and the following "
+       << "disclaimer." << endl;
+  cout << "- Redistributions in binary form must reproduce the above "
+       << "copyright notice, this list of conditions and the following "
+       << "disclaimer listed in this license in the documentation and/or "
+       << "other materials provided with the distribution." << endl;
+  cout << "- Neither the name of the copyright holders nor the names "
+       << "of its contributors may be used to endorse or promote products "
+       << "derived from this software without specific prior written "
+       << "permission." << endl;
+  cout << "The copyright holders provide no reassurances that the "
+       << "source code provided does not infringe any patent, copyright, "
+       << "or any other "
+       << "intellectual property rights of third parties. "
+       << "The copyright holders disclaim any liability to any recipient "
+       << "for claims brought against "
+       << "recipient by any third party for infringement of that parties "
+       << "intellectual property rights. " << endl;
+  cout << "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "
+       << "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "
+       << "INCLUDING, BUT NOT "
+       << "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "
+       << "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "
+       << "SHALL THE COPYRIGHT "
+       << "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "
+       << "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "
+       << "(INCLUDING, BUT NOT "
+       << "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "
+       << "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "
+       << "HOWEVER CAUSED AND ON ANY "
+       << "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "
+       << "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "
+       << "OUT OF THE USE "
+       << "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "
+       << "SUCH DAMAGE." << endl;
+  cout << endl;
+
+  return;
 }
 
 void GEMMA::PrintHelp(size_t option) {
-  if (option==0) {
-    cout<<endl;
-    cout<<" GEMMA version "<<version<<", released on "<<date<<endl;
-    cout<<" implemented by Xiang Zhou"<<endl;
-    cout<<endl;
-    cout<<" type ./gemma -h [num] for detailed helps"<<endl;
-    cout<<" options: " << endl;
-    cout<<" 1: quick guide"<<endl;
-    cout<<" 2: file I/O related"<<endl;
-    cout<<" 3: SNP QC"<<endl;
-    cout<<" 4: calculate relatedness matrix"<<endl;
-    cout<<" 5: perform eigen decomposition"<<endl;
-    cout<<" 6: perform variance component estimation"<<endl;
-    cout<<" 7: fit a linear model"<<endl;
-    cout<<" 8: fit a linear mixed model"<<endl;
-    cout<<" 9: fit a multivariate linear mixed model"<<endl;
-    cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl;
-    cout<<" 11: obtain predicted values"<<endl;
-    cout<<" 12: calculate snp variance covariance"<<endl;
-    cout<<" 13: note"<<endl;
-    cout<<endl;
+  if (option == 0) {
+    cout << endl;
+    cout << " GEMMA version " << version << ", released on " << date << endl;
+    cout << " implemented by Xiang Zhou" << endl;
+    cout << endl;
+    cout << " type ./gemma -h [num] for detailed helps" << endl;
+    cout << " options: " << endl;
+    cout << " 1: quick guide" << endl;
+    cout << " 2: file I/O related" << endl;
+    cout << " 3: SNP QC" << endl;
+    cout << " 4: calculate relatedness matrix" << endl;
+    cout << " 5: perform eigen decomposition" << endl;
+    cout << " 6: perform variance component estimation" << endl;
+    cout << " 7: fit a linear model" << endl;
+    cout << " 8: fit a linear mixed model" << endl;
+    cout << " 9: fit a multivariate linear mixed model" << endl;
+    cout << " 10: fit a Bayesian sparse linear mixed model" << endl;
+    cout << " 11: obtain predicted values" << endl;
+    cout << " 12: calculate snp variance covariance" << endl;
+    cout << " 13: note" << endl;
+    cout << endl;
   }
 
-  if (option==1) {
-    cout<<" QUICK GUIDE" << endl;
-    cout<<" to generate a relatedness matrix: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl;
-    cout<<" to generate the S matrix: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -gs -o [prefix]"<<endl;
-    cout<<"         ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl;
-    cout<<"         ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl;
-    cout<<"         ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl;
-    cout<<"         ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl;
-    cout<<"         ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl;
-    cout<<" to generate the q vector: "<<endl;
-    cout<<"         ./gemma -beta [filename] -gq -o [prefix]"<<endl;
-    cout<<"         ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl;
-    cout<<" to generate the ldsc weigthts: "<<endl;
-    cout<<"         ./gemma -beta [filename] -gw -o [prefix]"<<endl;
-    cout<<"         ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl;
-    cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl;
-    cout<<" to estimate variance components: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         options for the above two commands: -crt -windowbp [num]"<<endl;
-    cout<<"         ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl;
-    cout<<"         ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl;
-    cout<<" to fit a linear mixed model: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
-    cout<<" to fit a linear mixed model to test g by e effects: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
-    cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
-    cout<<" to fit a multivariate linear mixed model: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
-    cout<<" to fit a Bayesian sparse linear mixed model: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl;
-    cout<<" to obtain predicted values: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
-    cout<<" to calculate correlations between SNPs: "<<endl;
-    cout<<"         ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl;
-    cout<<"         ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl;
-    cout<<endl;
+  if (option == 1) {
+    cout << " QUICK GUIDE" << endl;
+    cout << " to generate a relatedness matrix: " << endl;
+    cout << "         ./gemma -bfile [prefix] -gk [num] -o [prefix]" << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"
+         << endl;
+    cout << " to generate the S matrix: " << endl;
+    cout << "         ./gemma -bfile [prefix] -gs -o [prefix]" << endl;
+    cout << "         ./gemma -p [filename] -g [filename] -gs -o [prefix]"
+         << endl;
+    cout << "         ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"
+         << endl;
+    cout << "         ./gemma -p [filename] -g [filename] -cat [filename] -gs "
+            "-o [prefix]"
+         << endl;
+    cout << "         ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"
+         << endl;
+    cout << "         ./gemma -p [filename] -g [filename] -sample [num] -gs -o "
+            "[prefix]"
+         << endl;
+    cout << " to generate the q vector: " << endl;
+    cout << "         ./gemma -beta [filename] -gq -o [prefix]" << endl;
+    cout << "         ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"
+         << endl;
+    cout << " to generate the ldsc weigthts: " << endl;
+    cout << "         ./gemma -beta [filename] -gw -o [prefix]" << endl;
+    cout << "         ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"
+         << endl;
+    cout << " to perform eigen decomposition of the relatedness matrix: "
+         << endl;
+    cout << "         ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"
+         << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -k [filename] -eigen "
+            "-o [prefix]"
+         << endl;
+    cout << " to estimate variance components: " << endl;
+    cout << "         ./gemma -bfile [prefix] -k [filename] -vc [num] -o "
+            "[prefix]"
+         << endl;
+    cout << "         ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"
+         << endl;
+    cout << "         ./gemma -bfile [prefix] -mk [filename] -vc [num] -o "
+            "[prefix]"
+         << endl;
+    cout
+        << "         ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"
+        << endl;
+    cout << "         ./gemma -beta [filename] -cor [filename] -vc [num] -o "
+            "[prefix]"
+         << endl;
+    cout << "         ./gemma -beta [filename] -cor [filename] -cat [filename] "
+            "-vc [num] -o [prefix]"
+         << endl;
+    cout << "         options for the above two commands: -crt -windowbp [num]"
+         << endl;
+    cout << "         ./gemma -mq [filename] -ms [filename] -mv [filename] -vc "
+            "[num] -o [prefix]"
+         << endl;
+    cout << "         or with summary statistics, replace bfile with mbfile, "
+            "or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"
+         << endl;
+    cout << "         ./gemma -beta [filename] -bfile [filename] -cat "
+            "[filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"
+         << endl;
+    cout << "         ./gemma -beta [filename] -bfile [filename] -cat "
+            "[filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"
+         << endl;
+    cout << " to fit a linear mixed model: " << endl;
+    cout << "         ./gemma -bfile [prefix] -k [filename] -lmm [num] -o "
+            "[prefix]"
+         << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -a [filename] -k "
+            "[filename] -lmm [num] -o [prefix]"
+         << endl;
+    cout << " to fit a linear mixed model to test g by e effects: " << endl;
+    cout << "         ./gemma -bfile [prefix] -gxe [filename] -k [filename] "
+            "-lmm [num] -o [prefix]"
+         << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -a [filename] -gxe "
+            "[filename] -k [filename] -lmm [num] -o [prefix]"
+         << endl;
+    cout << " to fit a univariate linear mixed model with different residual "
+            "weights for different individuals: "
+         << endl;
+    cout << "         ./gemma -bfile [prefix] -weight [filename] -k [filename] "
+            "-lmm [num] -o [prefix]"
+         << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -a [filename] "
+            "-weight [filename] -k [filename] -lmm [num] -o [prefix]"
+         << endl;
+    cout << " to fit a multivariate linear mixed model: " << endl;
+    cout << "         ./gemma -bfile [prefix] -k [filename] -lmm [num] -n "
+            "[num1] [num2] -o [prefix]"
+         << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -a [filename] -k "
+            "[filename] -lmm [num] -n [num1] [num2] -o [prefix]"
+         << endl;
+    cout << " to fit a Bayesian sparse linear mixed model: " << endl;
+    cout << "         ./gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -a [filename] -bslmm "
+            "[num] -o [prefix]"
+         << endl;
+    cout << " to obtain predicted values: " << endl;
+    cout << "         ./gemma -bfile [prefix] -epm [filename] -emu [filename] "
+            "-ebv [filename] -k [filename] -predict [num] -o [prefix]"
+         << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -epm [filename] -emu "
+            "[filename] -ebv [filename] -k [filename] -predict [num] -o "
+            "[prefix]"
+         << endl;
+    cout << " to calculate correlations between SNPs: " << endl;
+    cout << "         ./gemma -bfile [prefix] -calccor -o [prefix]" << endl;
+    cout << "         ./gemma -g [filename] -p [filename] -calccor -o [prefix]"
+         << endl;
+    cout << endl;
   }
 
-  if (option==2) {
-    cout<<" FILE I/O RELATED OPTIONS" << endl;
-    cout<<" -bfile    [prefix]       "<<" specify input PLINK binary ped file prefix."<<endl;
-    cout<<"          requires: *.fam, *.bim and *.bed files"<<endl;
-    cout<<"          missing value: -9"<<endl;
-    cout<<" -g        [filename]     "<<" specify input BIMBAM mean genotype file name"<<endl;
-    cout<<"          format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
-    cout<<"                  rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<" -p        [filename]     "<<" specify input BIMBAM phenotype file name"<<endl;
-    cout<<"          format: phenotype for individual 1"<<endl;
-    cout<<"                  phenotype for individual 2"<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<" -a        [filename]     "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl;
-    cout<<"          format: rs#1, base_position, chr_number"<<endl;
-    cout<<"                  rs#2, base_position, chr_number"<<endl;
-    cout<<"                  ..."<<endl;
+  if (option == 2) {
+    cout << " FILE I/O RELATED OPTIONS" << endl;
+    cout << " -bfile    [prefix]       "
+         << " specify input PLINK binary ped file prefix." << endl;
+    cout << "          requires: *.fam, *.bim and *.bed files" << endl;
+    cout << "          missing value: -9" << endl;
+    cout << " -g        [filename]     "
+         << " specify input BIMBAM mean genotype file name" << endl;
+    cout << "          format: rs#1, allele0, allele1, genotype for individual "
+            "1, genotype for individual 2, ..."
+         << endl;
+    cout << "                  rs#2, allele0, allele1, genotype for individual "
+            "1, genotype for individual 2, ..."
+         << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: NA" << endl;
+    cout << " -p        [filename]     "
+         << " specify input BIMBAM phenotype file name" << endl;
+    cout << "          format: phenotype for individual 1" << endl;
+    cout << "                  phenotype for individual 2" << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: NA" << endl;
+    cout << " -a        [filename]     "
+         << " specify input BIMBAM SNP annotation file name (optional)" << endl;
+    cout << "          format: rs#1, base_position, chr_number" << endl;
+    cout << "                  rs#2, base_position, chr_number" << endl;
+    cout << "                  ..." << endl;
 
     // WJA added.
-    cout<<" -oxford    [prefix]       "<<" specify input Oxford genotype bgen file prefix."<<endl;
-    cout<<"          requires: *.bgen, *.sample files"<<endl;
-
-    cout<<" -gxe      [filename]     "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl;
-    cout<<"          format: variable for individual 1"<<endl;
-    cout<<"                  variable for individual 2"<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<" -widv   [filename]     "<<" specify input file that contains a column of residual weights"<<endl;
-    cout<<"          format: variable for individual 1"<<endl;
-    cout<<"                  variable for individual 2"<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<" -k        [filename]     "<<" specify input kinship/relatedness matrix file name"<<endl;
-    cout<<" -mk       [filename]     "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl;
-    cout<<" -u        [filename]     "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl;
-    cout<<" -d        [filename]     "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl;
-    cout<<" -c        [filename]     "<<" specify input covariates file name (optional)"<<endl;
-    cout<<" -cat      [filename]     "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl;
-    cout<<" -beta     [filename]     "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl;
-    cout<<" -cor      [filename]     "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<"          note: the intercept (a column of 1s) may need to be included"<<endl;
-    cout<<" -epm      [filename]     "<<" specify input estimated parameter file name"<<endl;
-    cout<<" -en [n1] [n2] [n3] [n4]  "<<" specify values for the input estimated parameter file (with a header)"<<endl;
-    cout<<"          options: n1: rs column number"<<endl;
-    cout<<"                   n2: estimated alpha column number (0 to ignore)"<<endl;
-    cout<<"                   n3: estimated beta column number (0 to ignore)"<<endl;
-    cout<<"                   n4: estimated gamma column number (0 to ignore)"<<endl;
-    cout<<"          default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl;
-    cout<<" -ebv      [filename]     "<<" specify input estimated random effect (breeding value) file name"<<endl;
-    cout<<"          format: value for individual 1"<<endl;
-    cout<<"                  value for individual 2"<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<" -emu      [filename]     "<<" specify input log file name containing estimated mean"<<endl;
-    cout<<" -mu       [num]          "<<" specify input estimated mean value"<<endl;
-    cout<<" -gene     [filename]     "<<" specify input gene expression file name"<<endl;
-    cout<<"          format: header"<<endl;
-    cout<<"                  gene1, count for individual 1, count for individual 2, ..."<<endl;
-    cout<<"                  gene2, count for individual 1, count for individual 2, ..."<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: not allowed"<<endl;
-    cout<<" -r        [filename]     "<<" specify input total read count file name"<<endl;
-    cout<<"          format: total read count for individual 1"<<endl;
-    cout<<"                  total read count for individual 2"<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<" -snps     [filename]     "<<" specify input snps file name to only analyze a certain set of snps"<<endl;
-    cout<<"          format: rs#1"<<endl;
-    cout<<"                  rs#2"<<endl;
-    cout<<"                  ..."<<endl;
-    cout<<"          missing value: NA"<<endl;
-    cout<<" -silence                 "<<" silent terminal display"<<endl;
-    cout<<" -km       [num]          "<<" specify input kinship/relatedness file type (default 1)."<<endl;
-    cout<<"          options: 1: \"n by n matrix\" format"<<endl;
-    cout<<"                   2: \"id  id  value\" format"<<endl;
-    cout<<" -n        [num]          "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl;
-    cout<<" -pace     [num]          "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl;
-    cout<<" -outdir   [path]         "<<" specify output directory path (default \"./output/\")"<<endl;
-    cout<<" -o        [prefix]       "<<" specify output file prefix (default \"result\")"<<endl;
-    cout<<"          output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl;
-    cout<<"          output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl;
-    cout<<endl;
+    cout << " -oxford    [prefix]       "
+         << " specify input Oxford genotype bgen file prefix." << endl;
+    cout << "          requires: *.bgen, *.sample files" << endl;
+
+    cout << " -gxe      [filename]     "
+         << " specify input file that contains a column of environmental "
+            "factor for g by e tests"
+         << endl;
+    cout << "          format: variable for individual 1" << endl;
+    cout << "                  variable for individual 2" << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: NA" << endl;
+    cout << " -widv   [filename]     "
+         << " specify input file that contains a column of residual weights"
+         << endl;
+    cout << "          format: variable for individual 1" << endl;
+    cout << "                  variable for individual 2" << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: NA" << endl;
+    cout << " -k        [filename]     "
+         << " specify input kinship/relatedness matrix file name" << endl;
+    cout << " -mk       [filename]     "
+         << " specify input file which contains a list of kinship/relatedness "
+            "matrices"
+         << endl;
+    cout << " -u        [filename]     "
+         << " specify input file containing the eigen vectors of the "
+            "kinship/relatedness matrix"
+         << endl;
+    cout << " -d        [filename]     "
+         << " specify input file containing the eigen values of the "
+            "kinship/relatedness matrix"
+         << endl;
+    cout << " -c        [filename]     "
+         << " specify input covariates file name (optional)" << endl;
+    cout << " -cat      [filename]     "
+         << " specify input category file name (optional), which contains rs "
+            "cat1 cat2 ..."
+         << endl;
+    cout << " -beta     [filename]     "
+         << " specify input beta file name (optional), which contains rs beta "
+            "se_beta n_total (or n_mis and n_obs) estimates from a lm model"
+         << endl;
+    cout << " -cor      [filename]     "
+         << " specify input correlation file name (optional), which contains "
+            "rs window_size correlations from snps"
+         << endl;
+    cout << "          missing value: NA" << endl;
+    cout << "          note: the intercept (a column of 1s) may need to be "
+            "included"
+         << endl;
+    cout << " -epm      [filename]     "
+         << " specify input estimated parameter file name" << endl;
+    cout << " -en [n1] [n2] [n3] [n4]  "
+         << " specify values for the input estimated parameter file (with a "
+            "header)"
+         << endl;
+    cout << "          options: n1: rs column number" << endl;
+    cout << "                   n2: estimated alpha column number (0 to ignore)"
+         << endl;
+    cout << "                   n3: estimated beta column number (0 to ignore)"
+         << endl;
+    cout << "                   n4: estimated gamma column number (0 to ignore)"
+         << endl;
+    cout << "          default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if "
+            "-ebv is specified"
+         << endl;
+    cout << " -ebv      [filename]     "
+         << " specify input estimated random effect (breeding value) file name"
+         << endl;
+    cout << "          format: value for individual 1" << endl;
+    cout << "                  value for individual 2" << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: NA" << endl;
+    cout << " -emu      [filename]     "
+         << " specify input log file name containing estimated mean" << endl;
+    cout << " -mu       [num]          "
+         << " specify input estimated mean value" << endl;
+    cout << " -gene     [filename]     "
+         << " specify input gene expression file name" << endl;
+    cout << "          format: header" << endl;
+    cout << "                  gene1, count for individual 1, count for "
+            "individual 2, ..."
+         << endl;
+    cout << "                  gene2, count for individual 1, count for "
+            "individual 2, ..."
+         << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: not allowed" << endl;
+    cout << " -r        [filename]     "
+         << " specify input total read count file name" << endl;
+    cout << "          format: total read count for individual 1" << endl;
+    cout << "                  total read count for individual 2" << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: NA" << endl;
+    cout
+        << " -snps     [filename]     "
+        << " specify input snps file name to only analyze a certain set of snps"
+        << endl;
+    cout << "          format: rs#1" << endl;
+    cout << "                  rs#2" << endl;
+    cout << "                  ..." << endl;
+    cout << "          missing value: NA" << endl;
+    cout << " -silence                 "
+         << " silent terminal display" << endl;
+    cout << " -km       [num]          "
+         << " specify input kinship/relatedness file type (default 1)." << endl;
+    cout << "          options: 1: \"n by n matrix\" format" << endl;
+    cout << "                   2: \"id  id  value\" format" << endl;
+    cout << " -n        [num]          "
+         << " specify phenotype column in the phenotype/*.fam file (optional; "
+            "default 1)"
+         << endl;
+    cout << " -pace     [num]          "
+         << " specify terminal display update pace (default 100000 SNPs or "
+            "100000 iterations)."
+         << endl;
+    cout << " -outdir   [path]         "
+         << " specify output directory path (default \"./output/\")" << endl;
+    cout << " -o        [prefix]       "
+         << " specify output file prefix (default \"result\")" << endl;
+    cout << "          output: prefix.cXX.txt or prefix.sXX.txt from "
+            "kinship/relatedness matrix estimation"
+         << endl;
+    cout << "          output: prefix.assoc.txt and prefix.log.txt form "
+            "association tests"
+         << endl;
+    cout << endl;
   }
 
-  if (option==3) {
-    cout<<" SNP QC OPTIONS" << endl;
-    cout<<" -miss     [num]          "<<" specify missingness threshold (default 0.05)" << endl;
-    cout<<" -maf      [num]          "<<" specify minor allele frequency threshold (default 0.01)" << endl;
-    cout<<" -hwe      [num]          "<<" specify HWE test p value threshold (default 0; no test)" << endl;
-    cout<<" -r2       [num]          "<<" specify r-squared threshold (default 0.9999)" << endl;
-    cout<<" -notsnp                  "<<" minor allele frequency cutoff is not used" << endl;
-    cout<<endl;
+  if (option == 3) {
+    cout << " SNP QC OPTIONS" << endl;
+    cout << " -miss     [num]          "
+         << " specify missingness threshold (default 0.05)" << endl;
+    cout << " -maf      [num]          "
+         << " specify minor allele frequency threshold (default 0.01)" << endl;
+    cout << " -hwe      [num]          "
+         << " specify HWE test p value threshold (default 0; no test)" << endl;
+    cout << " -r2       [num]          "
+         << " specify r-squared threshold (default 0.9999)" << endl;
+    cout << " -notsnp                  "
+         << " minor allele frequency cutoff is not used" << endl;
+    cout << endl;
   }
 
-  if (option==4) {
-    cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
-    cout<<" -gk       [num]          "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
-    cout<<"          options: 1: centered XX^T/p"<<endl;
-    cout<<"                   2: standardized XX^T/p"<<endl;
-    cout<<"          note: non-polymorphic SNPs are excluded "<<endl;
-    cout<<endl;
+  if (option == 4) {
+    cout << " RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
+    cout << " -gk       [num]          "
+         << " specify which type of kinship/relatedness matrix to generate "
+            "(default 1)"
+         << endl;
+    cout << "          options: 1: centered XX^T/p" << endl;
+    cout << "                   2: standardized XX^T/p" << endl;
+    cout << "          note: non-polymorphic SNPs are excluded " << endl;
+    cout << endl;
   }
 
-  if (option==5) {
-    cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl;
-    cout<<" -eigen                   "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
-    cout<<endl;
+  if (option == 5) {
+    cout << " EIGEN-DECOMPOSITION OPTIONS" << endl;
+    cout << " -eigen                   "
+         << " specify to perform eigen decomposition of the loaded relatedness "
+            "matrix"
+         << endl;
+    cout << endl;
   }
 
-  if (option==6) {
-    cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
-    cout<<" -vc                      "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
-    cout<<"          options (with kinship file):   1: HE regression (default)"<<endl;
-    cout<<"                                         2: REML"<<endl;
-    cout<<"          options (with beta/cor files): 1: Centered genotypes (default)"<<endl;
-    cout<<"                                         2: Standardized genotypes"<<endl;
-    cout<<"                                         -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl;
-    cout<<"                                         -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl;
-    cout<<"                                         -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl;
-    cout<<endl;
+  if (option == 6) {
+    cout << " VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
+    cout << " -vc                      "
+         << " specify to perform variance component estimation for the loaded "
+            "relatedness matrix/matrices"
+         << endl;
+    cout
+        << "          options (with kinship file):   1: HE regression (default)"
+        << endl;
+    cout << "                                         2: REML" << endl;
+    cout << "          options (with beta/cor files): 1: Centered genotypes "
+            "(default)"
+         << endl;
+    cout << "                                         2: Standardized genotypes"
+         << endl;
+    cout << "                                         -crt -windowbp [num]"
+         << " specify the window size based on bp (default 1000000; 1Mb)"
+         << endl;
+    cout << "                                         -crt -windowcm [num]"
+         << " specify the window size based on cm (default 0)" << endl;
+    cout << "                                         -crt -windowns [num]"
+         << " specify the window size based on number of snps (default 0)"
+         << endl;
+    cout << endl;
   }
 
-  if (option==7) {
-    cout<<" LINEAR MODEL OPTIONS" << endl;
-    cout<<" -lm       [num]         "<<" specify analysis options (default 1)."<<endl;
-    cout<<"          options: 1: Wald test"<<endl;
-    cout<<"                   2: Likelihood ratio test"<<endl;
-    cout<<"                   3: Score test"<<endl;
-    cout<<"                   4: 1-3"<<endl;
-    cout<<endl;
+  if (option == 7) {
+    cout << " LINEAR MODEL OPTIONS" << endl;
+    cout << " -lm       [num]         "
+         << " specify analysis options (default 1)." << endl;
+    cout << "          options: 1: Wald test" << endl;
+    cout << "                   2: Likelihood ratio test" << endl;
+    cout << "                   3: Score test" << endl;
+    cout << "                   4: 1-3" << endl;
+    cout << endl;
   }
 
-  if (option==8) {
-    cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
-    cout<<" -lmm      [num]         "<<" specify analysis options (default 1)."<<endl;
-    cout<<"          options: 1: Wald test"<<endl;
-    cout<<"                   2: Likelihood ratio test"<<endl;
-    cout<<"                   3: Score test"<<endl;
-    cout<<"                   4: 1-3"<<endl;
-    cout<<"                   5: Parameter estimation in the null model only"<<endl;
-    cout<<" -lmin     [num]          "<<" specify minimal value for lambda (default 1e-5)" << endl;
-    cout<<" -lmax     [num]          "<<" specify maximum value for lambda (default 1e+5)" << endl;
-    cout<<" -region   [num]          "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
-    cout<<endl;
+  if (option == 8) {
+    cout << " LINEAR MIXED MODEL OPTIONS" << endl;
+    cout << " -lmm      [num]         "
+         << " specify analysis options (default 1)." << endl;
+    cout << "          options: 1: Wald test" << endl;
+    cout << "                   2: Likelihood ratio test" << endl;
+    cout << "                   3: Score test" << endl;
+    cout << "                   4: 1-3" << endl;
+    cout << "                   5: Parameter estimation in the null model only"
+         << endl;
+    cout << " -lmin     [num]          "
+         << " specify minimal value for lambda (default 1e-5)" << endl;
+    cout << " -lmax     [num]          "
+         << " specify maximum value for lambda (default 1e+5)" << endl;
+    cout
+        << " -region   [num]          "
+        << " specify the number of regions used to evaluate lambda (default 10)"
+        << endl;
+    cout << endl;
   }
 
-  if (option==9) {
-    cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
-    cout<<" -pnr				     "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl;
-    cout<<" -emi				     "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl;
-    cout<<" -nri				     "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl;
-    cout<<" -emp				     "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl;
-    cout<<" -nrp				     "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl;
-    cout<<" -crt				     "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl;
-    cout<<endl;
+  if (option == 9) {
+    cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
+    cout << " -pnr				     "
+         << " specify the pvalue threshold to use the Newton-Raphson's method "
+            "(default 0.001)"
+         << endl;
+    cout << " -emi				     "
+         << " specify the maximum number of iterations for the PX-EM method in "
+            "the null (default 10000)"
+         << endl;
+    cout << " -nri				     "
+         << " specify the maximum number of iterations for the "
+            "Newton-Raphson's method in the null (default 100)"
+         << endl;
+    cout << " -emp				     "
+         << " specify the precision for the PX-EM method in the null (default "
+            "0.0001)"
+         << endl;
+    cout << " -nrp				     "
+         << " specify the precision for the Newton-Raphson's method in the "
+            "null (default 0.0001)"
+         << endl;
+    cout << " -crt				     "
+         << " specify to output corrected pvalues for these pvalues that are "
+            "below the -pnr threshold"
+         << endl;
+    cout << endl;
   }
 
-  if (option==10) {
-    cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl;
-    cout<<" -bslmm	  [num]			 "<<" specify analysis options (default 1)."<<endl;
-    cout<<"          options: 1: BSLMM"<<endl;
-    cout<<"                   2: standard ridge regression/GBLUP (no mcmc)"<<endl;
-    cout<<"                   3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
-    cout<<"                   4: BSLMM with DAP for Hyper Parameter Estimation"<<endl;
-    cout<<"                   5: BSLMM with DAP for Fine Mapping"<<endl;
-
-    cout<<" -ldr	  [num]			 "<<" specify analysis options (default 1)."<<endl;
-    cout<<"          options: 1: LDR"<<endl;
-
-    cout<<"   MCMC OPTIONS" << endl;
-    cout<<"   Prior" << endl;
-    cout<<" -hmin     [num]          "<<" specify minimum value for h (default 0)" << endl;
-    cout<<" -hmax     [num]          "<<" specify maximum value for h (default 1)" << endl;
-    cout<<" -rmin     [num]          "<<" specify minimum value for rho (default 0)" << endl;
-    cout<<" -rmax     [num]          "<<" specify maximum value for rho (default 1)" << endl;
-    cout<<" -pmin     [num]          "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl;
-    cout<<" -pmax     [num]          "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
-    cout<<" -smin     [num]          "<<" specify minimum value for |gamma| (default 0)" << endl;
-    cout<<" -smax     [num]          "<<" specify maximum value for |gamma| (default 300)" << endl;
-
-    cout<<"   Proposal" << endl;
-    cout<<" -gmean    [num]          "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
-    cout<<" -hscale   [num]          "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
-    cout<<" -rscale   [num]          "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
-    cout<<" -pscale   [num]          "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
-
-    cout<<"   Others" << endl;
-    cout<<" -w        [num]          "<<" specify burn-in steps (default 100,000)" << endl;
-    cout<<" -s        [num]          "<<" specify sampling steps (default 1,000,000)" << endl;
-    cout<<" -rpace    [num]          "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl;
-    cout<<" -wpace    [num]          "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl;
-    cout<<" -seed     [num]          "<<" specify random seed (a random seed is generated by default)" << endl;
-    cout<<" -mh       [num]          "<<" specify number of MH steps in each iteration (default 10)" << endl;
-    cout<<"          requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
-    cout<<endl;
+  if (option == 10) {
+    cout << " MULTI-LOCUS ANALYSIS OPTIONS" << endl;
+    cout << " -bslmm	  [num]			 "
+         << " specify analysis options (default 1)." << endl;
+    cout << "          options: 1: BSLMM" << endl;
+    cout << "                   2: standard ridge regression/GBLUP (no mcmc)"
+         << endl;
+    cout << "                   3: probit BSLMM (requires 0/1 phenotypes)"
+         << endl;
+    cout
+        << "                   4: BSLMM with DAP for Hyper Parameter Estimation"
+        << endl;
+    cout << "                   5: BSLMM with DAP for Fine Mapping" << endl;
+
+    cout << " -ldr	  [num]			 "
+         << " specify analysis options (default 1)." << endl;
+    cout << "          options: 1: LDR" << endl;
+
+    cout << "   MCMC OPTIONS" << endl;
+    cout << "   Prior" << endl;
+    cout << " -hmin     [num]          "
+         << " specify minimum value for h (default 0)" << endl;
+    cout << " -hmax     [num]          "
+         << " specify maximum value for h (default 1)" << endl;
+    cout << " -rmin     [num]          "
+         << " specify minimum value for rho (default 0)" << endl;
+    cout << " -rmax     [num]          "
+         << " specify maximum value for rho (default 1)" << endl;
+    cout << " -pmin     [num]          "
+         << " specify minimum value for log10(pi) (default log10(1/p), where p "
+            "is the number of analyzed SNPs )"
+         << endl;
+    cout << " -pmax     [num]          "
+         << " specify maximum value for log10(pi) (default log10(1) )" << endl;
+    cout << " -smin     [num]          "
+         << " specify minimum value for |gamma| (default 0)" << endl;
+    cout << " -smax     [num]          "
+         << " specify maximum value for |gamma| (default 300)" << endl;
+
+    cout << "   Proposal" << endl;
+    cout << " -gmean    [num]          "
+         << " specify the mean for the geometric distribution (default: 2000)"
+         << endl;
+    cout << " -hscale   [num]          "
+         << " specify the step size scale for the proposal distribution of h "
+            "(value between 0 and 1, default min(10/sqrt(n),1) )"
+         << endl;
+    cout << " -rscale   [num]          "
+         << " specify the step size scale for the proposal distribution of rho "
+            "(value between 0 and 1, default min(10/sqrt(n),1) )"
+         << endl;
+    cout << " -pscale   [num]          "
+         << " specify the step size scale for the proposal distribution of "
+            "log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )"
+         << endl;
+
+    cout << "   Others" << endl;
+    cout << " -w        [num]          "
+         << " specify burn-in steps (default 100,000)" << endl;
+    cout << " -s        [num]          "
+         << " specify sampling steps (default 1,000,000)" << endl;
+    cout << " -rpace    [num]          "
+         << " specify recording pace, record one state in every [num] steps "
+            "(default 10)"
+         << endl;
+    cout << " -wpace    [num]          "
+         << " specify writing pace, write values down in every [num] recorded "
+            "steps (default 1000)"
+         << endl;
+    cout << " -seed     [num]          "
+         << " specify random seed (a random seed is generated by default)"
+         << endl;
+    cout << " -mh       [num]          "
+         << " specify number of MH steps in each iteration (default 10)"
+         << endl;
+    cout << "          requires: 0/1 phenotypes and -bslmm 3 option" << endl;
+    cout << endl;
   }
 
-  if (option==11) {
-    cout<<" PREDICTION OPTIONS" << endl;
-    cout<<" -predict  [num]			 "<<" specify prediction options (default 1)."<<endl;
-    cout<<"          options: 1: predict for individuals with missing phenotypes"<<endl;
-    cout<<"                   2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
-    cout<<endl;
+  if (option == 11) {
+    cout << " PREDICTION OPTIONS" << endl;
+    cout << " -predict  [num]			 "
+         << " specify prediction options (default 1)." << endl;
+    cout << "          options: 1: predict for individuals with missing "
+            "phenotypes"
+         << endl;
+    cout << "                   2: predict for individuals with missing "
+            "phenotypes, and convert the predicted values to probability "
+            "scale. Use only for files fitted with -bslmm 3 option"
+         << endl;
+    cout << endl;
   }
 
-  if (option==12) {
-    cout<<" CALC CORRELATION OPTIONS" << endl;
-    cout<<" -calccor       			 "<<endl;
-    cout<<" -windowbp       [num]            "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl;
-    cout<<" -windowcm       [num]            "<<" specify the window size based on cm (default 0; not used)" << endl;
-    cout<<" -windowns       [num]            "<<" specify the window size based on number of snps (default 0; not used)" << endl;
-    cout<<endl;
+  if (option == 12) {
+    cout << " CALC CORRELATION OPTIONS" << endl;
+    cout << " -calccor       			 " << endl;
+    cout << " -windowbp       [num]            "
+         << " specify the window size based on bp (default 1000000; 1Mb)"
+         << endl;
+    cout << " -windowcm       [num]            "
+         << " specify the window size based on cm (default 0; not used)"
+         << endl;
+    cout << " -windowns       [num]            "
+         << " specify the window size based on number of snps (default 0; not "
+            "used)"
+         << endl;
+    cout << endl;
   }
 
-  if (option==13) {
-    cout<<" NOTE"<<endl;
-    cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl;
-    cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl;
-    cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl;
-    cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl;
-    cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl;
-    cout<<endl;
+  if (option == 13) {
+    cout << " NOTE" << endl;
+    cout << " 1. Only individuals with non-missing phenotoypes and covariates "
+            "will be analyzed."
+         << endl;
+    cout << " 2. Missing genotoypes will be repalced with the mean genotype of "
+            "that SNP."
+         << endl;
+    cout << " 3. For lmm analysis, memory should be large enough to hold the "
+            "relatedness matrix and to perform eigen decomposition."
+         << endl;
+    cout << " 4. For multivariate lmm analysis, use a large -pnr for each snp "
+            "will increase computation time dramatically."
+         << endl;
+    cout << " 5. For bslmm analysis, in addition to 3, memory should be large "
+            "enough to hold the whole genotype matrix."
+         << endl;
+    cout << endl;
   }
 
   return;
@@ -457,2609 +700,2985 @@ void GEMMA::PrintHelp(size_t option) {
 // calccor: 71
 // gw:      72
 
-void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) {
-	string str;
-
-	for(int i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-bfile")==0 ||
-		    strcmp(argv[i], "--bfile")==0 ||
-		    strcmp(argv[i], "-b")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_bfile=str;
-		}
-		else if (strcmp(argv[i], "-mbfile")==0 ||
-			 strcmp(argv[i], "--mbfile")==0 ||
-			 strcmp(argv[i], "-mb")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_mbfile=str;
-		}
-		else if (strcmp(argv[i], "-silence")==0) {
-			cPar.mode_silence=true;
-		}
-		else if (strcmp(argv[i], "-g")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_geno=str;
-		}
-		else if (strcmp(argv[i], "-mg")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_mgeno=str;
-		}
-		else if (strcmp(argv[i], "-p")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_pheno=str;
-		}
-		else if (strcmp(argv[i], "-a")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_anno=str;
-		}
-
-		// WJA added.
-		else if (strcmp(argv[i], "-oxford")==0 ||
-			 strcmp(argv[i], "--oxford")==0 ||
-			 strcmp(argv[i], "-x")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_oxford=str;
-		}
-		else if (strcmp(argv[i], "-gxe")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_gxe=str;
-		}
-		else if (strcmp(argv[i], "-widv")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_weight=str;
-		}
-		else if (strcmp(argv[i], "-wsnp")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_wsnp=str;
-		}
-		else if (strcmp(argv[i], "-wcat")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_wcat=str;
-		}
-		else if (strcmp(argv[i], "-k")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_kin=str;
-		}
-		else if (strcmp(argv[i], "-mk")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_mk=str;
-		}
-		else if (strcmp(argv[i], "-u")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_ku=str;
-		}
-		else if (strcmp(argv[i], "-d")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_kd=str;
-		}
-		else if (strcmp(argv[i], "-c")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_cvt=str;
-		}
-		else if (strcmp(argv[i], "-cat")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_cat=str;
-		}
-		else if (strcmp(argv[i], "-mcat")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {
-			  continue;
-			}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_mcat=str;
-		}
-		else if (strcmp(argv[i], "-catc")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_catc=str;
-		}
-		else if (strcmp(argv[i], "-mcatc")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_mcatc=str;
-		}
-		else if (strcmp(argv[i], "-beta")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_beta=str;
-		}
-		else if (strcmp(argv[i], "-bf")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_bf=str;
-		}
-		else if (strcmp(argv[i], "-hyp")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_hyp=str;
-		}
-		else if (strcmp(argv[i], "-cor")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_cor=str;
-		}
-		else if (strcmp(argv[i], "-study")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_study=str;
-		}
-		else if (strcmp(argv[i], "-ref")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_ref=str;
-		}
-		else if (strcmp(argv[i], "-mstudy")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_mstudy=str;
-		}
-		else if (strcmp(argv[i], "-mref")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_mref=str;
-		}
-		else if (strcmp(argv[i], "-epm")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_epm=str;
-		}
-		else if (strcmp(argv[i], "-en")==0) {
-			while (argv[i+1] != NULL && argv[i+1][0] != '-') {
-				++i;
-				str.clear();
-				str.assign(argv[i]);
-				cPar.est_column.push_back(atoi(str.c_str()));
-			}
-		}
-		else if (strcmp(argv[i], "-ebv")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_ebv=str;
-		}
-		else if (strcmp(argv[i], "-emu")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_log=str;
-		}
-		else if (strcmp(argv[i], "-mu")==0) {
-			if(argv[i+1] == NULL) {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.pheno_mean=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-gene")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_gene=str;
-		}
-		else if (strcmp(argv[i], "-r")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_read=str;
-		}
-		else if (strcmp(argv[i], "-snps")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_snps=str;
-		}
-		else if (strcmp(argv[i], "-km")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.k_mode=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-n")==0) {
-			(cPar.p_column).clear();
-			while (argv[i+1] != NULL && argv[i+1][0] != '-') {
-				++i;
-				str.clear();
-				str.assign(argv[i]);
-				(cPar.p_column).push_back(atoi(str.c_str()));
-			}
-		}
-		else if (strcmp(argv[i], "-pace")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.d_pace=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-outdir")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.path_out=str;
-		}
-		else if (strcmp(argv[i], "-o")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.file_out=str;
-		}
-		else if (strcmp(argv[i], "-miss")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.miss_level=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-maf")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			if (cPar.maf_level!=-1) {cPar.maf_level=atof(str.c_str());}
-		}
-		else if (strcmp(argv[i], "-hwe")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.hwe_level=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-r2")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.r2_level=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-notsnp")==0) {
-			cPar.maf_level=-1;
-		}
-		else if (strcmp(argv[i], "-gk")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=21; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=20+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-gs")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=25; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=24+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-gq")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=27; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=26+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-gw")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=72; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=71+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-sample")==0) {
-		  if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.ni_subsample=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-eigen")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=31; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=30+atoi(str.c_str());
-		}
-        else if (strcmp(argv[i], "-calccor")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=71; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=70+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-vc")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=61; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=60+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-ci")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=66; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=65+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-pve")==0) {
-		  double s=0;
-		  while (argv[i+1] != NULL && (argv[i+1][0] != '-' || !isalpha(argv[i+1][1]) ) ) {
-			  ++i;
-			  str.clear();
-			  str.assign(argv[i]);
-			  cPar.v_pve.push_back(atof(str.c_str()));
-			  s+=atof(str.c_str());
-			}
-			if (s==1) {
-			  cout<<"summation of pve equals one."<<endl;
-			}
-		}
-		else if (strcmp(argv[i], "-blocks")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.n_block=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-noconstrain")==0) {
-			cPar.noconstrain=true;
-		}
-		else if (strcmp(argv[i], "-lm")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=51; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=50+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-fa")==0 || strcmp(argv[i], "-lmm")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=1; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-lmin")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.l_min=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-lmax")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.l_max=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-region")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.n_region=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-pnr")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.p_nr=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-emi")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.em_iter=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-nri")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.nr_iter=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-emp")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.em_prec=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-nrp")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.nr_prec=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-crt")==0) {
-			cPar.crt=1;
-		}
-		else if (strcmp(argv[i], "-bslmm")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=11; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=10+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-hmin")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.h_min=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-hmax")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.h_max=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-rmin")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.rho_min=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-rmax")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.rho_max=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-pmin")==0) {
-			if(argv[i+1] == NULL) {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.logp_min=atof(str.c_str())*log(10.0);
-		}
-		else if (strcmp(argv[i], "-pmax")==0) {
-			if(argv[i+1] == NULL) {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.logp_max=atof(str.c_str())*log(10.0);
-		}
-		else if (strcmp(argv[i], "-smin")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.s_min=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-smax")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.s_max=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-gmean")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.geo_mean=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-hscale")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.h_scale=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-rscale")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.rho_scale=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-pscale")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.logp_scale=atof(str.c_str())*log(10.0);
-		}
-		else if (strcmp(argv[i], "-w")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.w_step=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-s")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.s_step=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-rpace")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.r_pace=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-wpace")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.w_pace=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-seed")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.randseed=atol(str.c_str());
-		}
-		else if (strcmp(argv[i], "-mh")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.n_mh=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-predict")==0) {
-			if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=41; continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.a_mode=40+atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-windowcm")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.window_cm=atof(str.c_str());
-		}
-		else if (strcmp(argv[i], "-windowbp")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.window_bp=atoi(str.c_str());
-		}
-		else if (strcmp(argv[i], "-windowns")==0) {
-			if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
-			++i;
-			str.clear();
-			str.assign(argv[i]);
-			cPar.window_ns=atoi(str.c_str());
-		}
-		else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;}
-	}
-
-	// Change prediction mode to 43 if the epm file is not provided.
-	if (cPar.a_mode==41 && cPar.file_epm.empty()) {
-	  cPar.a_mode=43;
-	}
-
-	return;
+void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
+  string str;
+
+  for (int i = 1; i < argc; i++) {
+    if (strcmp(argv[i], "-bfile") == 0 || strcmp(argv[i], "--bfile") == 0 ||
+        strcmp(argv[i], "-b") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_bfile = str;
+    } else if (strcmp(argv[i], "-mbfile") == 0 ||
+               strcmp(argv[i], "--mbfile") == 0 ||
+               strcmp(argv[i], "-mb") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_mbfile = str;
+    } else if (strcmp(argv[i], "-silence") == 0) {
+      cPar.mode_silence = true;
+    } else if (strcmp(argv[i], "-g") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_geno = str;
+    } else if (strcmp(argv[i], "-mg") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_mgeno = str;
+    } else if (strcmp(argv[i], "-p") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_pheno = str;
+    } else if (strcmp(argv[i], "-a") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_anno = str;
+    }
+
+    // WJA added.
+    else if (strcmp(argv[i], "-oxford") == 0 ||
+             strcmp(argv[i], "--oxford") == 0 || strcmp(argv[i], "-x") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_oxford = str;
+    } else if (strcmp(argv[i], "-gxe") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_gxe = str;
+    } else if (strcmp(argv[i], "-widv") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_weight = str;
+    } else if (strcmp(argv[i], "-wsnp") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_wsnp = str;
+    } else if (strcmp(argv[i], "-wcat") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_wcat = str;
+    } else if (strcmp(argv[i], "-k") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_kin = str;
+    } else if (strcmp(argv[i], "-mk") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_mk = str;
+    } else if (strcmp(argv[i], "-u") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_ku = str;
+    } else if (strcmp(argv[i], "-d") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_kd = str;
+    } else if (strcmp(argv[i], "-c") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_cvt = str;
+    } else if (strcmp(argv[i], "-cat") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_cat = str;
+    } else if (strcmp(argv[i], "-mcat") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_mcat = str;
+    } else if (strcmp(argv[i], "-catc") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_catc = str;
+    } else if (strcmp(argv[i], "-mcatc") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_mcatc = str;
+    } else if (strcmp(argv[i], "-beta") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_beta = str;
+    } else if (strcmp(argv[i], "-bf") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_bf = str;
+    } else if (strcmp(argv[i], "-hyp") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_hyp = str;
+    } else if (strcmp(argv[i], "-cor") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_cor = str;
+    } else if (strcmp(argv[i], "-study") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_study = str;
+    } else if (strcmp(argv[i], "-ref") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_ref = str;
+    } else if (strcmp(argv[i], "-mstudy") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_mstudy = str;
+    } else if (strcmp(argv[i], "-mref") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_mref = str;
+    } else if (strcmp(argv[i], "-epm") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_epm = str;
+    } else if (strcmp(argv[i], "-en") == 0) {
+      while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
+        ++i;
+        str.clear();
+        str.assign(argv[i]);
+        cPar.est_column.push_back(atoi(str.c_str()));
+      }
+    } else if (strcmp(argv[i], "-ebv") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_ebv = str;
+    } else if (strcmp(argv[i], "-emu") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_log = str;
+    } else if (strcmp(argv[i], "-mu") == 0) {
+      if (argv[i + 1] == NULL) {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.pheno_mean = atof(str.c_str());
+    } else if (strcmp(argv[i], "-gene") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_gene = str;
+    } else if (strcmp(argv[i], "-r") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_read = str;
+    } else if (strcmp(argv[i], "-snps") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_snps = str;
+    } else if (strcmp(argv[i], "-km") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.k_mode = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-n") == 0) {
+      (cPar.p_column).clear();
+      while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
+        ++i;
+        str.clear();
+        str.assign(argv[i]);
+        (cPar.p_column).push_back(atoi(str.c_str()));
+      }
+    } else if (strcmp(argv[i], "-pace") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.d_pace = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-outdir") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.path_out = str;
+    } else if (strcmp(argv[i], "-o") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.file_out = str;
+    } else if (strcmp(argv[i], "-miss") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.miss_level = atof(str.c_str());
+    } else if (strcmp(argv[i], "-maf") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      if (cPar.maf_level != -1) {
+        cPar.maf_level = atof(str.c_str());
+      }
+    } else if (strcmp(argv[i], "-hwe") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.hwe_level = atof(str.c_str());
+    } else if (strcmp(argv[i], "-r2") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.r2_level = atof(str.c_str());
+    } else if (strcmp(argv[i], "-notsnp") == 0) {
+      cPar.maf_level = -1;
+    } else if (strcmp(argv[i], "-gk") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 21;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 20 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-gs") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 25;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 24 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-gq") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 27;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 26 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-gw") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 72;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 71 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-sample") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.ni_subsample = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-eigen") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 31;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 30 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-calccor") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 71;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 70 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-vc") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 61;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 60 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-ci") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 66;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 65 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-pve") == 0) {
+      double s = 0;
+      while (argv[i + 1] != NULL &&
+             (argv[i + 1][0] != '-' || !isalpha(argv[i + 1][1]))) {
+        ++i;
+        str.clear();
+        str.assign(argv[i]);
+        cPar.v_pve.push_back(atof(str.c_str()));
+        s += atof(str.c_str());
+      }
+      if (s == 1) {
+        cout << "summation of pve equals one." << endl;
+      }
+    } else if (strcmp(argv[i], "-blocks") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.n_block = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-noconstrain") == 0) {
+      cPar.noconstrain = true;
+    } else if (strcmp(argv[i], "-lm") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 51;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 50 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-fa") == 0 || strcmp(argv[i], "-lmm") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 1;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-lmin") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.l_min = atof(str.c_str());
+    } else if (strcmp(argv[i], "-lmax") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.l_max = atof(str.c_str());
+    } else if (strcmp(argv[i], "-region") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.n_region = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-pnr") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.p_nr = atof(str.c_str());
+    } else if (strcmp(argv[i], "-emi") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.em_iter = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-nri") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.nr_iter = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-emp") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.em_prec = atof(str.c_str());
+    } else if (strcmp(argv[i], "-nrp") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.nr_prec = atof(str.c_str());
+    } else if (strcmp(argv[i], "-crt") == 0) {
+      cPar.crt = 1;
+    } else if (strcmp(argv[i], "-bslmm") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 11;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 10 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-hmin") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.h_min = atof(str.c_str());
+    } else if (strcmp(argv[i], "-hmax") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.h_max = atof(str.c_str());
+    } else if (strcmp(argv[i], "-rmin") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.rho_min = atof(str.c_str());
+    } else if (strcmp(argv[i], "-rmax") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.rho_max = atof(str.c_str());
+    } else if (strcmp(argv[i], "-pmin") == 0) {
+      if (argv[i + 1] == NULL) {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.logp_min = atof(str.c_str()) * log(10.0);
+    } else if (strcmp(argv[i], "-pmax") == 0) {
+      if (argv[i + 1] == NULL) {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.logp_max = atof(str.c_str()) * log(10.0);
+    } else if (strcmp(argv[i], "-smin") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.s_min = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-smax") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.s_max = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-gmean") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.geo_mean = atof(str.c_str());
+    } else if (strcmp(argv[i], "-hscale") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.h_scale = atof(str.c_str());
+    } else if (strcmp(argv[i], "-rscale") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.rho_scale = atof(str.c_str());
+    } else if (strcmp(argv[i], "-pscale") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.logp_scale = atof(str.c_str()) * log(10.0);
+    } else if (strcmp(argv[i], "-w") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.w_step = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-s") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.s_step = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-rpace") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.r_pace = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-wpace") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.w_pace = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-seed") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.randseed = atol(str.c_str());
+    } else if (strcmp(argv[i], "-mh") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.n_mh = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-predict") == 0) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        cPar.a_mode = 41;
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.a_mode = 40 + atoi(str.c_str());
+    } else if (strcmp(argv[i], "-windowcm") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.window_cm = atof(str.c_str());
+    } else if (strcmp(argv[i], "-windowbp") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.window_bp = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-windowns") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.window_ns = atoi(str.c_str());
+    } else {
+      cout << "error! unrecognized option: " << argv[i] << endl;
+      cPar.error = true;
+      continue;
+    }
+  }
+
+  // Change prediction mode to 43 if the epm file is not provided.
+  if (cPar.a_mode == 41 && cPar.file_epm.empty()) {
+    cPar.a_mode = 43;
+  }
+
+  return;
 }
 
-void GEMMA::BatchRun (PARAM &cPar) {
-	clock_t time_begin, time_start;
-	time_begin=clock();
-
-	// Read Files.
-	cout<<"Reading Files ... "<<endl;
-	cPar.ReadFiles();
-	if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;}
-	cPar.CheckData();
-	if (cPar.error==true) {cout<<"error! fail to check data. "<<endl; return;}
-
-	//Prediction for bslmm
-	if (cPar.a_mode==41 || cPar.a_mode==42) {
-		gsl_vector *y_prdt;
-
-		y_prdt=gsl_vector_alloc (cPar.ni_total-cPar.ni_test);
-
-		//set to zero
-		gsl_vector_set_zero (y_prdt);
-
-		PRDT cPRDT;
-		cPRDT.CopyFromParam(cPar);
-
-		//add breeding value if needed
-		if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
-			cout<<"Adding Breeding Values ... "<<endl;
-
-			gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
-			gsl_vector *u_hat=gsl_vector_alloc (cPar.ni_test);
-
-			//read kinship matrix and set u_hat
-			vector<int> indicator_all;
-			size_t c_bv=0;
-			for (size_t i=0; i<cPar.indicator_idv.size(); i++) {
-				indicator_all.push_back(1);
-				if (cPar.indicator_bv[i]==1) {gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); c_bv++;}
-			}
-
-			ReadFile_kin (cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, cPar.error, G);
-			if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
-			//read u
-			cPRDT.AddBV(G, u_hat, y_prdt);
-
-			gsl_matrix_free(G);
-			gsl_vector_free(u_hat);
-		}
-
-		//add beta
-		if (!cPar.file_bfile.empty()) {
-			cPRDT.AnalyzePlink (y_prdt);
-		}
-		else {
-			cPRDT.AnalyzeBimbam (y_prdt);
-		}
-
-		//add mu
-		gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
-
-		//convert y to probability if needed
-		if (cPar.a_mode==42) {
-			double d;
-			for (size_t i=0; i<y_prdt->size; i++) {
-				d=gsl_vector_get(y_prdt, i);
-				d=gsl_cdf_gaussian_P(d, 1.0);
-				gsl_vector_set(y_prdt, i, d);
-			}
-		}
-
-
-		cPRDT.CopyToParam(cPar);
-
-		cPRDT.WriteFiles(y_prdt);
-
-		gsl_vector_free(y_prdt);
-	}
-
-	//Prediction with kinship matrix only; for one or more phenotypes
-	if (cPar.a_mode==43) {
-		//first, use individuals with full phenotypes to obtain estimates of Vg and Ve
-		gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
-		gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
-		gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
-		gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
-		gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
-		gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
-		gsl_vector *eval=gsl_vector_alloc (Y->size1);
-
-		gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph);
-		gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt);
-
-		//set covariates matrix W and phenotype matrix Y
-		//an intercept should be included in W,
-		cPar.CopyCvtPhen (W, Y, 0);
-		cPar.CopyCvtPhen (W_full, Y_full, 1);
-
-		gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph);
-		gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1);
-		gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2);
-
-		//read relatedness matrix G, and matrix G_full
-		ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
-		if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-		ReadFile_kin (cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full);
-		if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
-		//center matrix G
-		CenterMatrix (G);
-		CenterMatrix (G_full);
-
-		//eigen-decomposition and calculate trace_G
-		cout<<"Start Eigen-Decomposition..."<<endl;
-		time_start=clock();
-		cPar.trace_G=EigenDecomp (G, U, eval, 0);
-		cPar.trace_G=0.0;
-		for (size_t i=0; i<eval->size; i++) {
-			if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
-			cPar.trace_G+=gsl_vector_get (eval, i);
-		}
-		cPar.trace_G/=(double)eval->size;
-		cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		//calculate UtW and Uty
-		CalcUtX (U, W, UtW);
-		CalcUtX (U, Y, UtY);
-
-		//calculate variance component and beta estimates
-		//and then obtain predicted values
-		if (cPar.n_ph==1) {
-			gsl_vector *beta=gsl_vector_alloc (W->size2);
-			gsl_vector *se_beta=gsl_vector_alloc (W->size2);
-
-			double lambda, logl, vg, ve;
-			gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
-			//obtain estimates
-			CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, lambda, logl);
-			CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, lambda, vg, ve, beta, se_beta);
-
-			cout<<"REMLE estimate for vg in the null model = "<<vg<<endl;
-			cout<<"REMLE estimate for ve in the null model = "<<ve<<endl;
-			cPar.vg_remle_null=vg; cPar.ve_remle_null=ve;
-
-			//obtain Y_hat from fixed effects
-			gsl_vector_view Yhat_col=gsl_matrix_column (Y_hat, 0);
-			gsl_blas_dgemv (CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
-
-			//obtain H
-			gsl_matrix_set_identity (H_full);
-			gsl_matrix_scale (H_full, ve);
-			gsl_matrix_scale (G_full, vg);
-			gsl_matrix_add (H_full, G_full);
-
-			//free matrices
-			gsl_vector_free(beta);
-			gsl_vector_free(se_beta);
-		} else {
-			gsl_matrix *Vg=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
-			gsl_matrix *Ve=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
-			gsl_matrix *B=gsl_matrix_alloc (cPar.n_ph, W->size2);
-			gsl_matrix *se_B=gsl_matrix_alloc (cPar.n_ph, W->size2);
-
-			//obtain estimates
-			CalcMvLmmVgVeBeta (eval, UtW, UtY, cPar.em_iter, cPar.nr_iter, cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max, cPar.n_region, Vg, Ve, B, se_B);
-
-			cout<<"REMLE estimate for Vg in the null model: "<<endl;
-			for (size_t i=0; i<Vg->size1; i++) {
-				for (size_t j=0; j<=i; j++) {
-					cout<<gsl_matrix_get(Vg, i, j)<<"\t";
-				}
-				cout<<endl;
-			}
-			cout<<"REMLE estimate for Ve in the null model: "<<endl;
-			for (size_t i=0; i<Ve->size1; i++) {
-				for (size_t j=0; j<=i; j++) {
-					cout<<gsl_matrix_get(Ve, i, j)<<"\t";
-				}
-				cout<<endl;
-			}
-			cPar.Vg_remle_null.clear();
-			cPar.Ve_remle_null.clear();
-			for (size_t i=0; i<Vg->size1; i++) {
-				for (size_t j=i; j<Vg->size2; j++) {
-					cPar.Vg_remle_null.push_back(gsl_matrix_get (Vg, i, j) );
-					cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) );
-				}
-			}
-
-			//obtain Y_hat from fixed effects
-			gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
-
-			//obtain H
-			KroneckerSym(G_full, Vg, H_full);
-			for (size_t i=0; i<G_full->size1; i++) {
-				gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2);
-				gsl_matrix_add (&H_sub.matrix, Ve);
-			}
-
-			//free matrices
-			gsl_matrix_free (Vg);
-			gsl_matrix_free (Ve);
-			gsl_matrix_free (B);
-			gsl_matrix_free (se_B);
-		}
-
-		PRDT cPRDT;
-
-		cPRDT.CopyFromParam(cPar);
-
-		cout<<"Predicting Missing Phentypes ... "<<endl;
-		time_start=clock();
-		cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
-		cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		cPRDT.WriteFiles(Y_full);
-
-		gsl_matrix_free(Y);
-		gsl_matrix_free(W);
-		gsl_matrix_free(G);
-		gsl_matrix_free(U);
-		gsl_matrix_free(UtW);
-		gsl_matrix_free(UtY);
-		gsl_vector_free(eval);
-
-		gsl_matrix_free(Y_full);
-		gsl_matrix_free(Y_hat);
-		gsl_matrix_free(W_full);
-		gsl_matrix_free(G_full);
-		gsl_matrix_free(H_full);
-	}
-
-
-	//Generate Kinship matrix
-	if (cPar.a_mode==21 || cPar.a_mode==22) {
-		cout<<"Calculating Relatedness Matrix ... "<<endl;
-
-		gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
-
-		time_start=clock();
-		cPar.CalcKin (G);
-		cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		if (cPar.error==true) {cout<<"error! fail to calculate relatedness matrix. "<<endl; return;}
-
-		if (cPar.a_mode==21) {
-			cPar.WriteMatrix (G, "cXX");
-		} else {
-			cPar.WriteMatrix (G, "sXX");
-		}
-
-		gsl_matrix_free (G);
-	}
-
-	//Compute the LDSC weights (not implemented yet)
-	if (cPar.a_mode==72) {
-		cout<<"Calculating Weights ... "<<endl;
-
-		VARCOV cVarcov;
-		cVarcov.CopyFromParam(cPar);
-
-		if (!cPar.file_bfile.empty()) {
-		  cVarcov.AnalyzePlink ();
-		} else {
-		  cVarcov.AnalyzeBimbam ();
-		}
-
-		cVarcov.CopyToParam(cPar);
-	}
-
-	// Compute the S matrix (and its variance), that is used for
-	// variance component estimation using summary statistics.
-	if (cPar.a_mode==25 || cPar.a_mode==26) {
-	  cout<<"Calculating the S Matrix ... "<<endl;
-
-	  gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
-	  gsl_vector *ns=gsl_vector_alloc (cPar.n_vc+1);
-	  gsl_matrix_set_zero(S);
-	  gsl_vector_set_zero(ns);
-
-	  gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
-	  gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
-	  gsl_vector_view ns_vec=gsl_vector_subvector(ns, 0, cPar.n_vc);
-
-	  gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
-	  gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
-	  gsl_matrix_set_zero (K);
-	  gsl_matrix_set_zero (A);
-
-	  gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
-	  gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
-
-	  cPar.CopyCvtPhen (W, y, 0);
-
-	  set<string> setSnps_beta;
-	  map <string, double> mapRS2wA, mapRS2wK;
-
-	  cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
-	  time_start=clock();
-	  cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector);
-	  cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	  if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
-	  gsl_vector_set (ns, cPar.n_vc, cPar.ni_test);
-
-	  cPar.WriteMatrix (S, "S");
-	  cPar.WriteVector (ns, "size");
-	  cPar.WriteVar ("snps");
-
-	  gsl_matrix_free (S);
-	  gsl_vector_free (ns);
-
-	  gsl_matrix_free (A);
-	  gsl_matrix_free (K);
-
-	  gsl_vector_free (y);
-	  gsl_matrix_free (K);
-	}
-
-	//Compute the q vector, that is used for variance component estimation using summary statistics
-	if (cPar.a_mode==27 || cPar.a_mode==28) {
-	  gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
-	  gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
-	  gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-	  gsl_vector_set_zero (q);
-	  gsl_vector_set_zero (s);
-
-	  gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
-
-	  vector<size_t> vec_cat, vec_ni;
-	  vector<double> vec_weight, vec_z2;
-	  map<string, double> mapRS2weight;
-	  mapRS2weight.clear();
-
-	  time_start=clock();
-	  ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test);
-	  cout<<"## number of total individuals = "<<cPar.ni_total<<endl;
-	  cout<<"## number of total SNPs = "<<cPar.ns_total<<endl;
-	  cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-	  cout<<"## number of variance components = "<<cPar.n_vc<<endl;
-	  cout<<"Calculating the q vector ... "<<endl;
-	  Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
-	  cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	  if (cPar.error==true) {cout<<"error! fail to calculate the q vector. "<<endl; return;}
-
-	  gsl_vector_set (s, cPar.n_vc, cPar.ni_total);
-
-	  cPar.WriteMatrix (Vq, "Vq");
-	  cPar.WriteVector (q, "q");
-	  cPar.WriteVector (s, "size");
-	  /*
-	  for (size_t i=0; i<cPar.n_vc; i++) {
-	    cout<<gsl_vector_get(q, i)<<endl;
-	  }
-	  */
-	  gsl_matrix_free (Vq);
-	  gsl_vector_free (q);
-	  gsl_vector_free (s);
-	}
-
-	// Calculate SNP covariance.
-	if (cPar.a_mode==71) {
-	  VARCOV cVarcov;
-	  cVarcov.CopyFromParam(cPar);
-
-	  if (!cPar.file_bfile.empty()) {
-            cVarcov.AnalyzePlink ();
-	  } else {
-            cVarcov.AnalyzeBimbam ();
-	  }
-
-	  cVarcov.CopyToParam(cPar);
-	}
-
-	// LM.
-	if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) {  //Fit LM
-		gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
-		gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
-
-		//set covariates matrix W and phenotype matrix Y
-		//an intercept should be included in W,
-		cPar.CopyCvtPhen (W, Y, 0);
-
-		//Fit LM or mvLM
-		if (cPar.n_ph==1) {
-			LM cLm;
-			cLm.CopyFromParam(cPar);
-
-			gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-
-			if (!cPar.file_gene.empty()) {
-				cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype
-			} else if (!cPar.file_bfile.empty()) {
-				cLm.AnalyzePlink (W, &Y_col.vector);
-			} else if (!cPar.file_oxford.empty()) {
-				cLm.Analyzebgen (W, &Y_col.vector);
-			} else {
-				cLm.AnalyzeBimbam (W, &Y_col.vector);
-			}
-
-			cLm.WriteFiles();
-			cLm.CopyToParam(cPar);
-		}
-		/*
-		else {
-			MVLM cMvlm;
-			cMvlm.CopyFromParam(cPar);
-
-			if (!cPar.file_bfile.empty()) {
-				cMvlm.AnalyzePlink (W, Y);
-			} else {
-				cMvlm.AnalyzeBimbam (W, Y);
-			}
-
-			cMvlm.WriteFiles();
-			cMvlm.CopyToParam(cPar);
-		}
-		*/
-		//release all matrices and vectors
-		gsl_matrix_free (Y);
-		gsl_matrix_free (W);
-	}
-
-	//VC estimation with one or multiple kinship matrices
-	//REML approach only
-	//if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp
-	//for one phenotype only;
-	if (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) {
-	  if (!cPar.file_beta.empty() ) {
-	    //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
-	    //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
-	    //update indicator_snps, so that the numbers are in accordance with mapRS2wK
-	    set<string> setSnps_beta;
-	    ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
-	    map <string, double> mapRS2wA, mapRS2wK;
-	    cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
-	    cPar.UpdateSNP (mapRS2wK);
-
-	    // Setup matrices and vectors.
-	    gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
-	    gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
-	    gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
-	    gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-
-	    gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
-	    gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
-
-	    gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
-	    gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
-
-	    gsl_matrix_set_zero (K);
-	    gsl_matrix_set_zero (A);
-
-	    gsl_matrix_set_zero(S);
-	    gsl_matrix_set_zero(Vq);
-	    gsl_vector_set_zero (q);
-	    gsl_vector_set_zero (s);
-
-	    cPar.CopyCvtPhen (W, y, 0);
-
-	    gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
-	    gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
-	    gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
-
-	    vector<size_t> vec_cat, vec_ni;
-	    vector<double> vec_weight, vec_z2;
-
-	    //read beta, based on the mapRS2wK
-	    ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test);
-
-	    cout<<"Study Panel: "<<endl;
-	    cout<<"## number of total individuals = "<<cPar.ni_study<<endl;
-	    cout<<"## number of total SNPs = "<<cPar.ns_study<<endl;
-	    cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-	    cout<<"## number of variance components = "<<cPar.n_vc<<endl;
-
-	    //compute q
-	    Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
-
-	    //compute S
-	    time_start=clock();
-	    cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
-	    cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
-	    //compute vc estimates
-	    CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
-	    //if LDSC weights, then compute the weights and run the above steps again
-	    if (cPar.a_mode==62) {
-	      //compute the weights and normalize the weights for A
-	      cPar.UpdateWeight (1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
-
-	      //read beta file again, and update weigths vector
-	      ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_total, cPar.ns_test);
-
-	      //compute q
-	      Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
-
-	      //compute S
-	      time_start=clock();
-	      cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
-	      cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	      if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
-	      //compute vc estimates
-	      CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-	    }
-
-	    gsl_vector_set (s, cPar.n_vc, cPar.ni_test);
-
-	    cPar.WriteMatrix (S, "S");
-	    cPar.WriteMatrix (Vq, "Vq");
-	    cPar.WriteVector (q, "q");
-	    cPar.WriteVector (s, "size");
-
-	    gsl_matrix_free (S);
-	    gsl_matrix_free (Vq);
-	    gsl_vector_free (q);
-	    gsl_vector_free (s);
-
-	    gsl_matrix_free (A);
-	    gsl_matrix_free (K);
-	    gsl_vector_free (y);
-	    gsl_matrix_free (W);
-	  } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
-	    if (!cPar.file_study.empty()) {
-	      string sfile=cPar.file_study+".size.txt";
-	      CountFileLines (sfile, cPar.n_vc);
-	    } else {
-	      string file_name;
-	      igzstream infile (cPar.file_mstudy.c_str(), igzstream::in);
-	      if (!infile) {cout<<"error! fail to open mstudy file: "<<cPar.file_study<<endl; return;}
-
-	      safeGetline(infile, file_name);
-
-	      infile.clear();
-	      infile.close();
-
-	      string sfile=file_name+".size.txt";
-	      CountFileLines (sfile, cPar.n_vc);
-	    }
-
-	    cPar.n_vc=cPar.n_vc-1;
-
-	    gsl_matrix *S=gsl_matrix_alloc (2*cPar.n_vc, cPar.n_vc);
-	    gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
-	    //gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1, (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
-	    //gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1, (n_lines*(n_lines+1))/2*(n_lines+1) );
-	    gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
-	    gsl_vector *s_study=gsl_vector_alloc (cPar.n_vc);
-	    gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
-	    gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-
-	    gsl_matrix_set_zero(S);
-	    gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
-	    gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
-
-	    gsl_matrix_set_zero(Vq);
-	    //gsl_matrix_set_zero(V);
-	    //gsl_matrix_set_zero(Vslope);
-	    gsl_vector_set_zero(q);
-	    gsl_vector_set_zero(s_study);
-	    gsl_vector_set_zero(s_ref);
-
-	    if (!cPar.file_study.empty()) {
-	      ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
-	    } else {
-	      ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
-	    }
-
-	    if (!cPar.file_ref.empty()) {
-	      ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
-	    } else {
-	      ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
-	    }
-
-	    cout<<"## number of variance components = "<<cPar.n_vc<<endl;
-	    cout<<"## number of individuals in the sample = "<<cPar.ni_study<<endl;
-	    cout<<"## number of individuals in the reference = "<<cPar.ni_ref<<endl;
-
-	    CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
-	    gsl_vector_view s_sub=gsl_vector_subvector (s, 0, cPar.n_vc);
-	    gsl_vector_memcpy (&s_sub.vector, s_ref);
-	    gsl_vector_set (s, cPar.n_vc, cPar.ni_ref);
-
-	    cPar.WriteMatrix (S, "S");
-	    cPar.WriteMatrix (Vq, "Vq");
-	    cPar.WriteVector (q, "q");
-	    cPar.WriteVector (s, "size");
-
-	    gsl_matrix_free (S);
-	    gsl_matrix_free (Vq);
-	    //gsl_matrix_free (V);
-	    //gsl_matrix_free (Vslope);
-	    gsl_vector_free (q);
-	    gsl_vector_free (s_study);
-	    gsl_vector_free (s_ref);
-	    gsl_vector_free (s);
-	  } else {
-		gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
-		gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
-		gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc );
-
-		//set covariates matrix W and phenotype matrix Y
-		//an intercept should be included in W,
-		cPar.CopyCvtPhen (W, Y, 0);
-
-		//read kinship matrices
-		if (!(cPar.file_mk).empty()) {
-		  ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
-		  if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
-		  //center matrix G, and obtain v_traceG
-		  double d=0;
-		  (cPar.v_traceG).clear();
-		  for (size_t i=0; i<cPar.n_vc; i++) {
-		    gsl_matrix_view G_sub=gsl_matrix_submatrix (G, 0, i*G->size1, G->size1, G->size1);
-		    CenterMatrix (&G_sub.matrix);
-		    d=0;
-		    for (size_t j=0; j<G->size1; j++) {
-		      d+=gsl_matrix_get (&G_sub.matrix, j, j);
-		    }
-		    d/=(double)G->size1;
-		    (cPar.v_traceG).push_back(d);
-		  }
-		} else if (!(cPar.file_kin).empty()) {
-			ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
-			if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
-			//center matrix G
-			CenterMatrix (G);
-
-			(cPar.v_traceG).clear();
-			double d=0;
-			for (size_t j=0; j<G->size1; j++) {
-			  d+=gsl_matrix_get (G, j, j);
-			}
-			d/=(double)G->size1;
-			(cPar.v_traceG).push_back(d);
-		}
-			/*
-			//eigen-decomposition and calculate trace_G
-			cout<<"Start Eigen-Decomposition..."<<endl;
-			time_start=clock();
-
-			if (cPar.a_mode==31) {
-				cPar.trace_G=EigenDecomp (G, U, eval, 1);
-			} else {
-				cPar.trace_G=EigenDecomp (G, U, eval, 0);
-			}
-
-			cPar.trace_G=0.0;
-			for (size_t i=0; i<eval->size; i++) {
-				if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
-				cPar.trace_G+=gsl_vector_get (eval, i);
-			}
-			cPar.trace_G/=(double)eval->size;
-
-			cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		} else {
-			ReadFile_eigenU (cPar.file_ku, cPar.error, U);
-			if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
-			ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
-			if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
-			cPar.trace_G=0.0;
-			for (size_t i=0; i<eval->size; i++) {
-				if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
-			  	cPar.trace_G+=gsl_vector_get(eval, i);
-			}
-			cPar.trace_G/=(double)eval->size;
-		}
-		*/
-		//fit multiple variance components
-		if (cPar.n_ph==1) {
-		  //		  if (cPar.n_vc==1) {
-		    /*
-		    //calculate UtW and Uty
-		    CalcUtX (U, W, UtW);
-		    CalcUtX (U, Y, UtY);
-
-		    gsl_vector_view beta=gsl_matrix_row (B, 0);
-		    gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
-		    gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
-		    CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
-		    CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
-		    cPar.beta_mle_null.clear();
-		    cPar.se_beta_mle_null.clear();
-		    for (size_t i=0; i<B->size2; i++) {
-		      cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
-		      cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
-		    }
-
-		    CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
-		    CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
-		    cPar.beta_remle_null.clear();
-		    cPar.se_beta_remle_null.clear();
-		    for (size_t i=0; i<B->size2; i++) {
-		      cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
-		      cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
-		    }
-
-		    CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-		    cPar.PrintSummary();
-
-		    //calculate and output residuals
-		    if (cPar.a_mode==5) {
-		      gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
-		      gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
-		      gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
-		      gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
-		      gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
-		      //obtain Utu and Ute
-		      gsl_vector_memcpy (y_hat, &UtY_col.vector);
-		      gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
-		      double d, u, e;
-		      for (size_t i=0; i<eval->size; i++) {
-			d=gsl_vector_get (eval, i);
-			u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
-			e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
-			gsl_vector_set (Utu_hat, i, u);
-			gsl_vector_set (Ute_hat, i, e);
-		      }
-
-		      //obtain u and e
-		      gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
-		      gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
-		      //output residuals
-		      cPar.WriteVector(u_hat, "residU");
-		      cPar.WriteVector(e_hat, "residE");
-
-		      gsl_vector_free(u_hat);
-		      gsl_vector_free(e_hat);
-		      gsl_vector_free(y_hat);
-		    }
-*/
-		  //		  } else {
-		    gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-		    VC cVc;
-		    cVc.CopyFromParam(cPar);
-		    if (cPar.a_mode==61) {
-		      cVc.CalcVChe (G, W, &Y_col.vector);
-		    } else if (cPar.a_mode==62) {
-		      cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector);
-		    } else {
-		      cVc.CalcVCacl (G, W, &Y_col.vector);
-		    }
-		    cVc.CopyToParam(cPar);
-		    //obtain pve from sigma2
-		    //obtain se_pve from se_sigma2
-
-		    //}
-		}
-	  }
-
-	}
-
-
-	//compute confidence intervals with additional summary statistics
-	//we do not check the sign of z-scores here, but they have to be matched with the genotypes
-	if (cPar.a_mode==66 || cPar.a_mode==67) {
-	  //read reference file first
-	  gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
-	  gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
-	  gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
-
-	  gsl_matrix_set_zero(S);
-	  gsl_matrix_set_zero(Svar);
-	  gsl_vector_set_zero(s_ref);
-
-	  if (!cPar.file_ref.empty()) {
-	    ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
-	  } else {
-	    ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
-	  }
-
-	  //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
-	  //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
-	  set<string> setSnps_beta;
-	  ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
-	  //obtain the weights for wA, which contains the SNP weights for SNPs used in the model
-	  map <string, double> mapRS2wK;
-	  cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
-	  //set up matrices and vector
-	  gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
-	  gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
-	  gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc);
-	  gsl_vector *w=gsl_vector_alloc (mapRS2wK.size());
-	  gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size());
-	  gsl_vector *z=gsl_vector_alloc (mapRS2wK.size());
-	  gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc);
-
-	  vector<size_t> vec_cat, vec_size;
-	  vector<double> vec_z;
-
-	  map <string, double> mapRS2z, mapRS2wA;
-	  map <string, string> mapRS2A1;
-	  string file_str;
-
-	  //update s_vec, the number of snps in each category
-	  for (size_t i=0; i<cPar.n_vc; i++) {
-	    vec_size.push_back(0);
-	  }
-
-	  for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
-	    vec_size[cPar.mapRS2cat[it->first]]++;
-	  }
-
-	  for (size_t i=0; i<cPar.n_vc; i++) {
-	    gsl_vector_set(s_vec, i, vec_size[i]);
-	  }
-
-	  //update mapRS2wA using v_pve and s_vec
-	  if (cPar.a_mode==66) {
-	    for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
-	      mapRS2wA[it->first]=1;
-	    }
-	  } else {
-	    cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
-	  }
-
-	  //read in z-scores based on allele 0, and save that into a vector
-	  ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
-
-	  //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat
-	  //sign of z is determined by matching alleles
-	  cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
-
-	  //compute an n by k matrix of X_iWz
-	  cout<<"Calculating Xz ... "<<endl;
-
-	  gsl_matrix_set_zero(Xz);
-	  gsl_vector_set_all (w1, 1);
-
-	  if (!cPar.file_bfile.empty() ) {
-	    file_str=cPar.file_bfile+".bed";
-	    PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
-	  } else if (!cPar.file_geno.empty()) {
-	    BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
-	  } else if (!cPar.file_mbfile.empty() ){
-	    MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
-	  } else if (!cPar.file_mgeno.empty()) {
-	    MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
-	  }
-	  /*
-	  cout<<"Xz: "<<endl;
-	  for (size_t i=0; i<5; i++) {
-	    for (size_t j=0; j<cPar.n_vc; j++) {
-	      cout<<gsl_matrix_get (Xz, i, j)<<" ";
-	    }
-	    cout<<endl;
-	  }
-	  */
-	  if (cPar.a_mode==66) {
-	    gsl_matrix_memcpy (XWz, Xz);
-	  } else if (cPar.a_mode==67) {
-	    cout<<"Calculating XWz ... "<<endl;
-
-	    gsl_matrix_set_zero(XWz);
-
-	    if (!cPar.file_bfile.empty() ) {
-	      file_str=cPar.file_bfile+".bed";
-	      PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
-	    } else if (!cPar.file_geno.empty()) {
-	      BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
-	    } else if (!cPar.file_mbfile.empty() ){
-	      MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
-	    } else if (!cPar.file_mgeno.empty()) {
-	      MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
-	    }
-	  }
-	  /*
-	  cout<<"XWz: "<<endl;
-	  for (size_t i=0; i<5; i++) {
-	    cout<<gsl_vector_get (w, i)<<endl;
-	    for (size_t j=0; j<cPar.n_vc; j++) {
-	      cout<<gsl_matrix_get (XWz, i, j)<<" ";
-	    }
-	    cout<<endl;
-	  }
-	  */
-	  //compute an p by k matrix of X_j^TWX_iWz
-	  cout<<"Calculating XtXWz ... "<<endl;
-	  gsl_matrix_set_zero(XtXWz);
-
-	  if (!cPar.file_bfile.empty() ) {
-	    file_str=cPar.file_bfile+".bed";
-	    PlinkXtXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
-	  } else if (!cPar.file_geno.empty()) {
-	    BimbamXtXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
-	  } else if (!cPar.file_mbfile.empty() ){
-	    MFILEXtXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
-	  } else if (!cPar.file_mgeno.empty()) {
-	    MFILEXtXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
-	  }
-	  /*
-	  cout<<"XtXWz: "<<endl;
-	  for (size_t i=0; i<5; i++) {
-	    for (size_t j=0; j<cPar.n_vc; j++) {
-	      cout<<gsl_matrix_get (XtXWz, i, j)<<" ";
-	    }
-	    cout<<endl;
-	  }
-	  */
-	  //compute confidence intervals
-	  CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
-	  //write files
-	  //cPar.WriteMatrix (XWz, "XWz");
-	  //cPar.WriteMatrix (XtXWz, "XtXWz");
-	  //cPar.WriteVector (w, "w");
-
-	  gsl_matrix_free(S);
-	  gsl_matrix_free(Svar);
-	  gsl_vector_free(s_ref);
-
-	  gsl_matrix_free(Xz);
-	  gsl_matrix_free(XWz);
-	  gsl_matrix_free(XtXWz);
-	  gsl_vector_free(w);
-	  gsl_vector_free(w1);
-	  gsl_vector_free(z);
-	  gsl_vector_free(s_vec);
-	}
-
-
-	//LMM or mvLMM or Eigen-Decomposition
-	if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==31) {  //Fit LMM or mvLMM or eigen
-		gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
-		gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
-		gsl_matrix *B=gsl_matrix_alloc (Y->size2, W->size2);	//B is a d by c matrix
-		gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2);
-		gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
-		gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
-		gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
-		gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
-		gsl_vector *eval=gsl_vector_alloc (Y->size1);
-		gsl_vector *env=gsl_vector_alloc (Y->size1);
-		gsl_vector *weight=gsl_vector_alloc (Y->size1);
-
-		//set covariates matrix W and phenotype matrix Y
-		//an intercept should be included in W,
-		cPar.CopyCvtPhen (W, Y, 0);
-		if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);}
-
-		//read relatedness matrix G
-		if (!(cPar.file_kin).empty()) {
-			ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
-			if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
-			//center matrix G
-			CenterMatrix (G);
-
-			//is residual weights are provided, then
-			if (!cPar.file_weight.empty()) {
-			  cPar.CopyWeight (weight);
-			  double d, wi, wj;
-			  for (size_t i=0; i<G->size1; i++) {
-			    wi=gsl_vector_get(weight, i);
-			    for (size_t j=i; j<G->size2; j++) {
-			      wj=gsl_vector_get(weight, j);
-			      d=gsl_matrix_get(G, i, j);
-			      if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);}
-			      gsl_matrix_set(G, i, j, d);
-			      if (j!=i) {gsl_matrix_set(G, j, i, d);}
-			    }
-			  }
-			}
-
-			//eigen-decomposition and calculate trace_G
-			cout<<"Start Eigen-Decomposition..."<<endl;
-			time_start=clock();
-
-			if (cPar.a_mode==31) {
-				cPar.trace_G=EigenDecomp (G, U, eval, 1);
-			} else {
-				cPar.trace_G=EigenDecomp (G, U, eval, 0);
-			}
-
-			if (!cPar.file_weight.empty()) {
-			  double wi;
-			  for (size_t i=0; i<U->size1; i++) {
-			    wi=gsl_vector_get(weight, i);
-			    if (wi<=0) {wi=0;} else {wi=sqrt(wi);}
-			    gsl_vector_view Urow=gsl_matrix_row (U, i);
-			    gsl_vector_scale (&Urow.vector, wi);
-			  }
-			}
-
-			cPar.trace_G=0.0;
-			for (size_t i=0; i<eval->size; i++) {
-				if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
-				cPar.trace_G+=gsl_vector_get (eval, i);
-			}
-			cPar.trace_G/=(double)eval->size;
-
-			cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		} else {
-			ReadFile_eigenU (cPar.file_ku, cPar.error, U);
-			if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
-			ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
-			if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
-			cPar.trace_G=0.0;
-			for (size_t i=0; i<eval->size; i++) {
-				if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
-			  	cPar.trace_G+=gsl_vector_get(eval, i);
-			}
-			cPar.trace_G/=(double)eval->size;
-		}
-
-		if (cPar.a_mode==31) {
-			cPar.WriteMatrix(U, "eigenU");
-			cPar.WriteVector(eval, "eigenD");
-		} else if (!cPar.file_gene.empty() ) {
-			//calculate UtW and Uty
-			CalcUtX (U, W, UtW);
-			CalcUtX (U, Y, UtY);
-
-			LMM cLmm;
-			cLmm.CopyFromParam(cPar);
-
-			gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-			gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
-			cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype
-
-			cLmm.WriteFiles();
-			cLmm.CopyToParam(cPar);
-		} else {
-		  //calculate UtW and Uty
-		  CalcUtX (U, W, UtW);
-		  CalcUtX (U, Y, UtY);
-
-			//calculate REMLE/MLE estimate and pve for univariate model
-			if (cPar.n_ph==1) {
-				gsl_vector_view beta=gsl_matrix_row (B, 0);
-				gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
-				gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
-				CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
-				CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
-				cPar.beta_mle_null.clear();
-				cPar.se_beta_mle_null.clear();
-				for (size_t i=0; i<B->size2; i++) {
-					cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
-					cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
-				}
-
-				CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
-				CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
-				cPar.beta_remle_null.clear();
-				cPar.se_beta_remle_null.clear();
-				for (size_t i=0; i<B->size2; i++) {
-					cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
-					cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
-				}
-
-				CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-				cPar.PrintSummary();
-
-				//calculate and output residuals
-				if (cPar.a_mode==5) {
-					gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
-					gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
-					gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
-					gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
-					gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
-					//obtain Utu and Ute
-					gsl_vector_memcpy (y_hat, &UtY_col.vector);
-					gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
-					double d, u, e;
-					for (size_t i=0; i<eval->size; i++) {
-						d=gsl_vector_get (eval, i);
-						u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
-						e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
-						gsl_vector_set (Utu_hat, i, u);
-						gsl_vector_set (Ute_hat, i, e);
-					}
-
-					//obtain u and e
-					gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
-					gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
-					//output residuals
-					cPar.WriteVector(u_hat, "residU");
-					cPar.WriteVector(e_hat, "residE");
-
-					gsl_vector_free(u_hat);
-					gsl_vector_free(e_hat);
-					gsl_vector_free(y_hat);
-				}
-			}
-
-			//Fit LMM or mvLMM
-			if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) {
-				if (cPar.n_ph==1) {
-					LMM cLmm;
-					cLmm.CopyFromParam(cPar);
-
-					gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-					gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
-					if (!cPar.file_bfile.empty()) {
-					  if (cPar.file_gxe.empty()) {
-					    cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
-					  } else {
-					    cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
-					  }
-					}
-					// WJA added
-				       	else if(!cPar.file_oxford.empty()) {
-					  cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
-					}
-					else {
-					  if (cPar.file_gxe.empty()) {
-					    cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
-					  } else {
-					    cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
-					  }
-					}
-
-					cLmm.WriteFiles();
-					cLmm.CopyToParam(cPar);
-				} else {
-					MVLMM cMvlmm;
-					cMvlmm.CopyFromParam(cPar);
-
-					if (!cPar.file_bfile.empty()) {
-					  if (cPar.file_gxe.empty()) {
-					    cMvlmm.AnalyzePlink (U, eval, UtW, UtY);
-					  } else {
-					    cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env);
-					  }
-					}
-					else if(!cPar.file_oxford.empty())
-					{
-					    cMvlmm.Analyzebgen (U, eval, UtW, UtY);
-					}
-					else {
-					  if (cPar.file_gxe.empty()) {
-					    cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY);
-					  } else {
-					    cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env);
-					  }
-					}
-
-					cMvlmm.WriteFiles();
-					cMvlmm.CopyToParam(cPar);
-				}
-			}
-		}
-
-
-		//release all matrices and vectors
-		gsl_matrix_free (Y);
-		gsl_matrix_free (W);
-		gsl_matrix_free(B);
-		gsl_matrix_free(se_B);
-		gsl_matrix_free (G);
-		gsl_matrix_free (U);
-		gsl_matrix_free (UtW);
-		gsl_matrix_free (UtY);
-		gsl_vector_free (eval);
-		gsl_vector_free (env);
-	}
-
-
-	//BSLMM
-	if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
-		gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
-		gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
-		gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
-		gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
-		//set covariates matrix W and phenotype vector y
-		//an intercept should be included in W,
-		cPar.CopyCvtPhen (W, y, 0);
-
-		//center y, even for case/control data
-		cPar.pheno_mean=CenterVector(y);
-
-		//run bvsr if rho==1
-		if (cPar.rho_min==1 && cPar.rho_max==1) {
-		  //read genotypes X (not UtX)
-		  cPar.ReadGenotypes (UtX, G, false);
-
-		  //perform BSLMM analysis
-		  BSLMM cBslmm;
-		  cBslmm.CopyFromParam(cPar);
-		  time_start=clock();
-		  cBslmm.MCMC(UtX, y);
-		  cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		  cBslmm.CopyToParam(cPar);
-		  //else, if rho!=1
-		} else {
-		gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
-		gsl_vector *eval=gsl_vector_alloc (y->size);
-		gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
-		gsl_vector *Uty=gsl_vector_alloc (y->size);
-
-		//read relatedness matrix G
-		if (!(cPar.file_kin).empty()) {
-			cPar.ReadGenotypes (UtX, G, false);
-
-			//read relatedness matrix G
-			ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
-			if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
-			//center matrix G
-			CenterMatrix (G);
-		} else {
-			cPar.ReadGenotypes (UtX, G, true);
-		}
-
-		//eigen-decomposition and calculate trace_G
-		cout<<"Start Eigen-Decomposition..."<<endl;
-		time_start=clock();
-		cPar.trace_G=EigenDecomp (G, U, eval, 0);
-		cPar.trace_G=0.0;
-		for (size_t i=0; i<eval->size; i++) {
-			if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
-			cPar.trace_G+=gsl_vector_get (eval, i);
-		}
-		cPar.trace_G/=(double)eval->size;
-		cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		//calculate UtW and Uty
-		CalcUtX (U, W, UtW);
-		CalcUtX (U, y, Uty);
-
-		//calculate REMLE/MLE estimate and pve
-		CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
-		CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
-		CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
-		cPar.PrintSummary();
-
-		//Creat and calcualte UtX, use a large memory
-		cout<<"Calculating UtX..."<<endl;
-		time_start=clock();
-		CalcUtX (U, UtX);
-		cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		//perform BSLMM or BSLMMDAP analysis
-		if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
-		  BSLMM cBslmm;
-		  cBslmm.CopyFromParam(cPar);
-		  time_start=clock();
-		  if (cPar.a_mode==12) {  //ridge regression
-		    cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
-		  } else {	//Run MCMC
-		    cBslmm.MCMC(U, UtX, Uty, eval, y);
-		  }
-		  cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		  cBslmm.CopyToParam(cPar);
-		} else {
-		}
-
-		//release all matrices and vectors
-		gsl_matrix_free (G);
-		gsl_matrix_free (U);
-		gsl_matrix_free (UtW);
-		gsl_vector_free (eval);
-		gsl_vector_free (Uty);
-
-		}
-		gsl_matrix_free (W);
-		gsl_vector_free (y);
-		gsl_matrix_free (UtX);
-	}
-
-
-
-	//BSLMM-DAP
-	if (cPar.a_mode==14 || cPar.a_mode==15 || cPar.a_mode==16) {
-	  if (cPar.a_mode==14) {
-	    gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
-	    gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
-	    gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
-	    gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
-	    //set covariates matrix W and phenotype vector y
-	    //an intercept should be included in W,
-	    cPar.CopyCvtPhen (W, y, 0);
-
-	    //center y, even for case/control data
-	    cPar.pheno_mean=CenterVector(y);
-
-	    //run bvsr if rho==1
-	    if (cPar.rho_min==1 && cPar.rho_max==1) {
-	      //read genotypes X (not UtX)
-	      cPar.ReadGenotypes (UtX, G, false);
-
-	      //perform BSLMM analysis
-	      BSLMM cBslmm;
-	      cBslmm.CopyFromParam(cPar);
-	      time_start=clock();
-	      cBslmm.MCMC(UtX, y);
-	      cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	      cBslmm.CopyToParam(cPar);
-	      //else, if rho!=1
-	    } else {
-	      gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
-	      gsl_vector *eval=gsl_vector_alloc (y->size);
-	      gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
-	      gsl_vector *Uty=gsl_vector_alloc (y->size);
-
-	      //read relatedness matrix G
-	      if (!(cPar.file_kin).empty()) {
-		cPar.ReadGenotypes (UtX, G, false);
-
-		//read relatedness matrix G
-		ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
-		if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
-		//center matrix G
-		CenterMatrix (G);
-	      } else {
-		cPar.ReadGenotypes (UtX, G, true);
-	      }
-
-	      //eigen-decomposition and calculate trace_G
-	      cout<<"Start Eigen-Decomposition..."<<endl;
-	      time_start=clock();
-	      cPar.trace_G=EigenDecomp (G, U, eval, 0);
-	      cPar.trace_G=0.0;
-	      for (size_t i=0; i<eval->size; i++) {
-		if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
-		cPar.trace_G+=gsl_vector_get (eval, i);
-	      }
-	      cPar.trace_G/=(double)eval->size;
-	      cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      //calculate UtW and Uty
-	      CalcUtX (U, W, UtW);
-	      CalcUtX (U, y, Uty);
-
-	      //calculate REMLE/MLE estimate and pve
-	      CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
-	      CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
-	      CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
-	      cPar.PrintSummary();
-
-	      //Creat and calcualte UtX, use a large memory
-	      cout<<"Calculating UtX..."<<endl;
-	      time_start=clock();
-	      CalcUtX (U, UtX);
-	      cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      //perform analysis; assume X and y are already centered
-	      BSLMMDAP cBslmmDap;
-	      cBslmmDap.CopyFromParam(cPar);
-	      time_start=clock();
-	      cBslmmDap.DAP_CalcBF (U, UtX, Uty, eval, y);
-	      cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	      cBslmmDap.CopyToParam(cPar);
-
-	      //release all matrices and vectors
-	      gsl_matrix_free (G);
-	      gsl_matrix_free (U);
-	      gsl_matrix_free (UtW);
-	      gsl_vector_free (eval);
-	      gsl_vector_free (Uty);
-	    }
-
-	    gsl_matrix_free (W);
-	    gsl_vector_free (y);
-	    gsl_matrix_free (UtX);
-	  } else if (cPar.a_mode==15) {
-	    //perform EM algorithm and estimate parameters
-	    vector<string> vec_rs;
-	    vector<double> vec_sa2, vec_sb2, wab;
-	    vector<vector<vector<double> > > BF;
-
-	    //read hyp and bf files (functions defined in BSLMMDAP)
-	    ReadFile_hyb (cPar.file_hyp, vec_sa2, vec_sb2, wab);
-	    ReadFile_bf (cPar.file_bf, vec_rs, BF);
-
-	    cPar.ns_test=vec_rs.size();
-	    if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"<<endl;}
-
-	    //load annotations
-	    gsl_matrix *Ac;
-	    gsl_matrix_int *Ad;
-	    gsl_vector_int *dlevel;
-	    size_t kc, kd;
-	    if (!cPar.file_cat.empty()) {
-	      ReadFile_cat (cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
-	    } else {
-	      kc=0; kd=0;
-	    }
-
-	    cout<<"## number of blocks = "<<BF.size()<<endl;
-	    cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl;
-	    cout<<"## grid size for hyperparameters = "<<wab.size()<<endl;
-	    cout<<"## number of continuous annotations = "<<kc<<endl;
-	    cout<<"## number of discrete annotations = "<<kd<<endl;
-
-	    //DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel);
-
-	    //perform analysis
-	    BSLMMDAP cBslmmDap;
-	    cBslmmDap.CopyFromParam(cPar);
-	    time_start=clock();
-	    cBslmmDap.DAP_EstimateHyper (kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac, Ad, dlevel);
-	    cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    cBslmmDap.CopyToParam(cPar);
-
-	    gsl_matrix_free(Ac);
-	    gsl_matrix_int_free(Ad);
-	    gsl_vector_int_free(dlevel);
-	  } else {
-	    //
-	  }
-
-	}
-
-
-
-
-	/*
-	//LDR (change 14 to 16?)
-	if (cPar.a_mode==14) {
-		gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
-		gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
-		gsl_matrix *G=gsl_matrix_alloc (1, 1);
-		vector<vector<unsigned char> > Xt;
-
-        	//set covariates matrix W and phenotype vector y
-		//an intercept is included in W
-		cPar.CopyCvtPhen (W, y, 0);
-
-		//read in genotype matrix X
-		cPar.ReadGenotypes (Xt, G, false);
-
-		LDR cLdr;
-		cLdr.CopyFromParam(cPar);
-		time_start=clock();
-
-		cLdr.VB(Xt, W, y);
-
-		cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		cLdr.CopyToParam(cPar);
-
-		gsl_vector_free (y);
-		gsl_matrix_free (W);
-		gsl_matrix_free (G);
-	}
-	*/
-
-	cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0);
-
-	return;
+void GEMMA::BatchRun(PARAM &cPar) {
+  clock_t time_begin, time_start;
+  time_begin = clock();
+
+  // Read Files.
+  cout << "Reading Files ... " << endl;
+  cPar.ReadFiles();
+  if (cPar.error == true) {
+    cout << "error! fail to read files. " << endl;
+    return;
+  }
+  cPar.CheckData();
+  if (cPar.error == true) {
+    cout << "error! fail to check data. " << endl;
+    return;
+  }
+
+  // Prediction for bslmm
+  if (cPar.a_mode == 41 || cPar.a_mode == 42) {
+    gsl_vector *y_prdt;
+
+    y_prdt = gsl_vector_alloc(cPar.ni_total - cPar.ni_test);
+
+    // set to zero
+    gsl_vector_set_zero(y_prdt);
+
+    PRDT cPRDT;
+    cPRDT.CopyFromParam(cPar);
+
+    // add breeding value if needed
+    if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
+      cout << "Adding Breeding Values ... " << endl;
+
+      gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+      gsl_vector *u_hat = gsl_vector_alloc(cPar.ni_test);
+
+      // read kinship matrix and set u_hat
+      vector<int> indicator_all;
+      size_t c_bv = 0;
+      for (size_t i = 0; i < cPar.indicator_idv.size(); i++) {
+        indicator_all.push_back(1);
+        if (cPar.indicator_bv[i] == 1) {
+          gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]);
+          c_bv++;
+        }
+      }
+
+      ReadFile_kin(cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode,
+                   cPar.error, G);
+      if (cPar.error == true) {
+        cout << "error! fail to read kinship/relatedness file. " << endl;
+        return;
+      }
+
+      // read u
+      cPRDT.AddBV(G, u_hat, y_prdt);
+
+      gsl_matrix_free(G);
+      gsl_vector_free(u_hat);
+    }
+
+    // add beta
+    if (!cPar.file_bfile.empty()) {
+      cPRDT.AnalyzePlink(y_prdt);
+    } else {
+      cPRDT.AnalyzeBimbam(y_prdt);
+    }
+
+    // add mu
+    gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
+
+    // convert y to probability if needed
+    if (cPar.a_mode == 42) {
+      double d;
+      for (size_t i = 0; i < y_prdt->size; i++) {
+        d = gsl_vector_get(y_prdt, i);
+        d = gsl_cdf_gaussian_P(d, 1.0);
+        gsl_vector_set(y_prdt, i, d);
+      }
+    }
+
+    cPRDT.CopyToParam(cPar);
+
+    cPRDT.WriteFiles(y_prdt);
+
+    gsl_vector_free(y_prdt);
+  }
+
+  // Prediction with kinship matrix only; for one or more phenotypes
+  if (cPar.a_mode == 43) {
+    // first, use individuals with full phenotypes to obtain estimates of Vg and
+    // Ve
+    gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+    gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+    gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+    gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+    gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+    gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+    gsl_vector *eval = gsl_vector_alloc(Y->size1);
+
+    gsl_matrix *Y_full = gsl_matrix_alloc(cPar.ni_cvt, cPar.n_ph);
+    gsl_matrix *W_full = gsl_matrix_alloc(Y_full->size1, cPar.n_cvt);
+
+    // set covariates matrix W and phenotype matrix Y
+    // an intercept should be included in W,
+    cPar.CopyCvtPhen(W, Y, 0);
+    cPar.CopyCvtPhen(W_full, Y_full, 1);
+
+    gsl_matrix *Y_hat = gsl_matrix_alloc(Y_full->size1, cPar.n_ph);
+    gsl_matrix *G_full = gsl_matrix_alloc(Y_full->size1, Y_full->size1);
+    gsl_matrix *H_full = gsl_matrix_alloc(Y_full->size1 * Y_hat->size2,
+                                          Y_full->size1 * Y_hat->size2);
+
+    // read relatedness matrix G, and matrix G_full
+    ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode,
+                 cPar.error, G);
+    if (cPar.error == true) {
+      cout << "error! fail to read kinship/relatedness file. " << endl;
+      return;
+    }
+    ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
+                 cPar.error, G_full);
+    if (cPar.error == true) {
+      cout << "error! fail to read kinship/relatedness file. " << endl;
+      return;
+    }
+
+    // center matrix G
+    CenterMatrix(G);
+    CenterMatrix(G_full);
+
+    // eigen-decomposition and calculate trace_G
+    cout << "Start Eigen-Decomposition..." << endl;
+    time_start = clock();
+    cPar.trace_G = EigenDecomp(G, U, eval, 0);
+    cPar.trace_G = 0.0;
+    for (size_t i = 0; i < eval->size; i++) {
+      if (gsl_vector_get(eval, i) < 1e-10) {
+        gsl_vector_set(eval, i, 0);
+      }
+      cPar.trace_G += gsl_vector_get(eval, i);
+    }
+    cPar.trace_G /= (double)eval->size;
+    cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // calculate UtW and Uty
+    CalcUtX(U, W, UtW);
+    CalcUtX(U, Y, UtY);
+
+    // calculate variance component and beta estimates
+    // and then obtain predicted values
+    if (cPar.n_ph == 1) {
+      gsl_vector *beta = gsl_vector_alloc(W->size2);
+      gsl_vector *se_beta = gsl_vector_alloc(W->size2);
+
+      double lambda, logl, vg, ve;
+      gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+      // obtain estimates
+      CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+                 cPar.n_region, lambda, logl);
+      CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, lambda, vg, ve, beta,
+                      se_beta);
+
+      cout << "REMLE estimate for vg in the null model = " << vg << endl;
+      cout << "REMLE estimate for ve in the null model = " << ve << endl;
+      cPar.vg_remle_null = vg;
+      cPar.ve_remle_null = ve;
+
+      // obtain Y_hat from fixed effects
+      gsl_vector_view Yhat_col = gsl_matrix_column(Y_hat, 0);
+      gsl_blas_dgemv(CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
+
+      // obtain H
+      gsl_matrix_set_identity(H_full);
+      gsl_matrix_scale(H_full, ve);
+      gsl_matrix_scale(G_full, vg);
+      gsl_matrix_add(H_full, G_full);
+
+      // free matrices
+      gsl_vector_free(beta);
+      gsl_vector_free(se_beta);
+    } else {
+      gsl_matrix *Vg = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+      gsl_matrix *Ve = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+      gsl_matrix *B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+      gsl_matrix *se_B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+
+      // obtain estimates
+      CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
+                        cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max,
+                        cPar.n_region, Vg, Ve, B, se_B);
+
+      cout << "REMLE estimate for Vg in the null model: " << endl;
+      for (size_t i = 0; i < Vg->size1; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          cout << gsl_matrix_get(Vg, i, j) << "\t";
+        }
+        cout << endl;
+      }
+      cout << "REMLE estimate for Ve in the null model: " << endl;
+      for (size_t i = 0; i < Ve->size1; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          cout << gsl_matrix_get(Ve, i, j) << "\t";
+        }
+        cout << endl;
+      }
+      cPar.Vg_remle_null.clear();
+      cPar.Ve_remle_null.clear();
+      for (size_t i = 0; i < Vg->size1; i++) {
+        for (size_t j = i; j < Vg->size2; j++) {
+          cPar.Vg_remle_null.push_back(gsl_matrix_get(Vg, i, j));
+          cPar.Ve_remle_null.push_back(gsl_matrix_get(Ve, i, j));
+        }
+      }
+
+      // obtain Y_hat from fixed effects
+      gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
+
+      // obtain H
+      KroneckerSym(G_full, Vg, H_full);
+      for (size_t i = 0; i < G_full->size1; i++) {
+        gsl_matrix_view H_sub = gsl_matrix_submatrix(
+            H_full, i * Ve->size1, i * Ve->size2, Ve->size1, Ve->size2);
+        gsl_matrix_add(&H_sub.matrix, Ve);
+      }
+
+      // free matrices
+      gsl_matrix_free(Vg);
+      gsl_matrix_free(Ve);
+      gsl_matrix_free(B);
+      gsl_matrix_free(se_B);
+    }
+
+    PRDT cPRDT;
+
+    cPRDT.CopyFromParam(cPar);
+
+    cout << "Predicting Missing Phentypes ... " << endl;
+    time_start = clock();
+    cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
+    cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    cPRDT.WriteFiles(Y_full);
+
+    gsl_matrix_free(Y);
+    gsl_matrix_free(W);
+    gsl_matrix_free(G);
+    gsl_matrix_free(U);
+    gsl_matrix_free(UtW);
+    gsl_matrix_free(UtY);
+    gsl_vector_free(eval);
+
+    gsl_matrix_free(Y_full);
+    gsl_matrix_free(Y_hat);
+    gsl_matrix_free(W_full);
+    gsl_matrix_free(G_full);
+    gsl_matrix_free(H_full);
+  }
+
+  // Generate Kinship matrix
+  if (cPar.a_mode == 21 || cPar.a_mode == 22) {
+    cout << "Calculating Relatedness Matrix ... " << endl;
+
+    gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+
+    time_start = clock();
+    cPar.CalcKin(G);
+    cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+    if (cPar.error == true) {
+      cout << "error! fail to calculate relatedness matrix. " << endl;
+      return;
+    }
+
+    if (cPar.a_mode == 21) {
+      cPar.WriteMatrix(G, "cXX");
+    } else {
+      cPar.WriteMatrix(G, "sXX");
+    }
+
+    gsl_matrix_free(G);
+  }
+
+  // Compute the LDSC weights (not implemented yet)
+  if (cPar.a_mode == 72) {
+    cout << "Calculating Weights ... " << endl;
+
+    VARCOV cVarcov;
+    cVarcov.CopyFromParam(cPar);
+
+    if (!cPar.file_bfile.empty()) {
+      cVarcov.AnalyzePlink();
+    } else {
+      cVarcov.AnalyzeBimbam();
+    }
+
+    cVarcov.CopyToParam(cPar);
+  }
+
+  // Compute the S matrix (and its variance), that is used for
+  // variance component estimation using summary statistics.
+  if (cPar.a_mode == 25 || cPar.a_mode == 26) {
+    cout << "Calculating the S Matrix ... " << endl;
+
+    gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+    gsl_vector *ns = gsl_vector_alloc(cPar.n_vc + 1);
+    gsl_matrix_set_zero(S);
+    gsl_vector_set_zero(ns);
+    // Views: upper half of S, lower half of S, first n_vc entries of ns.
+    gsl_matrix_view S_mat = gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+    gsl_matrix_view Svar_mat =
+        gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+    gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
+
+    gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+    gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+    gsl_matrix_set_zero(K);
+    gsl_matrix_set_zero(A);
+
+    gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+    gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+    cPar.CopyCvtPhen(W, y, 0);
+
+    set<string> setSnps_beta;
+    map<string, double> mapRS2wA, mapRS2wK;
+
+    cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+    // NOTE(review): the error return below does not free the buffers above.
+    time_start = clock();
+    cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+               &ns_vec.vector);
+    cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+    if (cPar.error == true) {
+      cout << "error! fail to calculate the S matrix. " << endl;
+      return;
+    }
+    // The extra last entry of ns stores cPar.ni_test.
+    gsl_vector_set(ns, cPar.n_vc, cPar.ni_test);
+
+    cPar.WriteMatrix(S, "S");
+    cPar.WriteVector(ns, "size");
+    cPar.WriteVar("snps");
+
+    gsl_matrix_free(S);
+    gsl_vector_free(ns);
+
+    gsl_matrix_free(A);
+    gsl_matrix_free(K);
+    // W was never released; K is already freed above (was a double free).
+    gsl_vector_free(y);
+    gsl_matrix_free(W);
+  }
+
+  // Compute the q vector, that is used for variance component estimation using
+  // summary statistics
+  if (cPar.a_mode == 27 || cPar.a_mode == 28) {
+    gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+    gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+    gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+    gsl_vector_set_zero(q);
+    gsl_vector_set_zero(s);
+
+    gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+    vector<size_t> vec_cat, vec_ni;
+    vector<double> vec_weight, vec_z2;
+    map<string, double> mapRS2weight;
+    mapRS2weight.clear();
+
+    time_start = clock();
+    ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni,
+                  vec_weight, vec_z2, cPar.ni_total, cPar.ns_total,
+                  cPar.ns_test);
+    cout << "## number of total individuals = " << cPar.ni_total << endl;
+    cout << "## number of total SNPs = " << cPar.ns_total << endl;
+    cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    cout << "## number of variance components = " << cPar.n_vc << endl;
+    cout << "Calculating the q vector ... " << endl;
+    Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+          &s_vec.vector);
+    cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    if (cPar.error == true) {
+      cout << "error! fail to calculate the q vector. " << endl;
+      return;
+    }
+
+    gsl_vector_set(s, cPar.n_vc, cPar.ni_total);
+
+    cPar.WriteMatrix(Vq, "Vq");
+    cPar.WriteVector(q, "q");
+    cPar.WriteVector(s, "size");
+    /*
+    for (size_t i=0; i<cPar.n_vc; i++) {
+      cout<<gsl_vector_get(q, i)<<endl;
+    }
+    */
+    gsl_matrix_free(Vq);
+    gsl_vector_free(q);
+    gsl_vector_free(s);
+  }
+
+  // Calculate SNP covariance.
+  if (cPar.a_mode == 71) {
+    VARCOV cVarcov;
+    cVarcov.CopyFromParam(cPar);
+
+    if (!cPar.file_bfile.empty()) {
+      cVarcov.AnalyzePlink();
+    } else {
+      cVarcov.AnalyzeBimbam();
+    }
+
+    cVarcov.CopyToParam(cPar);
+  }
+
+  // LM.
+  if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 ||
+      cPar.a_mode == 54) { // Fit LM
+    gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+    gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+
+    // set covariates matrix W and phenotype matrix Y
+    // an intercept should be included in W,
+    cPar.CopyCvtPhen(W, Y, 0);
+
+    // Fit LM or mvLM
+    if (cPar.n_ph == 1) {
+      LM cLm;
+      cLm.CopyFromParam(cPar);
+
+      gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+
+      if (!cPar.file_gene.empty()) {
+        cLm.AnalyzeGene(W,
+                        &Y_col.vector); // y is the predictor, not the phenotype
+      } else if (!cPar.file_bfile.empty()) {
+        cLm.AnalyzePlink(W, &Y_col.vector);
+      } else if (!cPar.file_oxford.empty()) {
+        cLm.Analyzebgen(W, &Y_col.vector);
+      } else {
+        cLm.AnalyzeBimbam(W, &Y_col.vector);
+      }
+
+      cLm.WriteFiles();
+      cLm.CopyToParam(cPar);
+    }
+    /*
+    else {
+            MVLM cMvlm;
+            cMvlm.CopyFromParam(cPar);
+
+            if (!cPar.file_bfile.empty()) {
+                    cMvlm.AnalyzePlink (W, Y);
+            } else {
+                    cMvlm.AnalyzeBimbam (W, Y);
+            }
+
+            cMvlm.WriteFiles();
+            cMvlm.CopyToParam(cPar);
+    }
+    */
+    // release all matrices and vectors
+    gsl_matrix_free(Y);
+    gsl_matrix_free(W);
+  }
+
+  // VC estimation with one or multiple kinship matrices
+  // REML approach only
+  // if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already,
+  // in param.cpp
+  // for one phenotype only;
+  if (cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) {
+    if (!cPar.file_beta.empty()) {
+      // need to obtain a common set of SNPs between beta file and the genotype
+      // file; these are saved in mapRS2wA and mapRS2wK
+      // normalize the weight in mapRS2wK to have an average of one; each
+      // element of mapRS2wA is 1
+      // update indicator_snps, so that the numbers are in accordance with
+      // mapRS2wK
+      set<string> setSnps_beta;
+      ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+      map<string, double> mapRS2wA, mapRS2wK;
+      cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+      cPar.UpdateSNP(mapRS2wK);
+
+      // Setup matrices and vectors.
+      gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+      gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+      gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+      gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+      gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+      gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+
+      gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+      gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+      gsl_matrix_set_zero(K);
+      gsl_matrix_set_zero(A);
+
+      gsl_matrix_set_zero(S);
+      gsl_matrix_set_zero(Vq);
+      gsl_vector_set_zero(q);
+      gsl_vector_set_zero(s);
+
+      cPar.CopyCvtPhen(W, y, 0);
+
+      gsl_matrix_view S_mat =
+          gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+      gsl_matrix_view Svar_mat =
+          gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+      gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+      vector<size_t> vec_cat, vec_ni;
+      vector<double> vec_weight, vec_z2;
+
+      // read beta, based on the mapRS2wK
+      ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni,
+                    vec_weight, vec_z2, cPar.ni_study, cPar.ns_study,
+                    cPar.ns_test);
+
+      cout << "Study Panel: " << endl;
+      cout << "## number of total individuals = " << cPar.ni_study << endl;
+      cout << "## number of total SNPs = " << cPar.ns_study << endl;
+      cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+      cout << "## number of variance components = " << cPar.n_vc << endl;
+
+      // compute q
+      Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+            &s_vec.vector);
+
+      // compute S
+      time_start = clock();
+      cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+                 &s_vec.vector);
+      cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+      if (cPar.error == true) {
+        cout << "error! fail to calculate the S matrix. " << endl;
+        return;
+      }
+
+      // compute vc estimates
+      CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+               cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+               cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+               cPar.v_enrich, cPar.v_se_enrich);
+
+      // if LDSC weights, then compute the weights and run the above steps again
+      if (cPar.a_mode == 62) {
+        // compute the weights and normalize the weights for A
+        cPar.UpdateWeight(1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
+
+        // read beta file again, and update weights vector
+        ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni,
+                      vec_weight, vec_z2, cPar.ni_study, cPar.ns_total,
+                      cPar.ns_test);
+
+        // compute q
+        Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+              &s_vec.vector);
+
+        // compute S
+        time_start = clock();
+        cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+                   &s_vec.vector);
+        cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+        if (cPar.error == true) {
+          cout << "error! fail to calculate the S matrix. " << endl;
+          return;
+        }
+
+        // compute vc estimates
+        CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+                 cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+                 cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+                 cPar.v_enrich, cPar.v_se_enrich);
+      }
+
+      gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
+
+      cPar.WriteMatrix(S, "S");
+      cPar.WriteMatrix(Vq, "Vq");
+      cPar.WriteVector(q, "q");
+      cPar.WriteVector(s, "size");
+
+      gsl_matrix_free(S);
+      gsl_matrix_free(Vq);
+      gsl_vector_free(q);
+      gsl_vector_free(s);
+
+      gsl_matrix_free(A);
+      gsl_matrix_free(K);
+      gsl_vector_free(y);
+      gsl_matrix_free(W);
+    } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
+      // Locate the "<prefix>.size.txt" file and pass it to CountFileLines,
+      // which presumably stores the line count in cPar.n_vc (TODO confirm).
+      if (!cPar.file_study.empty()) {
+        string sfile = cPar.file_study + ".size.txt";
+        CountFileLines(sfile, cPar.n_vc);
+      } else {
+        string file_name;
+        igzstream infile(cPar.file_mstudy.c_str(), igzstream::in);
+        if (!infile) {
+          cout << "error! fail to open mstudy file: " << cPar.file_mstudy
+               << endl;
+          return;
+        }
+        // Only the first line is read; it supplies the prefix used below.
+        safeGetline(infile, file_name);
+        infile.clear();
+        infile.close();
+        string sfile = file_name + ".size.txt";
+        CountFileLines(sfile, cPar.n_vc);
+      }
+
+      cPar.n_vc = cPar.n_vc - 1;
+
+      gsl_matrix *S = gsl_matrix_alloc(2 * cPar.n_vc, cPar.n_vc);
+      gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+      // gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1,
+      // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
+      // gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1,
+      // (n_lines*(n_lines+1))/2*(n_lines+1) );
+      gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+      gsl_vector *s_study = gsl_vector_alloc(cPar.n_vc);
+      gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+      gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+      gsl_matrix_set_zero(S);
+      gsl_matrix_view S_mat =
+          gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+      gsl_matrix_view Svar_mat =
+          gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+
+      gsl_matrix_set_zero(Vq);
+      // gsl_matrix_set_zero(V);
+      // gsl_matrix_set_zero(Vslope);
+      gsl_vector_set_zero(q);
+      gsl_vector_set_zero(s_study);
+      gsl_vector_set_zero(s_ref);
+
+      if (!cPar.file_study.empty()) {
+        ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
+      } else {
+        ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
+      }
+
+      if (!cPar.file_ref.empty()) {
+        ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+                     cPar.ni_ref);
+      } else {
+        ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+                      cPar.ni_ref);
+      }
+
+      cout << "## number of variance components = " << cPar.n_vc << endl;
+      cout << "## number of individuals in the sample = " << cPar.ni_study
+           << endl;
+      cout << "## number of individuals in the reference = " << cPar.ni_ref
+           << endl;
+
+      CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study,
+               cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
+               cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
+               cPar.v_se_enrich);
+
+      gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
+      gsl_vector_memcpy(&s_sub.vector, s_ref);
+      gsl_vector_set(s, cPar.n_vc, cPar.ni_ref);
+
+      cPar.WriteMatrix(S, "S");
+      cPar.WriteMatrix(Vq, "Vq");
+      cPar.WriteVector(q, "q");
+      cPar.WriteVector(s, "size");
+
+      gsl_matrix_free(S);
+      gsl_matrix_free(Vq);
+      // gsl_matrix_free (V);
+      // gsl_matrix_free (Vslope);
+      gsl_vector_free(q);
+      gsl_vector_free(s_study);
+      gsl_vector_free(s_ref);
+      gsl_vector_free(s);
+    } else {
+      gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+      gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+      gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1 * cPar.n_vc);
+
+      // set covariates matrix W and phenotype matrix Y
+      // an intercept should be included in W,
+      cPar.CopyCvtPhen(W, Y, 0);
+
+      // read kinship matrices
+      if (!(cPar.file_mk).empty()) {
+        ReadFile_mk(cPar.file_mk, cPar.indicator_idv, cPar.mapID2num,
+                    cPar.k_mode, cPar.error, G);
+        if (cPar.error == true) {
+          cout << "error! fail to read kinship/relatedness file. " << endl;
+          return;
+        }
+
+        // center matrix G, and obtain v_traceG
+        double d = 0;
+        (cPar.v_traceG).clear();
+        for (size_t i = 0; i < cPar.n_vc; i++) {
+          gsl_matrix_view G_sub =
+              gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1);
+          CenterMatrix(&G_sub.matrix);
+          d = 0;
+          for (size_t j = 0; j < G->size1; j++) {
+            d += gsl_matrix_get(&G_sub.matrix, j, j);
+          }
+          d /= (double)G->size1;
+          (cPar.v_traceG).push_back(d);
+        }
+      } else if (!(cPar.file_kin).empty()) {
+        ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+                     cPar.k_mode, cPar.error, G);
+        if (cPar.error == true) {
+          cout << "error! fail to read kinship/relatedness file. " << endl;
+          return;
+        }
+
+        // center matrix G
+        CenterMatrix(G);
+
+        (cPar.v_traceG).clear();
+        double d = 0;
+        for (size_t j = 0; j < G->size1; j++) {
+          d += gsl_matrix_get(G, j, j);
+        }
+        d /= (double)G->size1;
+        (cPar.v_traceG).push_back(d);
+      }
+      /*
+      //eigen-decomposition and calculate trace_G
+      cout<<"Start Eigen-Decomposition..."<<endl;
+      time_start=clock();
+
+      if (cPar.a_mode==31) {
+              cPar.trace_G=EigenDecomp (G, U, eval, 1);
+      } else {
+              cPar.trace_G=EigenDecomp (G, U, eval, 0);
+      }
+
+      cPar.trace_G=0.0;
+      for (size_t i=0; i<eval->size; i++) {
+              if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
+              cPar.trace_G+=gsl_vector_get (eval, i);
+      }
+      cPar.trace_G/=(double)eval->size;
+
+      cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+} else {
+      ReadFile_eigenU (cPar.file_ku, cPar.error, U);
+      if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl;
+return;}
+
+      ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
+      if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl;
+return;}
+
+      cPar.trace_G=0.0;
+      for (size_t i=0; i<eval->size; i++) {
+              if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
+              cPar.trace_G+=gsl_vector_get(eval, i);
+      }
+      cPar.trace_G/=(double)eval->size;
 }
+*/
+      // fit multiple variance components
+      if (cPar.n_ph == 1) {
+        //		  if (cPar.n_vc==1) {
+        /*
+        //calculate UtW and Uty
+        CalcUtX (U, W, UtW);
+        CalcUtX (U, Y, UtY);
+
+        gsl_vector_view beta=gsl_matrix_row (B, 0);
+        gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
+        gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
+
+        CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+        cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+        CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+        cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
+
+        cPar.beta_mle_null.clear();
+        cPar.se_beta_mle_null.clear();
+        for (size_t i=0; i<B->size2; i++) {
+          cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
+          cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+        }
+
+        CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+        cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+        CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+        cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
+        cPar.beta_remle_null.clear();
+        cPar.se_beta_remle_null.clear();
+        for (size_t i=0; i<B->size2; i++) {
+          cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
+          cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+        }
+
+        CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+        cPar.pve_null, cPar.pve_se_null);
+        cPar.PrintSummary();
+
+        //calculate and output residuals
+        if (cPar.a_mode==5) {
+          gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
+          gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
+          gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
+          gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
+          gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
+
+          //obtain Utu and Ute
+          gsl_vector_memcpy (y_hat, &UtY_col.vector);
+          gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+          double d, u, e;
+          for (size_t i=0; i<eval->size; i++) {
+            d=gsl_vector_get (eval, i);
+            u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat,
+        i);
+            e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
+            gsl_vector_set (Utu_hat, i, u);
+            gsl_vector_set (Ute_hat, i, e);
+          }
+
+          //obtain u and e
+          gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+          gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+          //output residuals
+          cPar.WriteVector(u_hat, "residU");
+          cPar.WriteVector(e_hat, "residE");
+
+          gsl_vector_free(u_hat);
+          gsl_vector_free(e_hat);
+          gsl_vector_free(y_hat);
+        }
+*/
+        //		  } else {
+        gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+        VC cVc;
+        cVc.CopyFromParam(cPar);
+        if (cPar.a_mode == 61) {
+          cVc.CalcVChe(G, W, &Y_col.vector);
+        } else if (cPar.a_mode == 62) {
+          cVc.CalcVCreml(cPar.noconstrain, G, W, &Y_col.vector);
+        } else {
+          cVc.CalcVCacl(G, W, &Y_col.vector);
+        }
+        cVc.CopyToParam(cPar);
+        // obtain pve from sigma2
+        // obtain se_pve from se_sigma2
+
+        //}
+      }
+    }
+  }
+
+  // compute confidence intervals with additional summary statistics
+  // we do not check the sign of z-scores here, but they have to be matched with
+  // the genotypes
+  if (cPar.a_mode == 66 || cPar.a_mode == 67) {
+    // read reference file first
+    gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+    gsl_matrix *Svar = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+    gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+
+    gsl_matrix_set_zero(S);
+    gsl_matrix_set_zero(Svar);
+    gsl_vector_set_zero(s_ref);
+
+    if (!cPar.file_ref.empty()) {
+      ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
+    } else {
+      ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
+    }
+
+    // need to obtain a common set of SNPs between beta file and the genotype
+    // file; these are saved in mapRS2wA and mapRS2wK
+    // normalize the weight in mapRS2wK to have an average of one; each element
+    // of mapRS2wA is 1
+    set<string> setSnps_beta;
+    ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+    // obtain the weights for wA, which contains the SNP weights for SNPs used
+    // in the model
+    map<string, double> mapRS2wK;
+    cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+    // set up matrices and vector
+    gsl_matrix *Xz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+    gsl_matrix *XWz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+    gsl_matrix *XtXWz =
+        gsl_matrix_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
+    gsl_vector *w = gsl_vector_alloc(mapRS2wK.size());
+    gsl_vector *w1 = gsl_vector_alloc(mapRS2wK.size());
+    gsl_vector *z = gsl_vector_alloc(mapRS2wK.size());
+    gsl_vector *s_vec = gsl_vector_alloc(cPar.n_vc);
+
+    vector<size_t> vec_cat, vec_size;
+    vector<double> vec_z;
+
+    map<string, double> mapRS2z, mapRS2wA;
+    map<string, string> mapRS2A1;
+    string file_str;
+
+    // update s_vec, the number of snps in each category
+    for (size_t i = 0; i < cPar.n_vc; i++) {
+      vec_size.push_back(0);
+    }
+
+    for (map<string, double>::const_iterator it = mapRS2wK.begin();
+         it != mapRS2wK.end(); ++it) {
+      vec_size[cPar.mapRS2cat[it->first]]++;
+    }
+
+    for (size_t i = 0; i < cPar.n_vc; i++) {
+      gsl_vector_set(s_vec, i, vec_size[i]);
+    }
+
+    // update mapRS2wA using v_pve and s_vec
+    if (cPar.a_mode == 66) {
+      for (map<string, double>::const_iterator it = mapRS2wK.begin();
+           it != mapRS2wK.end(); ++it) {
+        mapRS2wA[it->first] = 1;
+      }
+    } else {
+      cPar.UpdateWeight(0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
+    }
+
+    // read in z-scores based on allele 0, and save that into a vector
+    ReadFile_beta(cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
+
+    // update snp indicator, save weights to w, save z-scores to vec_z, save
+    // category label to vec_cat
+    // sign of z is determined by matching alleles
+    cPar.UpdateSNPnZ(mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
+
+    // compute an n by k matrix of X_iWz
+    cout << "Calculating Xz ... " << endl;
+
+    gsl_matrix_set_zero(Xz);
+    gsl_vector_set_all(w1, 1);
+
+    if (!cPar.file_bfile.empty()) {
+      file_str = cPar.file_bfile + ".bed";
+      PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+               vec_cat, w1, z, 0, Xz);
+    } else if (!cPar.file_geno.empty()) {
+      BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+                cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
+    } else if (!cPar.file_mbfile.empty()) {
+      MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+               cPar.mindicator_snp, vec_cat, w1, z, Xz);
+    } else if (!cPar.file_mgeno.empty()) {
+      MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+               cPar.mindicator_snp, vec_cat, w1, z, Xz);
+    }
+    /*
+    cout<<"Xz: "<<endl;
+    for (size_t i=0; i<5; i++) {
+      for (size_t j=0; j<cPar.n_vc; j++) {
+        cout<<gsl_matrix_get (Xz, i, j)<<" ";
+      }
+      cout<<endl;
+    }
+    */
+    if (cPar.a_mode == 66) {
+      gsl_matrix_memcpy(XWz, Xz);
+    } else if (cPar.a_mode == 67) {
+      cout << "Calculating XWz ... " << endl;
+
+      gsl_matrix_set_zero(XWz);
+
+      if (!cPar.file_bfile.empty()) {
+        file_str = cPar.file_bfile + ".bed";
+        PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+                 vec_cat, w, z, 0, XWz);
+      } else if (!cPar.file_geno.empty()) {
+        BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+                  cPar.indicator_snp, vec_cat, w, z, 0, XWz);
+      } else if (!cPar.file_mbfile.empty()) {
+        MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+                 cPar.mindicator_snp, vec_cat, w, z, XWz);
+      } else if (!cPar.file_mgeno.empty()) {
+        MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+                 cPar.mindicator_snp, vec_cat, w, z, XWz);
+      }
+    }
+    /*
+    cout<<"XWz: "<<endl;
+    for (size_t i=0; i<5; i++) {
+      cout<<gsl_vector_get (w, i)<<endl;
+      for (size_t j=0; j<cPar.n_vc; j++) {
+        cout<<gsl_matrix_get (XWz, i, j)<<" ";
+      }
+      cout<<endl;
+    }
+    */
+    // compute a p by k matrix of X_j^TWX_iWz
+    cout << "Calculating XtXWz ... " << endl;
+    gsl_matrix_set_zero(XtXWz);
+
+    if (!cPar.file_bfile.empty()) {
+      file_str = cPar.file_bfile + ".bed";
+      PlinkXtXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+                 XWz, 0, XtXWz);
+    } else if (!cPar.file_geno.empty()) {
+      BimbamXtXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+                  cPar.indicator_snp, XWz, 0, XtXWz);
+    } else if (!cPar.file_mbfile.empty()) {
+      MFILEXtXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+                 cPar.mindicator_snp, XWz, XtXWz);
+    } else if (!cPar.file_mgeno.empty()) {
+      MFILEXtXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+                 cPar.mindicator_snp, XWz, XtXWz);
+    }
+    /*
+    cout<<"XtXWz: "<<endl;
+    for (size_t i=0; i<5; i++) {
+      for (size_t j=0; j<cPar.n_vc; j++) {
+        cout<<gsl_matrix_get (XtXWz, i, j)<<" ";
+      }
+      cout<<endl;
+    }
+    */
+    // compute confidence intervals
+    CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve,
+             cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2,
+             cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+
+    // write files
+    // cPar.WriteMatrix (XWz, "XWz");
+    // cPar.WriteMatrix (XtXWz, "XtXWz");
+    // cPar.WriteVector (w, "w");
+
+    gsl_matrix_free(S);
+    gsl_matrix_free(Svar);
+    gsl_vector_free(s_ref);
+
+    gsl_matrix_free(Xz);
+    gsl_matrix_free(XWz);
+    gsl_matrix_free(XtXWz);
+    gsl_vector_free(w);
+    gsl_vector_free(w1);
+    gsl_vector_free(z);
+    gsl_vector_free(s_vec);
+  }
+
+  // LMM or mvLMM or Eigen-Decomposition
+  if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+      cPar.a_mode == 4 || cPar.a_mode == 5 ||
+      cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen
+    gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+    gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+    gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c
+                                                          // matrix
+    gsl_matrix *se_B = gsl_matrix_alloc(Y->size2, W->size2);
+    gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+    gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+    gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+    gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+    gsl_vector *eval = gsl_vector_alloc(Y->size1);
+    gsl_vector *env = gsl_vector_alloc(Y->size1);
+    gsl_vector *weight = gsl_vector_alloc(Y->size1);
+
+    // set covariates matrix W and phenotype matrix Y
+    // an intercept should be included in W,
+    cPar.CopyCvtPhen(W, Y, 0);
+    if (!cPar.file_gxe.empty()) {
+      cPar.CopyGxe(env);
+    }
+
+    // read relatedness matrix G
+    if (!(cPar.file_kin).empty()) {
+      ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+                   cPar.k_mode, cPar.error, G);
+      if (cPar.error == true) {
+        cout << "error! fail to read kinship/relatedness file. " << endl;
+        return;
+      }
+
+      // center matrix G
+      CenterMatrix(G);
+
+      // if residual weights are provided, then rescale G by them
+      if (!cPar.file_weight.empty()) {
+        cPar.CopyWeight(weight);
+        double d, wi, wj;
+        for (size_t i = 0; i < G->size1; i++) {
+          wi = gsl_vector_get(weight, i);
+          for (size_t j = i; j < G->size2; j++) {
+            wj = gsl_vector_get(weight, j);
+            d = gsl_matrix_get(G, i, j);
+            if (wi <= 0 || wj <= 0) {
+              d = 0;
+            } else {
+              d /= sqrt(wi * wj);
+            }
+            gsl_matrix_set(G, i, j, d);
+            if (j != i) {
+              gsl_matrix_set(G, j, i, d);
+            }
+          }
+        }
+      }
+
+      // eigen-decomposition and calculate trace_G
+      cout << "Start Eigen-Decomposition..." << endl;
+      time_start = clock();
+
+      if (cPar.a_mode == 31) {
+        cPar.trace_G = EigenDecomp(G, U, eval, 1);
+      } else {
+        cPar.trace_G = EigenDecomp(G, U, eval, 0);
+      }
+
+      if (!cPar.file_weight.empty()) {
+        double wi;
+        for (size_t i = 0; i < U->size1; i++) {
+          wi = gsl_vector_get(weight, i);
+          if (wi <= 0) {
+            wi = 0;
+          } else {
+            wi = sqrt(wi);
+          }
+          gsl_vector_view Urow = gsl_matrix_row(U, i);
+          gsl_vector_scale(&Urow.vector, wi);
+        }
+      }
+
+      cPar.trace_G = 0.0;
+      for (size_t i = 0; i < eval->size; i++) {
+        if (gsl_vector_get(eval, i) < 1e-10) {
+          gsl_vector_set(eval, i, 0);
+        }
+        cPar.trace_G += gsl_vector_get(eval, i);
+      }
+      cPar.trace_G /= (double)eval->size;
+
+      cPar.time_eigen =
+          (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+    } else {
+      ReadFile_eigenU(cPar.file_ku, cPar.error, U);
+      if (cPar.error == true) {
+        cout << "error! fail to read the U file. " << endl;
+        return;
+      }
+
+      ReadFile_eigenD(cPar.file_kd, cPar.error, eval);
+      if (cPar.error == true) {
+        cout << "error! fail to read the D file. " << endl;
+        return;
+      }
+
+      cPar.trace_G = 0.0;
+      for (size_t i = 0; i < eval->size; i++) {
+        if (gsl_vector_get(eval, i) < 1e-10) {
+          gsl_vector_set(eval, i, 0);
+        }
+        cPar.trace_G += gsl_vector_get(eval, i);
+      }
+      cPar.trace_G /= (double)eval->size;
+    }
+
+    if (cPar.a_mode == 31) {
+      cPar.WriteMatrix(U, "eigenU");
+      cPar.WriteVector(eval, "eigenD");
+    } else if (!cPar.file_gene.empty()) {
+      // calculate UtW and Uty
+      CalcUtX(U, W, UtW);
+      CalcUtX(U, Y, UtY);
+
+      LMM cLmm;
+      cLmm.CopyFromParam(cPar);
+
+      gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+      gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+      cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
+                       &Y_col.vector); // y is the predictor, not the phenotype
+
+      cLmm.WriteFiles();
+      cLmm.CopyToParam(cPar);
+    } else {
+      // calculate UtW and Uty
+      CalcUtX(U, W, UtW);
+      CalcUtX(U, Y, UtY);
+
+      // calculate REMLE/MLE estimate and pve for univariate model
+      if (cPar.n_ph == 1) {
+        gsl_vector_view beta = gsl_matrix_row(B, 0);
+        gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
+        gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+        CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+                   cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+        CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+                        cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
+                        &se_beta.vector);
+
+        cPar.beta_mle_null.clear();
+        cPar.se_beta_mle_null.clear();
+        for (size_t i = 0; i < B->size2; i++) {
+          cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
+          cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
+        }
+
+        CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+                   cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+        CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+                        cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector,
+                        &se_beta.vector);
+        cPar.beta_remle_null.clear();
+        cPar.se_beta_remle_null.clear();
+        for (size_t i = 0; i < B->size2; i++) {
+          cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
+          cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
+        }
+
+        CalcPve(eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+                cPar.pve_null, cPar.pve_se_null);
+        cPar.PrintSummary();
+
+        // calculate and output residuals
+        if (cPar.a_mode == 5) {
+          gsl_vector *Utu_hat = gsl_vector_alloc(Y->size1);
+          gsl_vector *Ute_hat = gsl_vector_alloc(Y->size1);
+          gsl_vector *u_hat = gsl_vector_alloc(Y->size1);
+          gsl_vector *e_hat = gsl_vector_alloc(Y->size1);
+          gsl_vector *y_hat = gsl_vector_alloc(Y->size1);
+
+          // obtain Utu and Ute
+          gsl_vector_memcpy(y_hat, &UtY_col.vector);
+          gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+          double d, u, e;
+          for (size_t i = 0; i < eval->size; i++) {
+            d = gsl_vector_get(eval, i);
+            u = cPar.l_remle_null * d / (cPar.l_remle_null * d + 1.0) *
+                gsl_vector_get(y_hat, i);
+            e = 1.0 / (cPar.l_remle_null * d + 1.0) * gsl_vector_get(y_hat, i);
+            gsl_vector_set(Utu_hat, i, u);
+            gsl_vector_set(Ute_hat, i, e);
+          }
+
+          // obtain u and e
+          gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+          gsl_blas_dgemv(CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+          // output residuals
+          cPar.WriteVector(u_hat, "residU");
+          cPar.WriteVector(e_hat, "residE");
+
+          gsl_vector_free(u_hat);
+          gsl_vector_free(e_hat);
+          gsl_vector_free(y_hat);
+        }
+      }
+
+      // Fit LMM or mvLMM
+      if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+          cPar.a_mode == 4) {
+        if (cPar.n_ph == 1) {
+          LMM cLmm;
+          cLmm.CopyFromParam(cPar);
+
+          gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+          gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+          if (!cPar.file_bfile.empty()) {
+            if (cPar.file_gxe.empty()) {
+              cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
+                                &Y_col.vector);
+            } else {
+              cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
+                                   &Y_col.vector, env);
+            }
+          }
+          // WJA added
+          else if (!cPar.file_oxford.empty()) {
+            cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
+          } else {
+            if (cPar.file_gxe.empty()) {
+              cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
+                                 &Y_col.vector);
+            } else {
+              cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
+                                    &Y_col.vector, env);
+            }
+          }
+
+          cLmm.WriteFiles();
+          cLmm.CopyToParam(cPar);
+        } else {
+          MVLMM cMvlmm;
+          cMvlmm.CopyFromParam(cPar);
+
+          if (!cPar.file_bfile.empty()) {
+            if (cPar.file_gxe.empty()) {
+              cMvlmm.AnalyzePlink(U, eval, UtW, UtY);
+            } else {
+              cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
+            }
+          } else if (!cPar.file_oxford.empty()) {
+            cMvlmm.Analyzebgen(U, eval, UtW, UtY);
+          } else {
+            if (cPar.file_gxe.empty()) {
+              cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
+            } else {
+              cMvlmm.AnalyzeBimbamGXE(U, eval, UtW, UtY, env);
+            }
+          }
+
+          cMvlmm.WriteFiles();
+          cMvlmm.CopyToParam(cPar);
+        }
+      }
+    }
+
+    // release all matrices and vectors
+    gsl_matrix_free(Y);
+    gsl_matrix_free(W);
+    gsl_matrix_free(B);
+    gsl_matrix_free(se_B);
+    gsl_matrix_free(G);
+    gsl_matrix_free(U);
+    gsl_matrix_free(UtW);
+    gsl_matrix_free(UtY);
+    gsl_vector_free(eval);
+    gsl_vector_free(env);
+  }
+
+  // BSLMM
+  if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+    gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+    gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+    gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+    gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+    // set covariates matrix W and phenotype vector y
+    // an intercept should be included in W,
+    cPar.CopyCvtPhen(W, y, 0);
+
+    // center y, even for case/control data
+    cPar.pheno_mean = CenterVector(y);
+
+    // run bvsr if rho==1
+    if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+      // read genotypes X (not UtX)
+      cPar.ReadGenotypes(UtX, G, false);
+
+      // perform BSLMM analysis
+      BSLMM cBslmm;
+      cBslmm.CopyFromParam(cPar);
+      time_start = clock();
+      cBslmm.MCMC(UtX, y);
+      cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+      cBslmm.CopyToParam(cPar);
+      // else, if rho!=1
+    } else {
+      gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+      gsl_vector *eval = gsl_vector_alloc(y->size);
+      gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+      gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+      // read relatedness matrix G
+      if (!(cPar.file_kin).empty()) {
+        cPar.ReadGenotypes(UtX, G, false);
+
+        // read relatedness matrix G
+        ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+                     cPar.k_mode, cPar.error, G);
+        if (cPar.error == true) {
+          cout << "error! fail to read kinship/relatedness file. " << endl;
+          return;
+        }
+
+        // center matrix G
+        CenterMatrix(G);
+      } else {
+        cPar.ReadGenotypes(UtX, G, true);
+      }
+
+      // eigen-decomposition and calculate trace_G
+      cout << "Start Eigen-Decomposition..." << endl;
+      time_start = clock();
+      cPar.trace_G = EigenDecomp(G, U, eval, 0);
+      cPar.trace_G = 0.0;
+      for (size_t i = 0; i < eval->size; i++) {
+        if (gsl_vector_get(eval, i) < 1e-10) {
+          gsl_vector_set(eval, i, 0);
+        }
+        cPar.trace_G += gsl_vector_get(eval, i);
+      }
+      cPar.trace_G /= (double)eval->size;
+      cPar.time_eigen =
+          (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      // calculate UtW and Uty
+      CalcUtX(U, W, UtW);
+      CalcUtX(U, y, Uty);
+
+      // calculate REMLE/MLE estimate and pve
+      CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+                 cPar.l_mle_null, cPar.logl_mle_H0);
+      CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+                 cPar.l_remle_null, cPar.logl_remle_H0);
+      CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+              cPar.pve_se_null);
+
+      cPar.PrintSummary();
+
+      // Create and calculate UtX; this uses a large amount of memory
+      cout << "Calculating UtX..." << endl;
+      time_start = clock();
+      CalcUtX(U, UtX);
+      cPar.time_UtX = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      // perform BSLMM or BSLMMDAP analysis
+      if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+        BSLMM cBslmm;
+        cBslmm.CopyFromParam(cPar);
+        time_start = clock();
+        if (cPar.a_mode == 12) { // ridge regression
+          cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
+        } else { // Run MCMC
+          cBslmm.MCMC(U, UtX, Uty, eval, y);
+        }
+        cPar.time_opt =
+            (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+        cBslmm.CopyToParam(cPar);
+      } else {
+      }
+
+      // release all matrices and vectors
+      gsl_matrix_free(G);
+      gsl_matrix_free(U);
+      gsl_matrix_free(UtW);
+      gsl_vector_free(eval);
+      gsl_vector_free(Uty);
+    }
+    gsl_matrix_free(W);
+    gsl_vector_free(y);
+    gsl_matrix_free(UtX);
+  }
+
+  // BSLMM-DAP
+  if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
+    if (cPar.a_mode == 14) {
+      gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+      gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+      gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+      gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+      // set covariates matrix W and phenotype vector y
+      // an intercept should be included in W,
+      cPar.CopyCvtPhen(W, y, 0);
+
+      // center y, even for case/control data
+      cPar.pheno_mean = CenterVector(y);
+
+      // run bvsr if rho==1
+      if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+        // read genotypes X (not UtX)
+        cPar.ReadGenotypes(UtX, G, false);
+
+        // perform BSLMM analysis
+        BSLMM cBslmm;
+        cBslmm.CopyFromParam(cPar);
+        time_start = clock();
+        cBslmm.MCMC(UtX, y);
+        cPar.time_opt =
+            (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+        cBslmm.CopyToParam(cPar);
+        // else, if rho!=1
+      } else {
+        gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+        gsl_vector *eval = gsl_vector_alloc(y->size);
+        gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+        gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+        // read relatedness matrix G
+        if (!(cPar.file_kin).empty()) {
+          cPar.ReadGenotypes(UtX, G, false);
+
+          // read relatedness matrix G
+          ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+                       cPar.k_mode, cPar.error, G);
+          if (cPar.error == true) {
+            cout << "error! fail to read kinship/relatedness file. " << endl;
+            return;
+          }
+
+          // center matrix G
+          CenterMatrix(G);
+        } else {
+          cPar.ReadGenotypes(UtX, G, true);
+        }
+
+        // eigen-decomposition and calculate trace_G
+        cout << "Start Eigen-Decomposition..." << endl;
+        time_start = clock();
+        cPar.trace_G = EigenDecomp(G, U, eval, 0);
+        cPar.trace_G = 0.0;
+        for (size_t i = 0; i < eval->size; i++) {
+          if (gsl_vector_get(eval, i) < 1e-10) {
+            gsl_vector_set(eval, i, 0);
+          }
+          cPar.trace_G += gsl_vector_get(eval, i);
+        }
+        cPar.trace_G /= (double)eval->size;
+        cPar.time_eigen =
+            (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // calculate UtW and Uty
+        CalcUtX(U, W, UtW);
+        CalcUtX(U, y, Uty);
+
+        // calculate REMLE/MLE estimate and pve
+        CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+                   cPar.l_mle_null, cPar.logl_mle_H0);
+        CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+                   cPar.l_remle_null, cPar.logl_remle_H0);
+        CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+                cPar.pve_se_null);
+
+        cPar.PrintSummary();
+
+        // Create and calculate UtX; this uses a large amount of memory
+        cout << "Calculating UtX..." << endl;
+        time_start = clock();
+        CalcUtX(U, UtX);
+        cPar.time_UtX =
+            (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // perform analysis; assume X and y are already centered
+        BSLMMDAP cBslmmDap;
+        cBslmmDap.CopyFromParam(cPar);
+        time_start = clock();
+        cBslmmDap.DAP_CalcBF(U, UtX, Uty, eval, y);
+        cPar.time_opt =
+            (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+        cBslmmDap.CopyToParam(cPar);
+
+        // release all matrices and vectors
+        gsl_matrix_free(G);
+        gsl_matrix_free(U);
+        gsl_matrix_free(UtW);
+        gsl_vector_free(eval);
+        gsl_vector_free(Uty);
+      }
+
+      gsl_matrix_free(W);
+      gsl_vector_free(y);
+      gsl_matrix_free(UtX);
+    } else if (cPar.a_mode == 15) {
+      // perform EM algorithm and estimate parameters
+      vector<string> vec_rs;
+      vector<double> vec_sa2, vec_sb2, wab;
+      vector<vector<vector<double>>> BF;
+
+      // read hyp and bf files (functions defined in BSLMMDAP)
+      ReadFile_hyb(cPar.file_hyp, vec_sa2, vec_sb2, wab);
+      ReadFile_bf(cPar.file_bf, vec_rs, BF);
+
+      cPar.ns_test = vec_rs.size();
+      if (wab.size() != BF[0][0].size()) {
+        cout << "error! hyp and bf files dimension do not match" << endl;
+      }
+
+      // load annotations
+      gsl_matrix *Ac;
+      gsl_matrix_int *Ad;
+      gsl_vector_int *dlevel;
+      size_t kc, kd;
+      if (!cPar.file_cat.empty()) {
+        ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
+      } else {
+        kc = 0;
+        kd = 0;
+      }
+
+      cout << "## number of blocks = " << BF.size() << endl;
+      cout << "## number of analyzed SNPs = " << vec_rs.size() << endl;
+      cout << "## grid size for hyperparameters = " << wab.size() << endl;
+      cout << "## number of continuous annotations = " << kc << endl;
+      cout << "## number of discrete annotations = " << kd << endl;
+
+      // DAP_EstimateHyper (const size_t kc, const size_t kd, const
+      // vector<string> &vec_rs, const vector<double> &vec_sa2, const
+      // vector<double> &vec_sb2, const vector<double> &wab, const
+      // vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int
+      // *Ad, gsl_vector_int *dlevel);
+
+      // perform analysis
+      BSLMMDAP cBslmmDap;
+      cBslmmDap.CopyFromParam(cPar);
+      time_start = clock();
+      cBslmmDap.DAP_EstimateHyper(kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac,
+                                  Ad, dlevel);
+      cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+      cBslmmDap.CopyToParam(cPar);
+
+      gsl_matrix_free(Ac);
+      gsl_matrix_int_free(Ad);
+      gsl_vector_int_free(dlevel);
+    } else {
+      //
+    }
+  }
+
+  /*
+  //LDR (change 14 to 16?)
+  if (cPar.a_mode==14) {
+          gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+          gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
+          gsl_matrix *G=gsl_matrix_alloc (1, 1);
+          vector<vector<unsigned char> > Xt;
+
+          //set covariates matrix W and phenotype vector y
+          //an intercept is included in W
+          cPar.CopyCvtPhen (W, y, 0);
 
+          //read in genotype matrix X
+          cPar.ReadGenotypes (Xt, G, false);
 
+          LDR cLdr;
+          cLdr.CopyFromParam(cPar);
+          time_start=clock();
 
+          cLdr.VB(Xt, W, y);
 
-void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
-{
-	string file_str;
-	file_str=cPar.path_out+"/"+cPar.file_out;
-	file_str+=".log.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {cout<<"error writing log file: "<<file_str.c_str()<<endl; return;}
-
-	outfile<<"##"<<endl;
-	outfile<<"## GEMMA Version = "<<version<<endl;
-
-	outfile<<"##"<<endl;
-	outfile<<"## Command Line Input = ";
-	for(int i = 0; i < argc; i++) {
-		outfile<<argv[i]<<" ";
-	}
-	outfile<<endl;
-
-	outfile<<"##"<<endl;
-	time_t  rawtime;
-	time(&rawtime);
-	tm *ptm = localtime (&rawtime);
-
-	outfile<<"## Date = "<<asctime(ptm);
-	  //ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
-
-	outfile<<"##"<<endl;
-	outfile<<"## Summary Statistics:"<<endl;
-	if (!cPar.file_cor.empty() || !cPar.file_study.empty() || !cPar.file_mstudy.empty() ) {
-	  outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
-	  outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
-	  //outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
-	  //outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_ref<<endl;
-	  //outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-	  //outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
-	  outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-
-	  outfile<<"## pve estimates = ";
-	    for (size_t i=0; i<cPar.v_pve.size(); i++) {
-	      outfile<<"  "<<cPar.v_pve[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(pve) = ";
-	    for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_pve[i];
-	    }
-	    outfile<<endl;
-
-	    if (cPar.n_vc>1) {
-	      outfile<<"## total pve = "<<cPar.pve_total<<endl;
-	      outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
-	    }
-
-	    outfile<<"## sigma2 per snp = ";
-	    for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
-	      outfile<<"  "<<cPar.v_sigma2[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(sigma2 per snp) = ";
-	    for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_sigma2[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## enrichment = ";
-	    for (size_t i=0; i<cPar.v_enrich.size(); i++) {
-	      outfile<<"  "<<cPar.v_enrich[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(enrichment) = ";
-	    for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_enrich[i];
-	    }
-	    outfile<<endl;
-	} else if (!cPar.file_beta.empty() && (cPar.a_mode==61 || cPar.a_mode==62) ) {
-	  outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
-	  outfile<<"## number of total individuals in the reference = "<<cPar.ni_total<<endl;
-	  outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
-	  outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_total<<endl;
-	  outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-	  outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-	} else if (!cPar.file_beta.empty() && (cPar.a_mode==66 || cPar.a_mode==67) ) {
-	  outfile<<"## number of total individuals in the sample = "<<cPar.ni_total<<endl;
-	  outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
-	  outfile<<"## number of total SNPs in the sample = "<<cPar.ns_total<<endl;
-	  outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-	  outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-
-	  outfile<<"## pve estimates = ";
-	    for (size_t i=0; i<cPar.v_pve.size(); i++) {
-	      outfile<<"  "<<cPar.v_pve[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(pve) = ";
-	    for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_pve[i];
-	    }
-	    outfile<<endl;
-
-	    if (cPar.n_vc>1) {
-	      outfile<<"## total pve = "<<cPar.pve_total<<endl;
-	      outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
-	    }
-
-	    outfile<<"## sigma2 per snp = ";
-	    for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
-	      outfile<<"  "<<cPar.v_sigma2[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(sigma2 per snp) = ";
-	    for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_sigma2[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## enrichment = ";
-	    for (size_t i=0; i<cPar.v_enrich.size(); i++) {
-	      outfile<<"  "<<cPar.v_enrich[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(enrichment) = ";
-	    for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_enrich[i];
-	    }
-	    outfile<<endl;
-	} else {
-	  outfile<<"## number of total individuals = "<<cPar.ni_total<<endl;
-
-	  if (cPar.a_mode==43) {
-	    outfile<<"## number of analyzed individuals = "<<cPar.ni_cvt<<endl;
-	    outfile<<"## number of individuals with full phenotypes = "<<cPar.ni_test<<endl;
-	  } else if (cPar.a_mode!=27 && cPar.a_mode!=28) {
-	    outfile<<"## number of analyzed individuals = "<<cPar.ni_test<<endl;
-	  }
-
-	  outfile<<"## number of covariates = "<<cPar.n_cvt<<endl;
-	  outfile<<"## number of phenotypes = "<<cPar.n_ph<<endl;
-	  if (cPar.a_mode==43) {
-	    outfile<<"## number of observed data = "<<cPar.np_obs<<endl;
-	    outfile<<"## number of missing data = "<<cPar.np_miss<<endl;
-	  }
-	  if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63 || cPar.a_mode==66 || cPar.a_mode==67) {
-	    outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-	  }
-
-	  if (!(cPar.file_gene).empty()) {
-	    outfile<<"## number of total genes = "<<cPar.ng_total<<endl;
-	    outfile<<"## number of analyzed genes = "<<cPar.ng_test<<endl;
-	  } else if (cPar.file_epm.empty()) {
-	    outfile<<"## number of total SNPs = "<<cPar.ns_total<<endl;
-	    outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-	  } else {
-	    outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-	  }
-
-	  if (cPar.a_mode==13) {
-	    outfile<<"## number of cases = "<<cPar.ni_case<<endl;
-	    outfile<<"## number of controls = "<<cPar.ni_control<<endl;
-	  }
-	}
-
-	if ( (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) {
-	    //	        outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
-	  if (cPar.n_ph==1) {
-	    outfile<<"## pve estimates = ";
-	    for (size_t i=0; i<cPar.v_pve.size(); i++) {
-	      outfile<<"  "<<cPar.v_pve[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(pve) = ";
-	    for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_pve[i];
-	    }
-	    outfile<<endl;
-
-	    if (cPar.n_vc>1) {
-	      outfile<<"## total pve = "<<cPar.pve_total<<endl;
-	      outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
-	    }
-
-	    outfile<<"## sigma2 estimates = ";
-	    for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
-	      outfile<<"  "<<cPar.v_sigma2[i];
-	    }
-	    outfile<<endl;
-
-	    outfile<<"## se(sigma2) = ";
-	    for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
-	      outfile<<"  "<<cPar.v_se_sigma2[i];
-	    }
-	    outfile<<endl;
-
-	    if (!cPar.file_beta.empty() ) {
-	      outfile<<"## enrichment = ";
-	      for (size_t i=0; i<cPar.v_enrich.size(); i++) {
-		outfile<<"  "<<cPar.v_enrich[i];
-	      }
-	      outfile<<endl;
-
-	      outfile<<"## se(enrichment) = ";
-	      for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
-		outfile<<"  "<<cPar.v_se_enrich[i];
-	      }
-	      outfile<<endl;
-	    }
-		  /*
-			outfile<<"## beta estimate in the null model = ";
-			for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
-				outfile<<"  "<<cPar.beta_remle_null[i];
-			}
-			outfile<<endl;
-			outfile<<"## se(beta) = ";
-			for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
-				outfile<<"  "<<cPar.se_beta_remle_null[i];
-			}
-			outfile<<endl;
-		  */
-	  }
-	}
-
-	if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
-		outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
-		outfile<<"## MLE log-likelihood in the null model = "<<cPar.logl_mle_H0<<endl;
-		if (cPar.n_ph==1) {
-			//outfile<<"## lambda REMLE estimate in the null (linear mixed) model = "<<cPar.l_remle_null<<endl;
-			//outfile<<"## lambda MLE estimate in the null (linear mixed) model = "<<cPar.l_mle_null<<endl;
-			outfile<<"## pve estimate in the null model = "<<cPar.pve_null<<endl;
-			outfile<<"## se(pve) in the null model = "<<cPar.pve_se_null<<endl;
-			outfile<<"## vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
-			outfile<<"## ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
-			outfile<<"## beta estimate in the null model = ";
-			for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
-				outfile<<"  "<<cPar.beta_remle_null[i];
-			}
-			outfile<<endl;
-			outfile<<"## se(beta) = ";
-			for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
-				outfile<<"  "<<cPar.se_beta_remle_null[i];
-			}
-			outfile<<endl;
-
-		} else {
-			size_t c;
-			outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<cPar.Vg_remle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## se(Vg): "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<sqrt(cPar.VVg_remle_null[c])<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<cPar.Ve_remle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## se(Ve): "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<sqrt(cPar.VVe_remle_null[c])<<"\t";
-				}
-				outfile<<endl;
-			}
-
-			outfile<<"## MLE estimate for Vg in the null model: "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<cPar.n_ph; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<cPar.Vg_mle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## se(Vg): "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<sqrt(cPar.VVg_mle_null[c])<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## MLE estimate for Ve in the null model: "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<cPar.n_ph; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<cPar.Ve_mle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## se(Ve): "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<sqrt(cPar.VVe_mle_null[c])<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## estimate for B (d by c) in the null model (columns correspond to the covariates provided in the file): "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<cPar.n_cvt; j++) {
-					c=i*cPar.n_cvt+j;
-					outfile<<cPar.beta_remle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## se(B): "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<cPar.n_cvt; j++) {
-					c=i*cPar.n_cvt+j;
-					outfile<<cPar.se_beta_remle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-		}
-	}
-
-	/*
-	if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
-		if (cPar.n_ph==1) {
-			outfile<<"## REMLE vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
-			outfile<<"## REMLE ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
-		} else {
-			size_t c;
-			outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<cPar.Vg_remle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-			outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
-			for (size_t i=0; i<cPar.n_ph; i++) {
-				for (size_t j=0; j<=i; j++) {
-					c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
-					outfile<<cPar.Ve_remle_null[c]<<"\t";
-				}
-				outfile<<endl;
-			}
-		}
-	}
-	 */
-
-
-	if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
-	  outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl;
-	}
-
-	if (cPar.a_mode==11 || cPar.a_mode==13) {
-		outfile<<"##"<<endl;
-		outfile<<"## MCMC related:"<<endl;
-		outfile<<"## initial value of h = "<<cPar.cHyp_initial.h<<endl;
-		outfile<<"## initial value of rho = "<<cPar.cHyp_initial.rho<<endl;
-		outfile<<"## initial value of pi = "<<exp(cPar.cHyp_initial.logp)<<endl;
-		outfile<<"## initial value of |gamma| = "<<cPar.cHyp_initial.n_gamma<<endl;
-		outfile<<"## random seed = "<<cPar.randseed<<endl;
-		outfile<<"## acceptance ratio = "<<(double)cPar.n_accept/(double)((cPar.w_step+cPar.s_step)*cPar.n_mh)<<endl;
-	}
-
-	outfile<<"##"<<endl;
-	outfile<<"## Computation Time:"<<endl;
-	outfile<<"## total computation time = "<<cPar.time_total<<" min "<<endl;
-	outfile<<"## computation time break down: "<<endl;
-	if (cPar.a_mode==21 || cPar.a_mode==22 || cPar.a_mode==11 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
-		outfile<<"##      time on calculating relatedness matrix = "<<cPar.time_G<<" min "<<endl;
-	}
-	if (cPar.a_mode==31) {
-		outfile<<"##      time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
-	}
-	if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
-		outfile<<"##      time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
-		outfile<<"##      time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl;
-	}
-	if ((cPar.a_mode>=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) {
-		outfile<<"##      time on optimization = "<<cPar.time_opt<<" min "<<endl;
-	}
-	if (cPar.a_mode==11 || cPar.a_mode==13) {
-		outfile<<"##      time on proposal = "<<cPar.time_Proposal<<" min "<<endl;
-		outfile<<"##      time on mcmc = "<<cPar.time_opt<<" min "<<endl;
-		outfile<<"##      time on Omega = "<<cPar.time_Omega<<" min "<<endl;
-	}
-	if (cPar.a_mode==41 || cPar.a_mode==42) {
-		outfile<<"##      time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
-	}
-	if (cPar.a_mode==43) {
-		outfile<<"##      time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
-		outfile<<"##      time on predicting phenotypes = "<<cPar.time_opt<<" min "<<endl;
-	}
-	outfile<<"##"<<endl;
-
-	outfile.close();
-	outfile.clear();
-	return;
+          cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+          cLdr.CopyToParam(cPar);
+
+          gsl_vector_free (y);
+          gsl_matrix_free (W);
+          gsl_matrix_free (G);
+  }
+  */
+
+  cPar.time_total = (clock() - time_begin) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  return;
 }
 
+void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
+  string file_str;
+  file_str = cPar.path_out + "/" + cPar.file_out;
+  file_str += ".log.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing log file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  outfile << "##" << endl;
+  outfile << "## GEMMA Version = " << version << endl;
 
+  outfile << "##" << endl;
+  outfile << "## Command Line Input = ";
+  for (int i = 0; i < argc; i++) {
+    outfile << argv[i] << " ";
+  }
+  outfile << endl;
+
+  outfile << "##" << endl;
+  time_t rawtime;
+  time(&rawtime);
+  tm *ptm = localtime(&rawtime);
+
+  outfile << "## Date = " << asctime(ptm);
+  // ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
+
+  outfile << "##" << endl;
+  outfile << "## Summary Statistics:" << endl;
+  if (!cPar.file_cor.empty() || !cPar.file_study.empty() ||
+      !cPar.file_mstudy.empty()) {
+    outfile << "## number of total individuals in the sample = "
+            << cPar.ni_study << endl;
+    outfile << "## number of total individuals in the reference = "
+            << cPar.ni_ref << endl;
+    // outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
+    // outfile<<"## number of total SNPs in the reference panel =
+    // "<<cPar.ns_ref<<endl;
+    // outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+    // outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
+    outfile << "## number of variance components = " << cPar.n_vc << endl;
+
+    outfile << "## pve estimates = ";
+    for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+      outfile << "  " << cPar.v_pve[i];
+    }
+    outfile << endl;
+
+    outfile << "## se(pve) = ";
+    for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+      outfile << "  " << cPar.v_se_pve[i];
+    }
+    outfile << endl;
+
+    if (cPar.n_vc > 1) {
+      outfile << "## total pve = " << cPar.pve_total << endl;
+      outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+    }
+
+    outfile << "## sigma2 per snp = ";
+    for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+      outfile << "  " << cPar.v_sigma2[i];
+    }
+    outfile << endl;
+
+    outfile << "## se(sigma2 per snp) = ";
+    for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+      outfile << "  " << cPar.v_se_sigma2[i];
+    }
+    outfile << endl;
+
+    outfile << "## enrichment = ";
+    for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+      outfile << "  " << cPar.v_enrich[i];
+    }
+    outfile << endl;
+
+    outfile << "## se(enrichment) = ";
+    for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+      outfile << "  " << cPar.v_se_enrich[i];
+    }
+    outfile << endl;
+  } else if (!cPar.file_beta.empty() &&
+             (cPar.a_mode == 61 || cPar.a_mode == 62)) {
+    outfile << "## number of total individuals in the sample = "
+            << cPar.ni_study << endl;
+    outfile << "## number of total individuals in the reference = "
+            << cPar.ni_total << endl;
+    outfile << "## number of total SNPs in the sample = " << cPar.ns_study
+            << endl;
+    outfile << "## number of total SNPs in the reference panel = "
+            << cPar.ns_total << endl;
+    outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    outfile << "## number of variance components = " << cPar.n_vc << endl;
+  } else if (!cPar.file_beta.empty() &&
+             (cPar.a_mode == 66 || cPar.a_mode == 67)) {
+    outfile << "## number of total individuals in the sample = "
+            << cPar.ni_total << endl;
+    outfile << "## number of total individuals in the reference = "
+            << cPar.ni_ref << endl;
+    outfile << "## number of total SNPs in the sample = " << cPar.ns_total
+            << endl;
+    outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    outfile << "## number of variance components = " << cPar.n_vc << endl;
+
+    outfile << "## pve estimates = ";
+    for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+      outfile << "  " << cPar.v_pve[i];
+    }
+    outfile << endl;
+
+    outfile << "## se(pve) = ";
+    for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+      outfile << "  " << cPar.v_se_pve[i];
+    }
+    outfile << endl;
+
+    if (cPar.n_vc > 1) {
+      outfile << "## total pve = " << cPar.pve_total << endl;
+      outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+    }
+
+    outfile << "## sigma2 per snp = ";
+    for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+      outfile << "  " << cPar.v_sigma2[i];
+    }
+    outfile << endl;
+
+    outfile << "## se(sigma2 per snp) = ";
+    for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+      outfile << "  " << cPar.v_se_sigma2[i];
+    }
+    outfile << endl;
+
+    outfile << "## enrichment = ";
+    for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+      outfile << "  " << cPar.v_enrich[i];
+    }
+    outfile << endl;
+
+    outfile << "## se(enrichment) = ";
+    for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+      outfile << "  " << cPar.v_se_enrich[i];
+    }
+    outfile << endl;
+  } else {
+    outfile << "## number of total individuals = " << cPar.ni_total << endl;
+
+    if (cPar.a_mode == 43) {
+      outfile << "## number of analyzed individuals = " << cPar.ni_cvt << endl;
+      outfile << "## number of individuals with full phenotypes = "
+              << cPar.ni_test << endl;
+    } else if (cPar.a_mode != 27 && cPar.a_mode != 28) {
+      outfile << "## number of analyzed individuals = " << cPar.ni_test << endl;
+    }
+
+    outfile << "## number of covariates = " << cPar.n_cvt << endl;
+    outfile << "## number of phenotypes = " << cPar.n_ph << endl;
+    if (cPar.a_mode == 43) {
+      outfile << "## number of observed data = " << cPar.np_obs << endl;
+      outfile << "## number of missing data = " << cPar.np_miss << endl;
+    }
+    if (cPar.a_mode == 25 || cPar.a_mode == 26 || cPar.a_mode == 27 ||
+        cPar.a_mode == 28 || cPar.a_mode == 61 || cPar.a_mode == 62 ||
+        cPar.a_mode == 63 || cPar.a_mode == 66 || cPar.a_mode == 67) {
+      outfile << "## number of variance components = " << cPar.n_vc << endl;
+    }
+
+    if (!(cPar.file_gene).empty()) {
+      outfile << "## number of total genes = " << cPar.ng_total << endl;
+      outfile << "## number of analyzed genes = " << cPar.ng_test << endl;
+    } else if (cPar.file_epm.empty()) {
+      outfile << "## number of total SNPs = " << cPar.ns_total << endl;
+      outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    } else {
+      outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    }
+
+    if (cPar.a_mode == 13) {
+      outfile << "## number of cases = " << cPar.ni_case << endl;
+      outfile << "## number of controls = " << cPar.ni_control << endl;
+    }
+  }
+
+  if ((cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) &&
+      cPar.file_cor.empty() && cPar.file_study.empty() &&
+      cPar.file_mstudy.empty()) {
+    //	        outfile<<"## REMLE log-likelihood in the null model =
+    //"<<cPar.logl_remle_H0<<endl;
+    if (cPar.n_ph == 1) {
+      outfile << "## pve estimates = ";
+      for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+        outfile << "  " << cPar.v_pve[i];
+      }
+      outfile << endl;
+
+      outfile << "## se(pve) = ";
+      for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+        outfile << "  " << cPar.v_se_pve[i];
+      }
+      outfile << endl;
+
+      if (cPar.n_vc > 1) {
+        outfile << "## total pve = " << cPar.pve_total << endl;
+        outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+      }
+
+      outfile << "## sigma2 estimates = ";
+      for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+        outfile << "  " << cPar.v_sigma2[i];
+      }
+      outfile << endl;
+
+      outfile << "## se(sigma2) = ";
+      for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+        outfile << "  " << cPar.v_se_sigma2[i];
+      }
+      outfile << endl;
+
+      if (!cPar.file_beta.empty()) {
+        outfile << "## enrichment = ";
+        for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+          outfile << "  " << cPar.v_enrich[i];
+        }
+        outfile << endl;
+
+        outfile << "## se(enrichment) = ";
+        for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+          outfile << "  " << cPar.v_se_enrich[i];
+        }
+        outfile << endl;
+      }
+      /*
+            outfile<<"## beta estimate in the null model = ";
+            for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
+                    outfile<<"  "<<cPar.beta_remle_null[i];
+            }
+            outfile<<endl;
+            outfile<<"## se(beta) = ";
+            for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
+                    outfile<<"  "<<cPar.se_beta_remle_null[i];
+            }
+            outfile<<endl;
+      */
+    }
+  }
+
+  if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+      cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
+      cPar.a_mode == 12 || cPar.a_mode == 13) {
+    outfile << "## REMLE log-likelihood in the null model = "
+            << cPar.logl_remle_H0 << endl;
+    outfile << "## MLE log-likelihood in the null model = " << cPar.logl_mle_H0
+            << endl;
+    if (cPar.n_ph == 1) {
+      // outfile<<"## lambda REMLE estimate in the null (linear mixed) model =
+      // "<<cPar.l_remle_null<<endl;
+      // outfile<<"## lambda MLE estimate in the null (linear mixed) model =
+      // "<<cPar.l_mle_null<<endl;
+      outfile << "## pve estimate in the null model = " << cPar.pve_null
+              << endl;
+      outfile << "## se(pve) in the null model = " << cPar.pve_se_null << endl;
+      outfile << "## vg estimate in the null model = " << cPar.vg_remle_null
+              << endl;
+      outfile << "## ve estimate in the null model = " << cPar.ve_remle_null
+              << endl;
+      outfile << "## beta estimate in the null model = ";
+      for (size_t i = 0; i < cPar.beta_remle_null.size(); i++) {
+        outfile << "  " << cPar.beta_remle_null[i];
+      }
+      outfile << endl;
+      outfile << "## se(beta) = ";
+      for (size_t i = 0; i < cPar.se_beta_remle_null.size(); i++) {
+        outfile << "  " << cPar.se_beta_remle_null[i];
+      }
+      outfile << endl;
+
+    } else {
+      size_t c;
+      outfile << "## REMLE estimate for Vg in the null model: " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << cPar.Vg_remle_null[c] << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## se(Vg): " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << sqrt(cPar.VVg_remle_null[c]) << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## REMLE estimate for Ve in the null model: " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << cPar.Ve_remle_null[c] << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## se(Ve): " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << sqrt(cPar.VVe_remle_null[c]) << "\t";
+        }
+        outfile << endl;
+      }
+
+      outfile << "## MLE estimate for Vg in the null model: " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j < cPar.n_ph; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << cPar.Vg_mle_null[c] << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## se(Vg): " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << sqrt(cPar.VVg_mle_null[c]) << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## MLE estimate for Ve in the null model: " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j < cPar.n_ph; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << cPar.Ve_mle_null[c] << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## se(Ve): " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j <= i; j++) {
+          c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+              min(i, j);
+          outfile << sqrt(cPar.VVe_mle_null[c]) << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## estimate for B (d by c) in the null model (columns "
+                 "correspond to the covariates provided in the file): "
+              << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j < cPar.n_cvt; j++) {
+          c = i * cPar.n_cvt + j;
+          outfile << cPar.beta_remle_null[c] << "\t";
+        }
+        outfile << endl;
+      }
+      outfile << "## se(B): " << endl;
+      for (size_t i = 0; i < cPar.n_ph; i++) {
+        for (size_t j = 0; j < cPar.n_cvt; j++) {
+          c = i * cPar.n_cvt + j;
+          outfile << cPar.se_beta_remle_null[c] << "\t";
+        }
+        outfile << endl;
+      }
+    }
+  }
+
+  /*
+  if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 ||
+  cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
+          if (cPar.n_ph==1) {
+                  outfile<<"## REMLE vg estimate in the null model =
+  "<<cPar.vg_remle_null<<endl;
+                  outfile<<"## REMLE ve estimate in the null model =
+  "<<cPar.ve_remle_null<<endl;
+          } else {
+                  size_t c;
+                  outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
+                  for (size_t i=0; i<cPar.n_ph; i++) {
+                          for (size_t j=0; j<=i; j++) {
+                                  c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
+                                  outfile<<cPar.Vg_remle_null[c]<<"\t";
+                          }
+                          outfile<<endl;
+                  }
+                  outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
+                  for (size_t i=0; i<cPar.n_ph; i++) {
+                          for (size_t j=0; j<=i; j++) {
+                                  c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
+                                  outfile<<cPar.Ve_remle_null[c]<<"\t";
+                          }
+                          outfile<<endl;
+                  }
+          }
+  }
+   */
+
+  if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13 ||
+      cPar.a_mode == 14 || cPar.a_mode == 16) {
+    outfile << "## estimated mean = " << cPar.pheno_mean << endl;
+  }
+
+  if (cPar.a_mode == 11 || cPar.a_mode == 13) {
+    outfile << "##" << endl;
+    outfile << "## MCMC related:" << endl;
+    outfile << "## initial value of h = " << cPar.cHyp_initial.h << endl;
+    outfile << "## initial value of rho = " << cPar.cHyp_initial.rho << endl;
+    outfile << "## initial value of pi = " << exp(cPar.cHyp_initial.logp)
+            << endl;
+    outfile << "## initial value of |gamma| = " << cPar.cHyp_initial.n_gamma
+            << endl;
+    outfile << "## random seed = " << cPar.randseed << endl;
+    outfile << "## acceptance ratio = "
+            << (double)cPar.n_accept /
+                   (double)((cPar.w_step + cPar.s_step) * cPar.n_mh)
+            << endl;
+  }
+
+  outfile << "##" << endl;
+  outfile << "## Computation Time:" << endl;
+  outfile << "## total computation time = " << cPar.time_total << " min "
+          << endl;
+  outfile << "## computation time break down: " << endl;
+  if (cPar.a_mode == 21 || cPar.a_mode == 22 || cPar.a_mode == 11 ||
+      cPar.a_mode == 13 || cPar.a_mode == 14 || cPar.a_mode == 16) {
+    outfile << "##      time on calculating relatedness matrix = "
+            << cPar.time_G << " min " << endl;
+  }
+  if (cPar.a_mode == 31) {
+    outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
+            << " min " << endl;
+  }
+  if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+      cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
+      cPar.a_mode == 12 || cPar.a_mode == 13 || cPar.a_mode == 14 ||
+      cPar.a_mode == 16) {
+    outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
+            << " min " << endl;
+    outfile << "##      time on calculating UtX = " << cPar.time_UtX << " min "
+            << endl;
+  }
+  if ((cPar.a_mode >= 1 && cPar.a_mode <= 4) ||
+      (cPar.a_mode >= 51 && cPar.a_mode <= 54)) {
+    outfile << "##      time on optimization = " << cPar.time_opt << " min "
+            << endl;
+  }
+  if (cPar.a_mode == 11 || cPar.a_mode == 13) {
+    outfile << "##      time on proposal = " << cPar.time_Proposal << " min "
+            << endl;
+    outfile << "##      time on mcmc = " << cPar.time_opt << " min " << endl;
+    outfile << "##      time on Omega = " << cPar.time_Omega << " min " << endl;
+  }
+  if (cPar.a_mode == 41 || cPar.a_mode == 42) {
+    outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
+            << " min " << endl;
+  }
+  if (cPar.a_mode == 43) {
+    outfile << "##      time on eigen-decomposition = " << cPar.time_eigen
+            << " min " << endl;
+    outfile << "##      time on predicting phenotypes = " << cPar.time_opt
+            << " min " << endl;
+  }
+  outfile << "##" << endl;
+
+  outfile.close();
+  outfile.clear();
+  return;
+}
diff --git a/src/gemma.h b/src/gemma.h
index 78828ef..cd1683a 100644
--- a/src/gemma.h
+++ b/src/gemma.h
@@ -26,22 +26,21 @@ using namespace std;
 class GEMMA {
 
 public:
-	// Parameters.
-	string version;
-	string date;
-	string year;
-
-	// Constructor.
-	GEMMA(void);
-
-	// Functions.
-	void PrintHeader (void);
-	void PrintHelp (size_t option);
-	void PrintLicense (void);
-	void Assign (int argc, char **argv, PARAM &cPar);
-	void BatchRun (PARAM &cPar);
-	void WriteLog (int argc, char **argv, PARAM &cPar);
+  // Parameters.
+  string version;
+  string date;
+  string year;
+
+  // Constructor.
+  GEMMA(void);
+
+  // Functions.
+  void PrintHeader(void);
+  void PrintHelp(size_t option);
+  void PrintLicense(void);
+  void Assign(int argc, char **argv, PARAM &cPar);
+  void BatchRun(PARAM &cPar);
+  void WriteLog(int argc, char **argv, PARAM &cPar);
 };
 
 #endif
-
diff --git a/src/gzstream.cpp b/src/gzstream.cpp
index 688b625..a7014d6 100644
--- a/src/gzstream.cpp
+++ b/src/gzstream.cpp
@@ -28,7 +28,7 @@
 
 #include "gzstream.h"
 #include <iostream>
-#include <string.h>  // for memcpy
+#include <string.h> // for memcpy
 
 #ifdef GZSTREAM_NAMESPACE
 namespace GZSTREAM_NAMESPACE {
@@ -42,119 +42,117 @@ namespace GZSTREAM_NAMESPACE {
 // class gzstreambuf:
 // --------------------------------------
 
-gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
-    if ( is_open())
-        return (gzstreambuf*)0;
-    mode = open_mode;
-    // no append nor read/write mode
-    if ((mode & std::ios::ate) || (mode & std::ios::app)
-        || ((mode & std::ios::in) && (mode & std::ios::out)))
-        return (gzstreambuf*)0;
-    char  fmode[10];
-    char* fmodeptr = fmode;
-    if ( mode & std::ios::in)
-        *fmodeptr++ = 'r';
-    else if ( mode & std::ios::out)
-        *fmodeptr++ = 'w';
-    *fmodeptr++ = 'b';
-    *fmodeptr = '\0';
-    file = gzopen( name, fmode);
-    if (file == 0)
-        return (gzstreambuf*)0;
-    opened = 1;
-    return this;
+gzstreambuf *gzstreambuf::open(const char *name, int open_mode) {
+  if (is_open())
+    return (gzstreambuf *)0;
+  mode = open_mode;
+  // no append nor read/write mode
+  if ((mode & std::ios::ate) || (mode & std::ios::app) ||
+      ((mode & std::ios::in) && (mode & std::ios::out)))
+    return (gzstreambuf *)0;
+  char fmode[10];
+  char *fmodeptr = fmode;
+  if (mode & std::ios::in)
+    *fmodeptr++ = 'r';
+  else if (mode & std::ios::out)
+    *fmodeptr++ = 'w';
+  *fmodeptr++ = 'b';
+  *fmodeptr = '\0';
+  file = gzopen(name, fmode);
+  if (file == 0)
+    return (gzstreambuf *)0;
+  opened = 1;
+  return this;
 }
 
-gzstreambuf * gzstreambuf::close() {
-    if ( is_open()) {
-        sync();
-        opened = 0;
-        if ( gzclose( file) == Z_OK)
-            return this;
-    }
-    return (gzstreambuf*)0;
+gzstreambuf *gzstreambuf::close() {
+  if (is_open()) {
+    sync();
+    opened = 0;
+    if (gzclose(file) == Z_OK)
+      return this;
+  }
+  return (gzstreambuf *)0;
 }
 
 int gzstreambuf::underflow() { // used for input buffer only
-    if ( gptr() && ( gptr() < egptr()))
-        return * reinterpret_cast<unsigned char *>( gptr());
-
-    if ( ! (mode & std::ios::in) || ! opened)
-        return EOF;
-    // Josuttis' implementation of inbuf
-    int n_putback = gptr() - eback();
-    if ( n_putback > 4)
-        n_putback = 4;
-    memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
-
-    int num = gzread( file, buffer+4, bufferSize-4);
-    if (num <= 0) // ERROR or EOF
-        return EOF;
-
-    // reset buffer pointers
-    setg( buffer + (4 - n_putback),   // beginning of putback area
-          buffer + 4,                 // read position
-          buffer + 4 + num);          // end of buffer
-
-    // return next character
-    return * reinterpret_cast<unsigned char *>( gptr());
+  if (gptr() && (gptr() < egptr()))
+    return *reinterpret_cast<unsigned char *>(gptr());
+
+  if (!(mode & std::ios::in) || !opened)
+    return EOF;
+  // Josuttis' implementation of inbuf
+  int n_putback = gptr() - eback();
+  if (n_putback > 4)
+    n_putback = 4;
+  memcpy(buffer + (4 - n_putback), gptr() - n_putback, n_putback);
+
+  int num = gzread(file, buffer + 4, bufferSize - 4);
+  if (num <= 0) // ERROR or EOF
+    return EOF;
+
+  // reset buffer pointers
+  setg(buffer + (4 - n_putback), // beginning of putback area
+       buffer + 4,               // read position
+       buffer + 4 + num);        // end of buffer
+
+  // return next character
+  return *reinterpret_cast<unsigned char *>(gptr());
 }
 
 int gzstreambuf::flush_buffer() {
-    // Separate the writing of the buffer from overflow() and
-    // sync() operation.
-    int w = pptr() - pbase();
-    if ( gzwrite( file, pbase(), w) != w)
-        return EOF;
-    pbump( -w);
-    return w;
+  // Separate the writing of the buffer from overflow() and
+  // sync() operation.
+  int w = pptr() - pbase();
+  if (gzwrite(file, pbase(), w) != w)
+    return EOF;
+  pbump(-w);
+  return w;
 }
 
-int gzstreambuf::overflow( int c) { // used for output buffer only
-    if ( ! ( mode & std::ios::out) || ! opened)
-        return EOF;
-    if (c != EOF) {
-        *pptr() = c;
-        pbump(1);
-    }
-    if ( flush_buffer() == EOF)
-        return EOF;
-    return c;
+int gzstreambuf::overflow(int c) { // used for output buffer only
+  if (!(mode & std::ios::out) || !opened)
+    return EOF;
+  if (c != EOF) {
+    *pptr() = c;
+    pbump(1);
+  }
+  if (flush_buffer() == EOF)
+    return EOF;
+  return c;
 }
 
 int gzstreambuf::sync() {
-    // Changed to use flush_buffer() instead of overflow( EOF)
-    // which caused improper behavior with std::endl and flush(),
-    // bug reported by Vincent Ricard.
-    if ( pptr() && pptr() > pbase()) {
-        if ( flush_buffer() == EOF)
-            return -1;
-    }
-    return 0;
+  // Changed to use flush_buffer() instead of overflow( EOF)
+  // which caused improper behavior with std::endl and flush(),
+  // bug reported by Vincent Ricard.
+  if (pptr() && pptr() > pbase()) {
+    if (flush_buffer() == EOF)
+      return -1;
+  }
+  return 0;
 }
 
 // --------------------------------------
 // class gzstreambase:
 // --------------------------------------
 
-gzstreambase::gzstreambase( const char* name, int mode) {
-    init( &buf);
-    open( name, mode);
+gzstreambase::gzstreambase(const char *name, int mode) {
+  init(&buf);
+  open(name, mode);
 }
 
-gzstreambase::~gzstreambase() {
-    buf.close();
-}
+gzstreambase::~gzstreambase() { buf.close(); }
 
-void gzstreambase::open( const char* name, int open_mode) {
-    if ( ! buf.open( name, open_mode))
-        clear( rdstate() | std::ios::badbit);
+void gzstreambase::open(const char *name, int open_mode) {
+  if (!buf.open(name, open_mode))
+    clear(rdstate() | std::ios::badbit);
 }
 
 void gzstreambase::close() {
-    if ( buf.is_open())
-        if ( ! buf.close())
-            clear( rdstate() | std::ios::badbit);
+  if (buf.is_open())
+    if (!buf.close())
+      clear(rdstate() | std::ios::badbit);
 }
 
 #ifdef GZSTREAM_NAMESPACE
diff --git a/src/gzstream.h b/src/gzstream.h
index 241ff76..f760138 100644
--- a/src/gzstream.h
+++ b/src/gzstream.h
@@ -30,8 +30,8 @@
 #define GZSTREAM_H 1
 
 // Standard C++ with new header file names and std::namespace.
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <zlib.h>
 
 #ifdef GZSTREAM_NAMESPACE
@@ -44,43 +44,45 @@ namespace GZSTREAM_NAMESPACE {
 
 class gzstreambuf : public std::streambuf {
 private:
-    static const int bufferSize = 47+256;    // size of data buff
-    // totals 512 bytes under g++ for igzstream at the end.
+  static const int bufferSize = 47 + 256; // size of data buff
+  // totals 512 bytes under g++ for igzstream at the end.
+
+  gzFile file;             // file handle for compressed file
+  char buffer[bufferSize]; // data buffer
+  char opened;             // open/close state of stream
+  int mode;                // I/O mode
 
-    gzFile           file;               // file handle for compressed file
-    char             buffer[bufferSize]; // data buffer
-    char             opened;             // open/close state of stream
-    int              mode;               // I/O mode
+  int flush_buffer();
 
-    int flush_buffer();
 public:
-    gzstreambuf() : opened(0) {
-        setp( buffer, buffer + (bufferSize-1));
-        setg( buffer + 4,     // beginning of putback area
-              buffer + 4,     // read position
-              buffer + 4);    // end position
-        // ASSERT: both input & output capabilities will not be used together
-    }
-    int is_open() { return opened; }
-    gzstreambuf* open( const char* name, int open_mode);
-    gzstreambuf* close();
-    ~gzstreambuf() { close(); }
-
-    virtual int     overflow( int c = EOF);
-    virtual int     underflow();
-    virtual int     sync();
+  gzstreambuf() : opened(0) {
+    setp(buffer, buffer + (bufferSize - 1));
+    setg(buffer + 4,  // beginning of putback area
+         buffer + 4,  // read position
+         buffer + 4); // end position
+    // ASSERT: both input & output capabilities will not be used together
+  }
+  int is_open() { return opened; }
+  gzstreambuf *open(const char *name, int open_mode);
+  gzstreambuf *close();
+  ~gzstreambuf() { close(); }
+
+  virtual int overflow(int c = EOF);
+  virtual int underflow();
+  virtual int sync();
 };
 
 class gzstreambase : virtual public std::ios {
 protected:
-    gzstreambuf buf;
+  gzstreambuf buf;
+
 public:
-    gzstreambase() { init(&buf); }
-    gzstreambase( const char* name, int open_mode);
-    ~gzstreambase();
-    void open( const char* name, int open_mode);
-    void close();
-    gzstreambuf* rdbuf() { return &buf; }
+  gzstreambase() { init(&buf); }
+  gzstreambase(const char *name, int open_mode);
+  ~gzstreambase();
+  void open(const char *name, int open_mode);
+  void close();
+  gzstreambuf *rdbuf() { return &buf; }
 };
 
 // ----------------------------------------------------------------------------
@@ -91,24 +93,24 @@ public:
 
 class igzstream : public gzstreambase, public std::istream {
 public:
-    igzstream() : std::istream( &buf) {}
-    igzstream( const char* name, int open_mode = std::ios::in)
-        : gzstreambase( name, open_mode), std::istream( &buf) {}
-    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
-    void open( const char* name, int open_mode = std::ios::in) {
-        gzstreambase::open( name, open_mode);
-    }
+  igzstream() : std::istream(&buf) {}
+  igzstream(const char *name, int open_mode = std::ios::in)
+      : gzstreambase(name, open_mode), std::istream(&buf) {}
+  gzstreambuf *rdbuf() { return gzstreambase::rdbuf(); }
+  void open(const char *name, int open_mode = std::ios::in) {
+    gzstreambase::open(name, open_mode);
+  }
 };
 
 class ogzstream : public gzstreambase, public std::ostream {
 public:
-    ogzstream() : std::ostream( &buf) {}
-    ogzstream( const char* name, int mode = std::ios::out)
-        : gzstreambase( name, mode), std::ostream( &buf) {}
-    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
-    void open( const char* name, int open_mode = std::ios::out) {
-        gzstreambase::open( name, open_mode);
-    }
+  ogzstream() : std::ostream(&buf) {}
+  ogzstream(const char *name, int mode = std::ios::out)
+      : gzstreambase(name, mode), std::ostream(&buf) {}
+  gzstreambuf *rdbuf() { return gzstreambase::rdbuf(); }
+  void open(const char *name, int open_mode = std::ios::out) {
+    gzstreambase::open(name, open_mode);
+  }
 };
 
 #ifdef GZSTREAM_NAMESPACE
diff --git a/src/io.cpp b/src/io.cpp
index 3bf6a9e..44251aa 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -16,3007 +16,3191 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include <assert.h>
+#include <bitset>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
 #include <fstream>
-#include <sstream>
-#include <string>
 #include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
 #include <map>
 #include <set>
-#include <cstring>
-#include <cmath>
-#include <cstdint>
+#include <sstream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <assert.h>
+#include <string>
+#include <vector>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 
-#include "lapack.h"
-#include "gzstream.h"
-#include "mathfunc.h"
 #include "eigenlib.h"
+#include "gzstream.h"
 #include "io.h"
+#include "lapack.h"
+#include "mathfunc.h"
 
 using namespace std;
 
 // Print progress bar.
-void ProgressBar (string str, double p, double total) {
-	double progress = (100.0 * p / total);
-	int barsize = (int) (progress / 2.0);
-	char bar[51];
-
-	cout<<str;
-	for (int i = 0; i <50; i++) {
-		if (i<barsize) {bar[i] = '=';}
-		else {bar[i]=' ';}
-		cout<<bar[i];
-	}
-	cout<<setprecision(2)<<fixed<<progress<<"%\r"<<flush;
-
-	return;
-}
+void ProgressBar(string str, double p, double total) {
+  double progress = (100.0 * p / total);
+  int barsize = (int)(progress / 2.0);
+  char bar[51];
+
+  cout << str;
+  for (int i = 0; i < 50; i++) {
+    if (i < barsize) {
+      bar[i] = '=';
+    } else {
+      bar[i] = ' ';
+    }
+    cout << bar[i];
+  }
+  cout << setprecision(2) << fixed << progress << "%\r" << flush;
 
-// Print progress bar with acceptance ratio.
-void ProgressBar (string str, double p, double total, double ratio) {
-	double progress = (100.0 * p / total);
-	int barsize = (int) (progress / 2.0);
-	char bar[51];
-
-	cout<<str;
-	for (int i = 0; i <50; i++) {
-		if (i<barsize) {bar[i] = '=';}
-		else {bar[i]=' ';}
-		cout<<bar[i];
-	}
-	cout<<setprecision(2)<<fixed<<progress<<"%    "<<ratio<<"\r"<<flush;
-	return;
+  return;
 }
 
-bool isBlankLine(char const* line) {
-    for ( char const* cp = line; *cp; ++cp ) {
-        if ( !isspace(*cp) )
-	  return false;
+// Print progress bar with acceptance ratio.
+void ProgressBar(string str, double p, double total, double ratio) {
+  double progress = (100.0 * p / total);
+  int barsize = (int)(progress / 2.0);
+  char bar[51];
+
+  cout << str;
+  for (int i = 0; i < 50; i++) {
+    if (i < barsize) {
+      bar[i] = '=';
+    } else {
+      bar[i] = ' ';
     }
-    return true;
+    cout << bar[i];
+  }
+  cout << setprecision(2) << fixed << progress << "%    " << ratio << "\r"
+       << flush;
+  return;
 }
 
-bool isBlankLine(std::string const& line) {
-   return isBlankLine(line.c_str());
+bool isBlankLine(char const *line) {
+  for (char const *cp = line; *cp; ++cp) {
+    if (!isspace(*cp))
+      return false;
+  }
+  return true;
 }
 
+bool isBlankLine(std::string const &line) { return isBlankLine(line.c_str()); }
+
 // In case files are ended with "\r" or "\r\n".
-std::istream& safeGetline(std::istream& is, std::string& t) {
-    t.clear();
-
-    // The characters in the stream are read one-by-one using a
-    // std::streambuf. That is faster than reading them one-by-one
-    // using the std::istream. Code that uses streambuf this way must
-    // be guarded by a sentry object. The sentry object performs
-    // various tasks, such as thread synchronization and updating the
-    // stream state.
-    std::istream::sentry se(is, true);
-    std::streambuf* sb = is.rdbuf();
-
-    for(;;) {
-        int c = sb->sbumpc();
-        switch (c) {
-        case '\n':
-            return is;
-        case '\r':
-            if(sb->sgetc() == '\n')
-                sb->sbumpc();
-            return is;
-        case EOF:
-
-            // Also handle the case when the last line has no line
-            // ending.
-            if(t.empty())
-                is.setstate(std::ios::eofbit);
-            return is;
-        default:
-            t += (char)c;
-        }
+std::istream &safeGetline(std::istream &is, std::string &t) {
+  t.clear();
+
+  // The characters in the stream are read one-by-one using a
+  // std::streambuf. That is faster than reading them one-by-one
+  // using the std::istream. Code that uses streambuf this way must
+  // be guarded by a sentry object. The sentry object performs
+  // various tasks, such as thread synchronization and updating the
+  // stream state.
+  std::istream::sentry se(is, true);
+  std::streambuf *sb = is.rdbuf();
+
+  for (;;) {
+    int c = sb->sbumpc();
+    switch (c) {
+    case '\n':
+      return is;
+    case '\r':
+      if (sb->sgetc() == '\n')
+        sb->sbumpc();
+      return is;
+    case EOF:
+
+      // Also handle the case when the last line has no line
+      // ending.
+      if (t.empty())
+        is.setstate(std::ios::eofbit);
+      return is;
+    default:
+      t += (char)c;
     }
+  }
 }
 
 // Read SNP file.
-bool ReadFile_snps (const string &file_snps, set<string> &setSnps) {
-	setSnps.clear();
+bool ReadFile_snps(const string &file_snps, set<string> &setSnps) {
+  setSnps.clear();
 
-	igzstream infile (file_snps.c_str(), igzstream::in);
-	if (!infile) {
-	  cout << "error! fail to open snps file: " << file_snps << endl;
-	  return false;
-	}
+  igzstream infile(file_snps.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open snps file: " << file_snps << endl;
+    return false;
+  }
 
-	string line;
-	char *ch_ptr;
+  string line;
+  char *ch_ptr;
 
-	while (getline(infile, line)) {
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		setSnps.insert(ch_ptr);
-	}
+  while (getline(infile, line)) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    setSnps.insert(ch_ptr);
+  }
 
-	infile.close();
-	infile.clear();
+  infile.close();
+  infile.clear();
 
-	return true;
+  return true;
 }
 
-bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps) {
-	setSnps.clear();
+bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
+  setSnps.clear();
 
-	igzstream infile (file_snps.c_str(), igzstream::in);
-	if (!infile) {
-	  cout << "error! fail to open snps file: " << file_snps << endl;
-	  return false;
-	}
+  igzstream infile(file_snps.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open snps file: " << file_snps << endl;
+    return false;
+  }
 
-	string line, rs, chr, pos;
-	char *ch_ptr;
+  string line, rs, chr, pos;
+  char *ch_ptr;
 
-	// Read header.
-	HEADER header;
-	!safeGetline(infile, line).eof();
-	ReadHeader_io (line, header);
+  // Read header.
+  HEADER header;
+  !safeGetline(infile, line).eof();
+  ReadHeader_io(line, header);
 
-	if (header.rs_col==0 && (header.chr_col==0 || header.pos_col==0) ) {
-	  cout<<"missing rs id in the hearder"<<endl;
-	}
+  if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) {
+    cout << "missing rs id in the hearder" << endl;
+  }
 
-	while (!safeGetline(infile, line).eof()) {
-	  if (isBlankLine(line)) {continue;}
-	  ch_ptr=strtok ((char *)line.c_str(), " , \t");
+  while (!safeGetline(infile, line).eof()) {
+    if (isBlankLine(line)) {
+      continue;
+    }
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
 
-	  for (size_t i=0; i<header.coln; i++) {
-	    if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
-	    if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
-	    if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
+    for (size_t i = 0; i < header.coln; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      }
+      if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      }
+      if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+      }
 
-	    ch_ptr=strtok (NULL, " , \t");
-	  }
+      ch_ptr = strtok(NULL, " , \t");
+    }
 
-	  if (header.rs_col==0) {
-	    rs=chr+":"+pos;
-	  }
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
+    }
 
-	  setSnps.insert(rs);
-	}
+    setSnps.insert(rs);
+  }
 
-	infile.close();
-	infile.clear();
+  infile.close();
+  infile.clear();
 
-	return true;
+  return true;
 }
 
 // Read log file.
-bool ReadFile_log (const string &file_log, double &pheno_mean) {
-	ifstream infile (file_log.c_str(), ifstream::in);
-	if (!infile) {
-	  cout << "error! fail to open log file: " << file_log << endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-	size_t flag=0;
-
-	while (getline(infile, line)) {
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		if (ch_ptr!=NULL && strcmp(ch_ptr, "estimated")==0) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (ch_ptr!=NULL && strcmp(ch_ptr, "mean")==0) {
-				ch_ptr=strtok (NULL, " , \t");
-				if (ch_ptr!=NULL && strcmp(ch_ptr, "=")==0) {
-					ch_ptr=strtok (NULL, " , \t");
-					pheno_mean=atof(ch_ptr);
-					flag=1;
-				}
-			}
-		}
-
-		if (flag==1) {break;}
-	}
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool ReadFile_log(const string &file_log, double &pheno_mean) {
+  ifstream infile(file_log.c_str(), ifstream::in);
+  if (!infile) {
+    cout << "error! fail to open log file: " << file_log << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+  size_t flag = 0;
+
+  while (getline(infile, line)) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    if (ch_ptr != NULL && strcmp(ch_ptr, "estimated") == 0) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (ch_ptr != NULL && strcmp(ch_ptr, "mean") == 0) {
+        ch_ptr = strtok(NULL, " , \t");
+        if (ch_ptr != NULL && strcmp(ch_ptr, "=") == 0) {
+          ch_ptr = strtok(NULL, " , \t");
+          pheno_mean = atof(ch_ptr);
+          flag = 1;
+        }
+      }
+    }
+
+    if (flag == 1) {
+      break;
+    }
+  }
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read bimbam annotation file.
-bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr,
-		    map<string, long int> &mapRS2bp,
-		    map<string, double> &mapRS2cM) {
-	mapRS2chr.clear();
-	mapRS2bp.clear();
-
-	ifstream infile (file_anno.c_str(), ifstream::in);
-	if (!infile) {
-	  cout << "error opening annotation file: " << file_anno << endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	string rs;
-	long int b_pos;
-	string chr;
-	double cM;
-
-	while (!safeGetline(infile, line).eof()) {
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		rs=ch_ptr;
-		ch_ptr=strtok (NULL, " , \t");
-		if (strcmp(ch_ptr, "NA")==0) {
-		  b_pos=-9;
-		} else {
-		  b_pos=atol(ch_ptr);
-		}
-		ch_ptr=strtok (NULL, " , \t");
-		if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {
-		  chr="-9";
-		} else {
-		  chr=ch_ptr;
-		}
-		ch_ptr=strtok (NULL, " , \t");
-		if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {
-		  cM=-9;
-		} else {
-		  cM=atof(ch_ptr);
-		}
-
-		mapRS2chr[rs]=chr;
-		mapRS2bp[rs]=b_pos;
-		mapRS2cM[rs]=cM;
-	}
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
+                   map<string, long int> &mapRS2bp,
+                   map<string, double> &mapRS2cM) {
+  mapRS2chr.clear();
+  mapRS2bp.clear();
+
+  ifstream infile(file_anno.c_str(), ifstream::in);
+  if (!infile) {
+    cout << "error opening annotation file: " << file_anno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  string rs;
+  long int b_pos;
+  string chr;
+  double cM;
+
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    rs = ch_ptr;
+    ch_ptr = strtok(NULL, " , \t");
+    if (strcmp(ch_ptr, "NA") == 0) {
+      b_pos = -9;
+    } else {
+      b_pos = atol(ch_ptr);
+    }
+    ch_ptr = strtok(NULL, " , \t");
+    if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) {
+      chr = "-9";
+    } else {
+      chr = ch_ptr;
+    }
+    ch_ptr = strtok(NULL, " , \t");
+    if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) {
+      cM = -9;
+    } else {
+      cM = atof(ch_ptr);
+    }
+
+    mapRS2chr[rs] = chr;
+    mapRS2bp[rs] = b_pos;
+    mapRS2cM[rs] = cM;
+  }
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read 1 column of phenotype.
-bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv,
-		      vector<double> &pheno, const int &p_column) {
-	indicator_idv.clear();
-	pheno.clear();
-
-	igzstream infile (file_pheno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout << "error! fail to open phenotype file: " << file_pheno << endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	string id;
-	double p;
-	while (!safeGetline(infile, line).eof()) {
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		for (int i=0; i<(p_column-1); ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-		}
-		if (strcmp(ch_ptr, "NA")==0) {
-		  indicator_idv.push_back(0);
-		  pheno.push_back(-9);
-		}
-		else {
-
-		  // Pheno is different from pimass2.
-		  p=atof(ch_ptr);
-		  indicator_idv.push_back(1);
-		  pheno.push_back(p);
-		}
-	}
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
+                     vector<double> &pheno, const int &p_column) {
+  indicator_idv.clear();
+  pheno.clear();
+
+  igzstream infile(file_pheno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open phenotype file: " << file_pheno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  string id;
+  double p;
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    for (int i = 0; i < (p_column - 1); ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+    }
+    if (strcmp(ch_ptr, "NA") == 0) {
+      indicator_idv.push_back(0);
+      pheno.push_back(-9);
+    } else {
+
+      // Pheno is different from pimass2.
+      p = atof(ch_ptr);
+      indicator_idv.push_back(1);
+      pheno.push_back(p);
+    }
+  }
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read bimbam phenotype file, p_column=1, 2,...
-bool ReadFile_pheno (const string &file_pheno,
-		     vector<vector<int> > &indicator_pheno,
-		     vector<vector<double> > &pheno,
-		     const vector<size_t> &p_column) {
-	indicator_pheno.clear();
-	pheno.clear();
-
-	igzstream infile (file_pheno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout << "error! fail to open phenotype file: " << file_pheno << endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	string id;
-	double p;
-
-	vector<double> pheno_row;
-	vector<int> ind_pheno_row;
-
-	size_t p_max=*max_element(p_column.begin(), p_column.end() );
-	map<size_t, size_t> mapP2c;
-	for (size_t i=0; i<p_column.size(); i++) {
-		mapP2c[p_column[i]]=i;
-		pheno_row.push_back(-9);
-		ind_pheno_row.push_back(0);
-	}
-
-	while (!safeGetline(infile, line).eof()) {
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
-		size_t i=0;
-		while (i<p_max ) {
-			if (mapP2c.count(i+1)!=0) {
-				if (strcmp(ch_ptr, "NA")==0) {
-				  ind_pheno_row[mapP2c[i+1]]=0;
-				  pheno_row[mapP2c[i+1]]=-9;
-				}
-				else {
-				  p=atof(ch_ptr);
-				  ind_pheno_row[mapP2c[i+1]]=1;
-				  pheno_row[mapP2c[i+1]]=p;
-				}
-			}
-			i++;
-			ch_ptr=strtok (NULL, " , \t");
-		}
-
-		indicator_pheno.push_back(ind_pheno_row);
-		pheno.push_back(pheno_row);
-	}
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool ReadFile_pheno(const string &file_pheno,
+                    vector<vector<int>> &indicator_pheno,
+                    vector<vector<double>> &pheno,
+                    const vector<size_t> &p_column) {
+  indicator_pheno.clear();
+  pheno.clear();
+
+  igzstream infile(file_pheno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open phenotype file: " << file_pheno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  string id;
+  double p;
+
+  vector<double> pheno_row;
+  vector<int> ind_pheno_row;
+
+  size_t p_max = *max_element(p_column.begin(), p_column.end());
+  map<size_t, size_t> mapP2c;
+  for (size_t i = 0; i < p_column.size(); i++) {
+    mapP2c[p_column[i]] = i;
+    pheno_row.push_back(-9);
+    ind_pheno_row.push_back(0);
+  }
+
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+    size_t i = 0;
+    while (i < p_max) {
+      if (mapP2c.count(i + 1) != 0) {
+        if (strcmp(ch_ptr, "NA") == 0) {
+          ind_pheno_row[mapP2c[i + 1]] = 0;
+          pheno_row[mapP2c[i + 1]] = -9;
+        } else {
+          p = atof(ch_ptr);
+          ind_pheno_row[mapP2c[i + 1]] = 1;
+          pheno_row[mapP2c[i + 1]] = p;
+        }
+      }
+      i++;
+      ch_ptr = strtok(NULL, " , \t");
+    }
+
+    indicator_pheno.push_back(ind_pheno_row);
+    pheno.push_back(pheno_row);
+  }
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
-bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt,
-		   vector<vector<double> > &cvt, size_t &n_cvt) {
-	indicator_cvt.clear();
-
-	ifstream infile (file_cvt.c_str(), ifstream::in);
-	if (!infile) {
-	  cout << "error! fail to open covariates file: " << file_cvt << endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-	double d;
-
-	int flag_na=0;
-
-	while (!safeGetline(infile, line).eof()) {
-		vector<double> v_d; flag_na=0;
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		while (ch_ptr!=NULL) {
-			if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
-			else {d=atof(ch_ptr);}
-
-			v_d.push_back(d);
-			ch_ptr=strtok (NULL, " , \t");
-		}
-		if (flag_na==0) {
-		  indicator_cvt.push_back(1);
-		} else {
-		  indicator_cvt.push_back(0);
-		}
-		cvt.push_back(v_d);
-	}
-
-	if (indicator_cvt.empty()) {n_cvt=0;}
-	else {
-		flag_na=0;
-		for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
-			if (indicator_cvt[i]==0) {
-			  continue;
-			}
-
-			if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
-			if (flag_na!=0 && n_cvt!=cvt[i].size()) {
-			  cout << "error! number of covariates in row " <<
-			    i << " do not match other rows." << endl;
-			  return false;
-			}
-		}
-	}
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
+                  vector<vector<double>> &cvt, size_t &n_cvt) {
+  indicator_cvt.clear();
+
+  ifstream infile(file_cvt.c_str(), ifstream::in);
+  if (!infile) {
+    cout << "error! fail to open covariates file: " << file_cvt << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+  double d;
+
+  int flag_na = 0;
+
+  while (!safeGetline(infile, line).eof()) {
+    vector<double> v_d;
+    flag_na = 0;
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    while (ch_ptr != NULL) {
+      if (strcmp(ch_ptr, "NA") == 0) {
+        flag_na = 1;
+        d = -9;
+      } else {
+        d = atof(ch_ptr);
+      }
+
+      v_d.push_back(d);
+      ch_ptr = strtok(NULL, " , \t");
+    }
+    if (flag_na == 0) {
+      indicator_cvt.push_back(1);
+    } else {
+      indicator_cvt.push_back(0);
+    }
+    cvt.push_back(v_d);
+  }
+
+  if (indicator_cvt.empty()) {
+    n_cvt = 0;
+  } else {
+    flag_na = 0;
+    for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) {
+      if (indicator_cvt[i] == 0) {
+        continue;
+      }
+
+      if (flag_na == 0) {
+        flag_na = 1;
+        n_cvt = cvt[i].size();
+      }
+      if (flag_na != 0 && n_cvt != cvt[i].size()) {
+        cout << "error! number of covariates in row " << i
+             << " do not match other rows." << endl;
+        return false;
+      }
+    }
+  }
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read .bim file.
-bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) {
-        snpInfo.clear();
-
-	ifstream infile (file_bim.c_str(), ifstream::in);
-	if (!infile) {
-	  cout << "error opening .bim file: " << file_bim << endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	string rs;
-	long int b_pos;
-	string chr;
-	double cM;
-	string major;
-	string minor;
-
-	while (getline(infile, line)) {
-		ch_ptr=strtok ((char *)line.c_str(), " \t");
-		chr=ch_ptr;
-		ch_ptr=strtok (NULL, " \t");
-		rs=ch_ptr;
-		ch_ptr=strtok (NULL, " \t");
-		cM=atof(ch_ptr);
-		ch_ptr=strtok (NULL, " \t");
-		b_pos=atol(ch_ptr);
-		ch_ptr=strtok (NULL, " \t");
-		minor=ch_ptr;
-		ch_ptr=strtok (NULL, " \t");
-		major=ch_ptr;
-
-		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major,
-			       0, -9, -9, 0, 0, 0};
-		snpInfo.push_back(sInfo);
-	}
-
-	infile.close();
-	infile.clear();
-	return true;
+// Loads a PLINK .bim file into snpInfo (which is cleared first).
+//
+// Each line must hold at least six space/tab separated columns, read in this
+// order: chromosome, SNP id, genetic distance (cM), base-pair position, and
+// two allele columns that are stored as minor then major. The remaining
+// SNPINFO members are filled with the placeholders 0, -9, -9, 0, 0, 0.
+//
+// Returns false (after printing a message) if the file cannot be opened or
+// if any line has fewer than six columns; true otherwise.
+bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
+  snpInfo.clear();
+
+  ifstream infile(file_bim.c_str(), ifstream::in);
+  if (!infile) {
+    cout << "error opening .bim file: " << file_bim << endl;
+    return false;
+  }
+
+  const char *delim = " \t";
+  const size_t n_field = 6;
+
+  string line;
+  size_t n_line = 0;
+
+  while (getline(infile, line)) {
+    ++n_line;
+
+    // strtok() writes terminators into its argument, so give it the string's
+    // own mutable buffer rather than casting away the const of c_str().
+    char *field[n_field];
+    size_t n_read = 0;
+    for (char *tok = strtok(&line[0], delim); tok != NULL && n_read < n_field;
+         tok = strtok(NULL, delim)) {
+      field[n_read++] = tok;
+    }
+
+    // A blank or truncated line would otherwise pass a null pointer to
+    // std::string, atof() or atol(), which is undefined behaviour.
+    if (n_read < n_field) {
+      cout << "error! line " << n_line << " of .bim file " << file_bim
+           << " has fewer than " << n_field << " columns." << endl;
+      return false;
+    }
+
+    const string chr = field[0];
+    const string rs = field[1];
+    const double cM = atof(field[2]);
+    const long int b_pos = atol(field[3]);
+    const string minor = field[4];
+    const string major = field[5];
+
+    SNPINFO sInfo = {chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
+    snpInfo.push_back(sInfo);
+  }
+
+  infile.close();
+  infile.clear();
+  return true;
 }
 
 // Read .fam file.
-bool ReadFile_fam (const string &file_fam,
-		   vector<vector<int> > &indicator_pheno,
-		   vector<vector<double> > &pheno,
-		   map<string, int> &mapID2num,
-		   const vector<size_t> &p_column) {
-	indicator_pheno.clear();
-	pheno.clear();
-	mapID2num.clear();
-
-	igzstream infile (file_fam.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error opening .fam file: "<<file_fam<<endl; return false;}
-
-	string line;
-	char *ch_ptr;
-
-	string id;
-	int c=0;
-	double p;
-
-	vector<double> pheno_row;
-	vector<int> ind_pheno_row;
-
-	size_t p_max=*max_element(p_column.begin(), p_column.end() );
-	map<size_t, size_t> mapP2c;
-	for (size_t i=0; i<p_column.size(); i++) {
-		mapP2c[p_column[i]]=i;
-		pheno_row.push_back(-9);
-		ind_pheno_row.push_back(0);
-	}
-
-	while (!safeGetline(infile, line).eof()) {
-		ch_ptr=strtok ((char *)line.c_str(), " \t");
-		ch_ptr=strtok (NULL, " \t");
-		id=ch_ptr;
-		ch_ptr=strtok (NULL, " \t");
-		ch_ptr=strtok (NULL, " \t");
-		ch_ptr=strtok (NULL, " \t");
-		ch_ptr=strtok (NULL, " \t");
-
-		size_t i=0;
-		while (i<p_max ) {
-			if (mapP2c.count(i+1)!=0 ) {
-				if (strcmp(ch_ptr, "NA")==0) {
-				  ind_pheno_row[mapP2c[i+1]]=0;
-				  pheno_row[mapP2c[i+1]]=-9;
-				} else {
-				  p=atof(ch_ptr);
-
-				  if (p==-9) {
-				    ind_pheno_row[mapP2c[i+1]]=0;
-				    pheno_row[mapP2c[i+1]]=-9;
-				  }
-				  else {
-				    ind_pheno_row[mapP2c[i+1]]=1;
-				    pheno_row[mapP2c[i+1]]=p;
-				  }
-				}
-			}
-			i++;
-			ch_ptr=strtok (NULL, " , \t");
-		}
-
-		indicator_pheno.push_back(ind_pheno_row);
-		pheno.push_back(pheno_row);
-
-		mapID2num[id]=c; c++;
-	}
-
-	infile.close();
-	infile.clear();
-	return true;
+// Loads individual ids and selected phenotypes from a PLINK .fam file.
+//
+// All three output containers are cleared first. Every line then adds one
+// row to pheno / indicator_pheno and one entry to mapID2num that maps the
+// second column (individual id) to the zero-based line index. The sixth
+// column is phenotype number 1; p_column lists the 1-based phenotype numbers
+// to extract, and output column j holds phenotype p_column[j]. A value given
+// as "NA", or one that parses to -9, is stored as -9 with indicator 0; any
+// other value is stored as parsed with indicator 1.
+//
+// Returns false (after printing a message) if the file cannot be opened, or
+// if a line lacks the individual id or one of the requested phenotypes.
+bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
+                  vector<vector<double>> &pheno, map<string, int> &mapID2num,
+                  const vector<size_t> &p_column) {
+  indicator_pheno.clear();
+  pheno.clear();
+  mapID2num.clear();
+
+  igzstream infile(file_fam.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error opening .fam file: " << file_fam << endl;
+    return false;
+  }
+
+  // Largest requested phenotype number. max_element() on an empty range
+  // returns end(), which must not be dereferenced.
+  size_t p_max = 0;
+  if (!p_column.empty()) {
+    p_max = *max_element(p_column.begin(), p_column.end());
+  }
+
+  // mapP2c: 1-based phenotype number in the file -> column in the output row.
+  map<size_t, size_t> mapP2c;
+  for (size_t i = 0; i < p_column.size(); i++) {
+    mapP2c[p_column[i]] = i;
+  }
+
+  // Row buffers are reused for every line.
+  vector<double> pheno_row(p_column.size(), -9);
+  vector<int> ind_pheno_row(p_column.size(), 0);
+
+  string line;
+  size_t n_line = 0;
+  int c = 0;
+
+  while (!safeGetline(infile, line).eof()) {
+    ++n_line;
+
+    // Column 1 is skipped; column 2 is the individual id. strtok() mutates
+    // its argument, so it is given the string's own writable buffer.
+    char *ch_ptr = strtok(&line[0], " \t");
+    if (ch_ptr != NULL) {
+      ch_ptr = strtok(NULL, " \t");
+    }
+    if (ch_ptr == NULL) {
+      cout << "error! line " << n_line << " of .fam file " << file_fam
+           << " has no individual id." << endl;
+      return false;
+    }
+    const string id = ch_ptr;
+
+    // Step over columns 3-5 and stop on column 6, the first phenotype.
+    for (int k = 0; k < 4 && ch_ptr != NULL; ++k) {
+      ch_ptr = strtok(NULL, " \t");
+    }
+
+    for (size_t i = 0; i < p_max; ++i) {
+      const map<size_t, size_t>::const_iterator it = mapP2c.find(i + 1);
+      if (it != mapP2c.end()) {
+        // Only a requested phenotype has to be present on the line.
+        if (ch_ptr == NULL) {
+          cout << "error! line " << n_line << " of .fam file " << file_fam
+               << " is missing phenotype column " << (i + 1) << "." << endl;
+          return false;
+        }
+
+        const size_t col = it->second;
+        const double p = (strcmp(ch_ptr, "NA") == 0) ? -9 : atof(ch_ptr);
+        if (p == -9) {
+          ind_pheno_row[col] = 0;
+          pheno_row[col] = -9;
+        } else {
+          ind_pheno_row[col] = 1;
+          pheno_row[col] = p;
+        }
+      }
+
+      // Phenotypes after the first may also be comma separated.
+      if (ch_ptr != NULL) {
+        ch_ptr = strtok(NULL, " , \t");
+      }
+    }
+
+    indicator_pheno.push_back(ind_pheno_row);
+    pheno.push_back(pheno_row);
+
+    mapID2num[id] = c;
+    c++;
+  }
+
+  infile.close();
+  infile.clear();
+  return true;
 }
 
 // Read bimbam mean genotype file, the first time, to obtain #SNPs for
 // analysis (ns_test) and total #SNP (ns_total).
-bool ReadFile_geno (const string &file_geno, const set<string> &setSnps,
-		    const gsl_matrix *W, vector<int> &indicator_idv,
-		    vector<int> &indicator_snp, const double &maf_level,
-		    const double &miss_level, const double &hwe_level,
-		    const double &r2_level,
-		    map<string, string> &mapRS2chr,
-		    map<string, long int> &mapRS2bp,
-		    map<string, double> &mapRS2cM,
-		    vector<SNPINFO> &snpInfo,
-		    size_t &ns_test) {
-	indicator_snp.clear();
-	snpInfo.clear();
-
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return false;
-	}
-
-	gsl_vector *genotype=gsl_vector_alloc (W->size1);
-	gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	double v_x, v_w;
-	int c_idv=0;
-
-	string line;
-	char *ch_ptr;
-
-	string rs;
-	long int b_pos;
-	string chr;
-	string major;
-	string minor;
-	double cM;
-	size_t file_pos;
-
-	double maf, geno, geno_old;
-	size_t n_miss;
-	size_t n_0, n_1, n_2;
-	int flag_poly;
-
-	int ni_total=indicator_idv.size();
-	int ni_test=0;
-	for (int i=0; i<ni_total; ++i) {
-		ni_test+=indicator_idv[i];
-	}
-	ns_test=0;
-
-	file_pos=0;
-	while (!safeGetline(infile, line).eof()) {
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		rs=ch_ptr;
-		ch_ptr=strtok (NULL, " , \t");
-		minor=ch_ptr;
-		ch_ptr=strtok (NULL, " , \t");
-		major=ch_ptr;
-
-		if (setSnps.size()!=0 && setSnps.count(rs)==0) {
-		  SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9,
-				 0, 0, file_pos};
-		  snpInfo.push_back(sInfo);
-		  indicator_snp.push_back(0);
-
-		  file_pos++;
-		  continue;
-		}
-
-		if (mapRS2bp.count(rs)==0) {chr="-9"; b_pos=-9;cM=-9;}
-		else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];}
-
-		maf=0; n_miss=0; flag_poly=0; geno_old=-9;
-		n_0=0; n_1=0; n_2=0;
-		c_idv=0; gsl_vector_set_zero (genotype_miss);
-		for (int i=0; i<ni_total; ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==0) {continue;}
-
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set (genotype_miss, c_idv, 1);
-			  n_miss++;
-			  c_idv++;
-			  continue;
-			}
-
-			geno=atof(ch_ptr);
-			if (geno>=0 && geno<=0.5) {n_0++;}
-			if (geno>0.5 && geno<1.5) {n_1++;}
-			if (geno>=1.5 && geno<=2.0) {n_2++;}
-
-			gsl_vector_set (genotype, c_idv, geno);
-
-			if (flag_poly==0) {geno_old=geno; flag_poly=2;}
-			if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
-
-			maf+=geno;
-
-			c_idv++;
-		}
-		maf/=2.0*(double)(ni_test-n_miss);
-
-		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss,
-			       (double)n_miss/(double)ni_test, maf,
-			       ni_test-n_miss, 0, file_pos};
-		snpInfo.push_back(sInfo);
-		file_pos++;
-
-		if ( (double)n_miss/(double)ni_test > miss_level) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
-
-		if (hwe_level!=0 && maf_level!=-1) {
-			if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
-			  indicator_snp.push_back(0);
-			  continue;
-			}
-		}
-
-		// Filter SNP if it is correlated with W unless W has
-		// only one column, of 1s.
-		for (size_t i=0; i<genotype->size; ++i) {
-			if (gsl_vector_get (genotype_miss, i)==1) {
-			  geno=maf*2.0;
-			  gsl_vector_set (genotype, i, geno);
-			}
-		}
-
-		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
-		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
-		gsl_blas_ddot (genotype, genotype, &v_x);
-		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
-		if (W->size2!=1 && v_w/v_x >= r2_level) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		indicator_snp.push_back(1);
-		ns_test++;
-	}
-
-	gsl_vector_free (genotype);
-	gsl_vector_free (genotype_miss);
-	gsl_matrix_free (WtW);
-	gsl_matrix_free (WtWi);
-	gsl_vector_free (Wtx);
-	gsl_vector_free (WtWiWtx);
-	gsl_permutation_free (pmt);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+// Each line is split on spaces, commas and tabs into: SNP id, two allele
+// columns (stored as minor then major), then one genotype per entry of
+// indicator_idv, where "NA" marks a missing value. Individuals whose
+// indicator_idv entry is 0 are skipped. One SNPINFO is appended to snpInfo
+// and one 0/1 flag to indicator_snp for every line.
+//
+// A SNP gets flag 0 when any of the following holds:
+//   - setSnps is non-empty and does not contain the SNP id;
+//   - its missing rate exceeds miss_level;
+//   - its allele frequency is below maf_level or above 1 - maf_level
+//     (not applied when maf_level is -1);
+//   - all of its non-missing genotypes are equal;
+//   - CalcHWE() returns less than hwe_level (not applied when hwe_level is
+//     0 or maf_level is -1);
+//   - W has more than one column and the ratio x'W(W'W)^-1W'x / x'x of the
+//     mean-imputed genotype vector x is at least r2_level.
+//
+// On return ns_test holds the number of SNPs with flag 1. Returns false
+// (after printing a message) if the file cannot be opened or if a line has
+// too few columns; true otherwise.
+bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
+                   const gsl_matrix *W, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp, const double &maf_level,
+                   const double &miss_level, const double &hwe_level,
+                   const double &r2_level, map<string, string> &mapRS2chr,
+                   map<string, long int> &mapRS2bp,
+                   map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
+                   size_t &ns_test) {
+  indicator_snp.clear();
+  snpInfo.clear();
+
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  gsl_vector *genotype = gsl_vector_alloc(W->size1);
+  gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  // WtWi = (W'W)^-1, computed once and reused for every SNP.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  const char *delim = " , \t";
+
+  double v_x, v_w;
+  int c_idv = 0;
+
+  string line;
+  char *ch_ptr;
+
+  string rs;
+  long int b_pos;
+  string chr;
+  string major;
+  string minor;
+  double cM;
+  size_t file_pos;
+
+  double maf, geno, geno_old;
+  size_t n_miss;
+  size_t n_0, n_1, n_2;
+  int flag_poly;
+
+  int ni_total = indicator_idv.size();
+  int ni_test = 0;
+  for (int i = 0; i < ni_total; ++i) {
+    ni_test += indicator_idv[i];
+  }
+  ns_test = 0;
+
+  // Cleared on a malformed line so that the GSL buffers are still released
+  // before returning.
+  bool ok = true;
+
+  file_pos = 0;
+  while (ok && !safeGetline(infile, line).eof()) {
+    // strtok() mutates its argument, so pass the string's writable buffer.
+    // Each further token is requested only if the previous one existed.
+    char *tok_rs = strtok(&line[0], delim);
+    char *tok_minor = (tok_rs != NULL) ? strtok(NULL, delim) : NULL;
+    char *tok_major = (tok_minor != NULL) ? strtok(NULL, delim) : NULL;
+    if (tok_major == NULL) {
+      cout << "error! line " << (file_pos + 1) << " of genotype file "
+           << file_geno << " has fewer than three columns." << endl;
+      ok = false;
+      break;
+    }
+    rs = tok_rs;
+    minor = tok_minor;
+    major = tok_major;
+
+    // SNPs outside the requested set are recorded but not parsed further.
+    if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
+      SNPINFO sInfo = {"-9", rs, -9, -9, minor, major,
+                       0,    -9, -9, 0,  0,     file_pos};
+      snpInfo.push_back(sInfo);
+      indicator_snp.push_back(0);
+
+      file_pos++;
+      continue;
+    }
+
+    if (mapRS2bp.count(rs) == 0) {
+      chr = "-9";
+      b_pos = -9;
+      cM = -9;
+    } else {
+      b_pos = mapRS2bp[rs];
+      chr = mapRS2chr[rs];
+      cM = mapRS2cM[rs];
+    }
+
+    maf = 0;
+    n_miss = 0;
+    flag_poly = 0; // 0: nothing seen, 2: one value seen, 1: polymorphic.
+    geno_old = -9;
+    n_0 = 0;
+    n_1 = 0;
+    n_2 = 0;
+    c_idv = 0;
+    gsl_vector_set_zero(genotype_miss);
+    for (int i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, delim);
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      // A short line would otherwise pass a null pointer to strcmp()/atof().
+      if (ch_ptr == NULL) {
+        cout << "error! line " << (file_pos + 1) << " of genotype file "
+             << file_geno << " has fewer genotypes than individuals." << endl;
+        ok = false;
+        break;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(genotype_miss, c_idv, 1);
+        n_miss++;
+        c_idv++;
+        continue;
+      }
+
+      geno = atof(ch_ptr);
+      if (geno >= 0 && geno <= 0.5) {
+        n_0++;
+      }
+      if (geno > 0.5 && geno < 1.5) {
+        n_1++;
+      }
+      if (geno >= 1.5 && geno <= 2.0) {
+        n_2++;
+      }
+
+      gsl_vector_set(genotype, c_idv, geno);
+
+      if (flag_poly == 0) {
+        geno_old = geno;
+        flag_poly = 2;
+      }
+      if (flag_poly == 2 && geno != geno_old) {
+        flag_poly = 1;
+      }
+
+      maf += geno;
+
+      c_idv++;
+    }
+    if (!ok) {
+      break;
+    }
+    maf /= 2.0 * (double)(ni_test - n_miss);
+
+    SNPINFO sInfo = {chr,    rs,
+                     cM,     b_pos,
+                     minor,  major,
+                     n_miss, (double)n_miss / (double)ni_test,
+                     maf,    ni_test - n_miss,
+                     0,      file_pos};
+    snpInfo.push_back(sInfo);
+    file_pos++;
+
+    if ((double)n_miss / (double)ni_test > miss_level) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if (flag_poly != 1) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if (hwe_level != 0 && maf_level != -1) {
+      if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
+        indicator_snp.push_back(0);
+        continue;
+      }
+    }
+
+    // Filter SNP if it is correlated with W unless W has
+    // only one column, of 1s. Missing genotypes are first replaced by the
+    // mean genotype (2 * maf).
+    for (size_t i = 0; i < genotype->size; ++i) {
+      if (gsl_vector_get(genotype_miss, i) == 1) {
+        geno = maf * 2.0;
+        gsl_vector_set(genotype, i, geno);
+      }
+    }
+
+    gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+    gsl_blas_ddot(genotype, genotype, &v_x);
+    gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
+
+    if (W->size2 != 1 && v_w / v_x >= r2_level) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    indicator_snp.push_back(1);
+    ns_test++;
+  }
+
+  gsl_vector_free(genotype);
+  gsl_vector_free(genotype_miss);
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(Wtx);
+  gsl_vector_free(WtWiWtx);
+  gsl_permutation_free(pmt);
+
+  infile.close();
+  infile.clear();
+
+  return ok;
 }
 
 // Read bed file, the first time.
-bool ReadFile_bed (const string &file_bed, const set<string> &setSnps,
-		   const gsl_matrix *W, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
-		   const double &maf_level, const double &miss_level,
-		   const double &hwe_level, const double &r2_level,
-		   size_t &ns_test) {
-	indicator_snp.clear();
-	size_t ns_total=snpInfo.size();
-
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return false;
-	}
-
-	gsl_vector *genotype=gsl_vector_alloc (W->size1);
-	gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	double v_x, v_w, geno;
-	size_t c_idv=0;
-
-	char ch[1];
-	bitset<8> b;
-
-	size_t ni_total=indicator_idv.size();
-	size_t ni_test=0;
-	for (size_t i=0; i<ni_total; ++i) {
-		ni_test+=indicator_idv[i];
-	}
-	ns_test=0;
-
-	// Calculate n_bit and c, the number of bit for each snp.
-	size_t n_bit;
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1;}
-
-	// Ignore the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	double maf;
-	size_t n_miss;
-	size_t n_0, n_1, n_2, c;
-
-	// Start reading snps and doing association test.
-	for (size_t t=0; t<ns_total; ++t) {
-
-	  // n_bit, and 3 is the number of magic numbers.
-	  infile.seekg(t*n_bit+3);
-
-		if (setSnps.size()!=0 &&
-		    setSnps.count(snpInfo[t].rs_number) == 0) {
-			snpInfo[t].n_miss=-9;
-			snpInfo[t].missingness=-9;
-			snpInfo[t].maf=-9;
-			snpInfo[t].file_position=t;
-			indicator_snp.push_back(0);
-			continue;
-		}
-
-		// Read genotypes.
-		c=0; maf=0.0; n_miss=0; n_0=0; n_1=0; n_2=0;
-		c_idv=0; gsl_vector_set_zero (genotype_miss);
-		for (size_t i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0;
-			for (size_t j=0; j<4; ++j) {
-				if ((i==(n_bit-1)) && c==ni_total) {break;}
-				if (indicator_idv[c]==0) {c++; continue;}
-				c++;
-
-				if (b[2*j]==0) {
-				  if (b[2*j+1]==0) {
-				    gsl_vector_set(genotype, c_idv, 2.0);
-				    maf+=2.0;
-				    n_2++;
-				  }
-				  else {
-				    gsl_vector_set(genotype, c_idv, 1.0);
-				    maf+=1.0;
-				    n_1++;
-				  }
-				}
-				else {
-				  if (b[2*j+1]==1) {
-				    gsl_vector_set(genotype, c_idv, 0.0);
-				    maf+=0.0;
-				    n_0++;
-				  }
-				  else {
-				    gsl_vector_set(genotype_miss, c_idv, 1);
-				    n_miss++;
-				  }
-				}
-				c_idv++;
-			}
-		}
-		maf/=2.0*(double)(ni_test-n_miss);
-
-		snpInfo[t].n_miss=n_miss;
-		snpInfo[t].missingness=(double)n_miss/(double)ni_test;
-		snpInfo[t].maf=maf;
-		snpInfo[t].n_idv=ni_test-n_miss;
-		snpInfo[t].n_nb=0;
-		snpInfo[t].file_position=t;
-
-		if ( (double)n_miss/(double)ni_test > miss_level) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if (hwe_level!=0 && maf_level!=-1) {
-		  if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
-		    indicator_snp.push_back(0);
-		    continue;
-		  }
-		}
-
-		// Filter SNP if it is correlated with W unless W has
-		// only one column, of 1s.
-		for (size_t i=0; i<genotype->size; ++i) {
-			if (gsl_vector_get (genotype_miss, i)==1) {
-			  geno=maf*2.0;
-			  gsl_vector_set (genotype, i, geno);
-			}
-		}
-
-		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
-		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
-		gsl_blas_ddot (genotype, genotype, &v_x);
-		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
-		if (W->size2!=1 && v_w/v_x > r2_level) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		indicator_snp.push_back(1);
-		ns_test++;
-	}
-
-	gsl_vector_free (genotype);
-	gsl_vector_free (genotype_miss);
-	gsl_matrix_free (WtW);
-	gsl_matrix_free (WtWi);
-	gsl_vector_free (Wtx);
-	gsl_vector_free (WtWiWtx);
-	gsl_permutation_free (pmt);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+// snpInfo must already hold one entry per SNP in the .bed file; its size
+// gives the number of SNPs to read, and each entry is updated in place with
+// the missing count, missing rate, allele frequency, number of non-missing
+// individuals and file position. One 0/1 flag per SNP is appended to
+// indicator_snp. Individuals whose indicator_idv entry is 0 are skipped.
+//
+// A SNP gets flag 0 when any of the following holds:
+//   - setSnps is non-empty and does not contain the SNP's rs_number;
+//   - its missing rate exceeds miss_level;
+//   - its allele frequency is below maf_level or above 1 - maf_level
+//     (not applied when maf_level is -1);
+//   - fewer than two of the three genotype classes are observed;
+//   - CalcHWE() returns less than hwe_level (not applied when hwe_level is
+//     0 or maf_level is -1);
+//   - W has more than one column and the ratio x'W(W'W)^-1W'x / x'x of the
+//     mean-imputed genotype vector x exceeds r2_level.
+//
+// On return ns_test holds the number of SNPs with flag 1. Returns false
+// (after printing a message) if the file cannot be opened or ends before
+// all genotypes of a SNP have been read; true otherwise.
+bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
+                  const gsl_matrix *W, vector<int> &indicator_idv,
+                  vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+                  const double &maf_level, const double &miss_level,
+                  const double &hwe_level, const double &r2_level,
+                  size_t &ns_test) {
+  indicator_snp.clear();
+  size_t ns_total = snpInfo.size();
+
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false;
+  }
+
+  gsl_vector *genotype = gsl_vector_alloc(W->size1);
+  gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  // WtWi = (W'W)^-1, computed once and reused for every SNP.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  double v_x, v_w, geno;
+  size_t c_idv = 0;
+
+  char ch[1];
+  bitset<8> b;
+
+  size_t ni_total = indicator_idv.size();
+  size_t ni_test = 0;
+  for (size_t i = 0; i < ni_total; ++i) {
+    ni_test += indicator_idv[i];
+  }
+  ns_test = 0;
+
+  // Each individual takes two bits, so one SNP occupies ceil(ni_total / 4)
+  // bytes; despite its name, n_bit is that byte count.
+  size_t n_bit = ni_total / 4;
+  if (ni_total % 4 != 0) {
+    n_bit++;
+  }
+
+  // Step over the three leading magic-number bytes; their values are not
+  // inspected, and every SNP below is located with an absolute seek anyway.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+  }
+
+  double maf;
+  size_t n_miss;
+  size_t n_0, n_1, n_2, c;
+
+  // Cleared on a short read so that the GSL buffers are still released
+  // before returning.
+  bool ok = true;
+
+  // Start reading snps and doing association test.
+  for (size_t t = 0; ok && t < ns_total; ++t) {
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    if (setSnps.size() != 0 && setSnps.count(snpInfo[t].rs_number) == 0) {
+      snpInfo[t].n_miss = -9;
+      snpInfo[t].missingness = -9;
+      snpInfo[t].maf = -9;
+      snpInfo[t].file_position = t;
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    // Read genotypes.
+    c = 0;
+    maf = 0.0;
+    n_miss = 0;
+    n_0 = 0;
+    n_1 = 0;
+    n_2 = 0;
+    c_idv = 0;
+    gsl_vector_set_zero(genotype_miss);
+    for (size_t i = 0; i < n_bit; ++i) {
+      // Without this check a truncated file would leave ch unchanged and the
+      // stale byte would be decoded for every remaining genotype.
+      if (!infile.read(ch, 1)) {
+        cout << "error! unexpected end of bed file " << file_bed
+             << " while reading SNP " << snpInfo[t].rs_number << endl;
+        ok = false;
+        break;
+      }
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0;
+      // Bit pair (b[2j], b[2j+1]): (0,0) -> 2, (0,1) -> 1, (1,1) -> 0,
+      // (1,0) -> missing.
+      for (size_t j = 0; j < 4; ++j) {
+        // The last byte may carry padding beyond the final individual.
+        if ((i == (n_bit - 1)) && c == ni_total) {
+          break;
+        }
+        if (indicator_idv[c] == 0) {
+          c++;
+          continue;
+        }
+        c++;
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(genotype, c_idv, 2.0);
+            maf += 2.0;
+            n_2++;
+          } else {
+            gsl_vector_set(genotype, c_idv, 1.0);
+            maf += 1.0;
+            n_1++;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(genotype, c_idv, 0.0);
+            maf += 0.0;
+            n_0++;
+          } else {
+            gsl_vector_set(genotype_miss, c_idv, 1);
+            n_miss++;
+          }
+        }
+        c_idv++;
+      }
+    }
+    if (!ok) {
+      break;
+    }
+    maf /= 2.0 * (double)(ni_test - n_miss);
+
+    snpInfo[t].n_miss = n_miss;
+    snpInfo[t].missingness = (double)n_miss / (double)ni_test;
+    snpInfo[t].maf = maf;
+    snpInfo[t].n_idv = ni_test - n_miss;
+    snpInfo[t].n_nb = 0;
+    snpInfo[t].file_position = t;
+
+    if ((double)n_miss / (double)ni_test > miss_level) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    // Require at least two distinct genotype classes.
+    if ((n_0 + n_1) == 0 || (n_1 + n_2) == 0 || (n_2 + n_0) == 0) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if (hwe_level != 0 && maf_level != -1) {
+      if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
+        indicator_snp.push_back(0);
+        continue;
+      }
+    }
+
+    // Filter SNP if it is correlated with W unless W has
+    // only one column, of 1s. Missing genotypes are first replaced by the
+    // mean genotype (2 * maf).
+    for (size_t i = 0; i < genotype->size; ++i) {
+      if (gsl_vector_get(genotype_miss, i) == 1) {
+        geno = maf * 2.0;
+        gsl_vector_set(genotype, i, geno);
+      }
+    }
+
+    gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+    gsl_blas_ddot(genotype, genotype, &v_x);
+    gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
+
+    if (W->size2 != 1 && v_w / v_x > r2_level) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    indicator_snp.push_back(1);
+    ns_test++;
+  }
+
+  gsl_vector_free(genotype);
+  gsl_vector_free(genotype_miss);
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(Wtx);
+  gsl_vector_free(WtWiWtx);
+  gsl_permutation_free(pmt);
+
+  infile.close();
+  infile.clear();
+
+  return ok;
 }
 
 // Read the genotype for one SNP; remember to read empty lines.
 // Geno stores original genotypes without centering.
 // Missing values are replaced by mean.
-bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv,
-			igzstream &infile, gsl_vector *geno,
-			double &geno_mean) {
-  size_t ni_total=indicator_idv.size();
+//
+// inc:           number of lines to read and discard before the SNP line.
+// indicator_idv: one entry per genotype column; columns whose entry is 0
+//                are skipped and take no slot in geno.
+// infile:        open stream, advanced by inc + 1 lines.
+// geno:          receives the genotypes of the kept individuals, in order.
+// geno_mean:     receives the mean of the non-missing kept genotypes.
+//
+// Returns true if a SNP line was read, false if the stream reported eof.
+bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
+                       igzstream &infile, gsl_vector *geno, double &geno_mean) {
+  size_t ni_total = indicator_idv.size();
 
   string line;
   char *ch_ptr;
-  bool flag=false;
+  bool flag = false;
 
-  for (size_t i=0; i<inc; i++) {
+  // Advance past inc lines. The eof() result is computed but deliberately
+  // discarded; the call is made only to move the stream forward.
+  for (size_t i = 0; i < inc; i++) {
     !safeGetline(infile, line).eof();
   }
 
   if (!safeGetline(infile, line).eof()) {
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    ch_ptr=strtok (NULL, " , \t");
-    ch_ptr=strtok (NULL, " , \t");
+    // Consume the three leading columns (read as SNP id and two alleles by
+    // ReadFile_geno); their values are not used here.
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
 
-    geno_mean=0.0;
+    geno_mean = 0.0;
     double d;
-    size_t c_idv=0;
+    size_t c_idv = 0;
     vector<size_t> geno_miss;
 
-    for (size_t i=0; i<ni_total; ++i) {
-      ch_ptr=strtok (NULL, " , \t");
-      if (indicator_idv[i]==0) {continue;}
+    // i walks every genotype column; c_idv counts only the kept individuals
+    // and is the index written in geno.
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
 
-      if (strcmp(ch_ptr, "NA")==0) {
-	geno_miss.push_back(c_idv);
+      // "NA" marks a missing genotype; remember its slot for imputation.
+      if (strcmp(ch_ptr, "NA") == 0) {
+        geno_miss.push_back(c_idv);
       } else {
-	d=atof(ch_ptr);
-	gsl_vector_set (geno, c_idv, d);
-	geno_mean+=d;
+        d = atof(ch_ptr);
+        gsl_vector_set(geno, c_idv, d);
+        geno_mean += d;
       }
       c_idv++;
     }
 
-    geno_mean/=(double)(c_idv-geno_miss.size() );
+    // Turn the running sum into the mean over non-missing genotypes.
+    geno_mean /= (double)(c_idv - geno_miss.size());
 
-    for (size_t i=0; i<geno_miss.size(); ++i) {
+    // Fill every missing slot with that mean.
+    for (size_t i = 0; i < geno_miss.size(); ++i) {
       gsl_vector_set(geno, geno_miss[i], geno_mean);
     }
-    flag=true;
+    flag = true;
   }
 
   return flag;
 }
 
 // For PLINK, store SNPs as double too.
-void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv,
-		       ifstream &infile, gsl_vector *geno, double &geno_mean) {
-  size_t ni_total=indicator_idv.size(), n_bit;
-  if (ni_total%4==0) {n_bit=ni_total/4;}
-  else {n_bit=ni_total/4+1;}
+// Reads the genotypes of the SNP at index pos from an open .bed stream.
+//
+// pos:           zero-based SNP index; the read starts at byte offset
+//                pos * n_bit + 3.
+// indicator_idv: one entry per individual in the file; individuals whose
+//                entry is 0 are skipped and take no slot in geno.
+// geno:          receives 0, 1 or 2 for each kept individual, in order;
+//                missing genotypes are replaced by geno_mean.
+// geno_mean:     receives the mean of the non-missing kept genotypes.
+void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv,
+                      ifstream &infile, gsl_vector *geno, double &geno_mean) {
+  // Four individuals are packed per byte, so n_bit (a byte count despite its
+  // name) is ni_total / 4 rounded up.
+  size_t ni_total = indicator_idv.size(), n_bit;
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
 
   // n_bit, and 3 is the number of magic numbers.
-  infile.seekg(pos*n_bit+3);
+  infile.seekg(pos * n_bit + 3);
 
   // Read genotypes.
   char ch[1];
   bitset<8> b;
 
-  geno_mean=0.0;
-  size_t c=0, c_idv=0;
+  // c counts every individual in the file; c_idv counts only the kept ones
+  // and is the index written in geno.
+  geno_mean = 0.0;
+  size_t c = 0, c_idv = 0;
   vector<size_t> geno_miss;
 
-  for (size_t i=0; i<n_bit; ++i) {
-    infile.read(ch,1);
-    b=ch[0];
+  for (size_t i = 0; i < n_bit; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
 
     // Minor allele homozygous: 2.0; major: 0.0.
-    for (size_t j=0; j<4; ++j) {
-      if ((i==(n_bit-1)) && c==ni_total) {break;}
-      if (indicator_idv[c]==0) {c++; continue;}
+    // Bit pair (b[2j], b[2j+1]): (0,0) -> 2, (0,1) -> 1, (1,1) -> 0,
+    // (1,0) -> missing.
+    for (size_t j = 0; j < 4; ++j) {
+      // The last byte may carry padding beyond the final individual.
+      if ((i == (n_bit - 1)) && c == ni_total) {
+        break;
+      }
+      if (indicator_idv[c] == 0) {
+        c++;
+        continue;
+      }
       c++;
 
-      if (b[2*j]==0) {
-	if (b[2*j+1]==0) {
-	  gsl_vector_set (geno, c_idv, 2);
-	  geno_mean+=2.0;
-	} else {
-	  gsl_vector_set (geno, c_idv, 1);
-	  geno_mean+=1.0;
-	}
+      if (b[2 * j] == 0) {
+        if (b[2 * j + 1] == 0) {
+          gsl_vector_set(geno, c_idv, 2);
+          geno_mean += 2.0;
+        } else {
+          gsl_vector_set(geno, c_idv, 1);
+          geno_mean += 1.0;
+        }
       } else {
-	if (b[2*j+1]==1) {
-	  gsl_vector_set (geno, c_idv, 0);
-	  geno_mean+=0.0;
-	} else {
-	  geno_miss.push_back(c_idv);
-	}
+        if (b[2 * j + 1] == 1) {
+          gsl_vector_set(geno, c_idv, 0);
+          geno_mean += 0.0;
+        } else {
+          geno_miss.push_back(c_idv);
+        }
       }
 
       c_idv++;
     }
   }
 
-  geno_mean/=(double)(c_idv-geno_miss.size());
+  // Turn the running sum into the mean over non-missing genotypes.
+  geno_mean /= (double)(c_idv - geno_miss.size());
 
-  for (size_t i=0; i<geno_miss.size(); ++i) {
+  // Fill every missing slot with that mean.
+  for (size_t i = 0; i < geno_miss.size(); ++i) {
     gsl_vector_set(geno, geno_miss[i], geno_mean);
   }
 
   return;
 }
 
-void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv,
-		   map<string, int> &mapID2num, const size_t k_mode,
-		   bool &error, gsl_matrix *G) {
-	igzstream infile (file_kin.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error! fail to open kinship file: "<<file_kin<<endl;
-	  error=true; return;
-	}
-
-	size_t ni_total=indicator_idv.size();
-
-	gsl_matrix_set_zero (G);
-
-	string line;
-	char *ch_ptr;
-	double d;
-
-	if (k_mode==1) {
-		size_t i_test=0, i_total=0, j_test=0, j_total=0;
-		while (getline(infile, line)) {
-			if (i_total==ni_total) {
-			  cout<<"error! number of rows in the kinship "<<
-			    "file is larger than the number of phentypes."<<
-			    endl;
-			  error=true;
-			}
-
-			if (indicator_idv[i_total]==0) {i_total++; continue;}
-
-			j_total=0; j_test=0;
-			ch_ptr=strtok ((char *)line.c_str(), " , \t");
-			while (ch_ptr!=NULL) {
-				if (j_total==ni_total) {
-				  cout<<"error! number of columns in the "<<
-				    "kinship file is larger than the number"<<
-				    " of phentypes for row = "<<i_total<<endl;
-				  error=true;
-				}
-
-				d=atof(ch_ptr);
-				if (indicator_idv[j_total]==1) {
-				  gsl_matrix_set (G, i_test, j_test, d);
-				  j_test++;
-				}
-				j_total++;
-
-				ch_ptr=strtok (NULL, " , \t");
-			}
-			if (j_total!=ni_total) {
-			  cout<<"error! number of columns in the kinship "<<
-			    "file do not match the number of phentypes for "<<
-			    "row = "<<i_total<<endl;
-			  error=true;
-			}
-			i_total++; i_test++;
-		}
-		if (i_total!=ni_total) {
-		  cout<<"error! number of rows in the kinship file do "<<
-		    "not match the number of phentypes."<<endl;
-		  error=true;
-		}
-	}
-	else {
-		map<size_t, size_t> mapID2ID;
-		size_t c=0;
-		for (size_t i=0; i<indicator_idv.size(); i++) {
-			if (indicator_idv[i]==1) {mapID2ID[i]=c; c++;}
-		}
-
-		string id1, id2;
-		double Cov_d;
-		size_t n_id1, n_id2;
-
-		while (getline(infile, line)) {
-			ch_ptr=strtok ((char *)line.c_str(), " , \t");
-			id1=ch_ptr;
-			ch_ptr=strtok (NULL, " , \t");
-			id2=ch_ptr;
-			ch_ptr=strtok (NULL, " , \t");
-			d=atof(ch_ptr);
-			if (mapID2num.count(id1)==0 ||
-			    mapID2num.count(id2)==0) {
-			  continue;
-			}
-			if (indicator_idv[mapID2num[id1]]==0 ||
-			    indicator_idv[mapID2num[id2]]==0) {
-			  continue;
-			}
-
-			n_id1=mapID2ID[mapID2num[id1]];
-			n_id2=mapID2ID[mapID2num[id2]];
-
-			Cov_d=gsl_matrix_get(G, n_id1, n_id2);
-			if (Cov_d!=0 && Cov_d!=d) {
-			  cout<<"error! redundant and unequal terms in the "<<
-			    "kinship file, for id1 = "<<id1<<" and id2 = "<<
-			    id2<<endl;
-			}
-			else {
-				gsl_matrix_set(G, n_id1, n_id2, d);
-				gsl_matrix_set(G, n_id2, n_id1, d);
-			}
-		}
-	}
-
-	infile.close();
-	infile.clear();
-
-	return;
+void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
+                  map<string, int> &mapID2num, const size_t k_mode, bool &error,
+                  gsl_matrix *G) {
+  igzstream infile(file_kin.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open kinship file: " << file_kin << endl;
+    error = true;
+    return;
+  }
+
+  size_t ni_total = indicator_idv.size();
+
+  gsl_matrix_set_zero(G);
+
+  string line;
+  char *ch_ptr;
+  double d;
+
+  if (k_mode == 1) {
+    size_t i_test = 0, i_total = 0, j_test = 0, j_total = 0;
+    while (getline(infile, line)) {
+      if (i_total == ni_total) {
+        cout << "error! number of rows in the kinship "
+             << "file is larger than the number of phentypes." << endl;
+        error = true;
+      }
+
+      if (indicator_idv[i_total] == 0) {
+        i_total++;
+        continue;
+      }
+
+      j_total = 0;
+      j_test = 0;
+      ch_ptr = strtok((char *)line.c_str(), " , \t");
+      while (ch_ptr != NULL) {
+        if (j_total == ni_total) {
+          cout << "error! number of columns in the "
+               << "kinship file is larger than the number"
+               << " of phentypes for row = " << i_total << endl;
+          error = true;
+        }
+
+        d = atof(ch_ptr);
+        if (indicator_idv[j_total] == 1) {
+          gsl_matrix_set(G, i_test, j_test, d);
+          j_test++;
+        }
+        j_total++;
+
+        ch_ptr = strtok(NULL, " , \t");
+      }
+      if (j_total != ni_total) {
+        cout << "error! number of columns in the kinship "
+             << "file do not match the number of phentypes for "
+             << "row = " << i_total << endl;
+        error = true;
+      }
+      i_total++;
+      i_test++;
+    }
+    if (i_total != ni_total) {
+      cout << "error! number of rows in the kinship file do "
+           << "not match the number of phentypes." << endl;
+      error = true;
+    }
+  } else {
+    map<size_t, size_t> mapID2ID;
+    size_t c = 0;
+    for (size_t i = 0; i < indicator_idv.size(); i++) {
+      if (indicator_idv[i] == 1) {
+        mapID2ID[i] = c;
+        c++;
+      }
+    }
+
+    string id1, id2;
+    double Cov_d;
+    size_t n_id1, n_id2;
+
+    while (getline(infile, line)) {
+      ch_ptr = strtok((char *)line.c_str(), " , \t");
+      id1 = ch_ptr;
+      ch_ptr = strtok(NULL, " , \t");
+      id2 = ch_ptr;
+      ch_ptr = strtok(NULL, " , \t");
+      d = atof(ch_ptr);
+      if (mapID2num.count(id1) == 0 || mapID2num.count(id2) == 0) {
+        continue;
+      }
+      if (indicator_idv[mapID2num[id1]] == 0 ||
+          indicator_idv[mapID2num[id2]] == 0) {
+        continue;
+      }
+
+      n_id1 = mapID2ID[mapID2num[id1]];
+      n_id2 = mapID2ID[mapID2num[id2]];
+
+      Cov_d = gsl_matrix_get(G, n_id1, n_id2);
+      if (Cov_d != 0 && Cov_d != d) {
+        cout << "error! redundant and unequal terms in the "
+             << "kinship file, for id1 = " << id1 << " and id2 = " << id2
+             << endl;
+      } else {
+        gsl_matrix_set(G, n_id1, n_id2, d);
+        gsl_matrix_set(G, n_id2, n_id1, d);
+      }
+    }
+  }
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv,
-		  map<string, int> &mapID2num, const size_t k_mode,
-		  bool &error, gsl_matrix *G) {
-	igzstream infile (file_mk.c_str(), igzstream::in);
-	if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl;
-	  error=true;
-	  return;
-	}
-
-	string file_kin, line;
-
-	size_t i=0;
-	while (getline(infile, line)) {
-	  file_kin=line.c_str();
-	  gsl_matrix_view G_sub=gsl_matrix_submatrix(G, 0, i*G->size1,
-						     G->size1, G->size1);
-	  ReadFile_kin (file_kin, indicator_idv, mapID2num, k_mode,
-			error, &G_sub.matrix);
-	  i++;
-	}
-
-	infile.close();
-	infile.clear();
-	return;
+void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
+                 map<string, int> &mapID2num, const size_t k_mode, bool &error,
+                 gsl_matrix *G) {
+  igzstream infile(file_mk.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open file: " << file_mk << endl;
+    error = true;
+    return;
+  }
+
+  string file_kin, line;
+
+  size_t i = 0;
+  while (getline(infile, line)) {
+    file_kin = line.c_str();
+    gsl_matrix_view G_sub =
+        gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1);
+    ReadFile_kin(file_kin, indicator_idv, mapID2num, k_mode, error,
+                 &G_sub.matrix);
+    i++;
+  }
+
+  infile.close();
+  infile.clear();
+  return;
 }
 
-void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U) {
-	igzstream infile (file_ku.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error! fail to open the U file: "<<file_ku<<endl;
-	  error=true;
-	  return;
-	}
-
-	size_t n_row=U->size1, n_col=U->size2, i_row=0, i_col=0;
-
-	gsl_matrix_set_zero (U);
-
-	string line;
-	char *ch_ptr;
-	double d;
-
-	while (getline(infile, line)) {
-		if (i_row==n_row) {
-		  cout<<"error! number of rows in the U file is larger "<<
-		    "than expected."<<endl;
-		  error=true;
-		}
-
-		i_col=0;
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		while (ch_ptr!=NULL) {
-			if (i_col==n_col) {
-			  cout<<"error! number of columns in the U file "<<
-			    "is larger than expected, for row = "<<
-			    i_row<<endl;
-			  error=true;
-			}
-
-			d=atof(ch_ptr);
-			gsl_matrix_set (U, i_row, i_col, d);
-			i_col++;
-
-			ch_ptr=strtok (NULL, " , \t");
-		}
-
-		i_row++;
-	}
-
-	infile.close();
-	infile.clear();
-
-	return;
+void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) {
+  igzstream infile(file_ku.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open the U file: " << file_ku << endl;
+    error = true;
+    return;
+  }
+
+  size_t n_row = U->size1, n_col = U->size2, i_row = 0, i_col = 0;
+
+  gsl_matrix_set_zero(U);
+
+  string line;
+  char *ch_ptr;
+  double d;
+
+  while (getline(infile, line)) {
+    if (i_row == n_row) {
+      cout << "error! number of rows in the U file is larger "
+           << "than expected." << endl;
+      error = true;
+    }
+
+    i_col = 0;
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    while (ch_ptr != NULL) {
+      if (i_col == n_col) {
+        cout << "error! number of columns in the U file "
+             << "is larger than expected, for row = " << i_row << endl;
+        error = true;
+      }
+
+      d = atof(ch_ptr);
+      gsl_matrix_set(U, i_row, i_col, d);
+      i_col++;
+
+      ch_ptr = strtok(NULL, " , \t");
+    }
+
+    i_row++;
+  }
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval) {
-	igzstream infile (file_kd.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error! fail to open the D file: "<<file_kd<<endl;
-	  error=true;
-	  return;
-	}
+void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
+  igzstream infile(file_kd.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open the D file: " << file_kd << endl;
+    error = true;
+    return;
+  }
 
-	size_t n_row=eval->size, i_row=0;
+  size_t n_row = eval->size, i_row = 0;
 
-	gsl_vector_set_zero (eval);
+  gsl_vector_set_zero(eval);
 
-	string line;
-	char *ch_ptr;
-	double d;
+  string line;
+  char *ch_ptr;
+  double d;
 
-	while (getline(infile, line)) {
-		if (i_row==n_row) {
-		  cout<<"error! number of rows in the D file is larger "<<
-		    "than expected."<<endl;
-		  error=true;
-		}
+  while (getline(infile, line)) {
+    if (i_row == n_row) {
+      cout << "error! number of rows in the D file is larger "
+           << "than expected." << endl;
+      error = true;
+    }
 
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		d=atof(ch_ptr);
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    d = atof(ch_ptr);
 
-		ch_ptr=strtok (NULL, " , \t");
-		if (ch_ptr!=NULL) {
-		  cout<<"error! number of columns in the D file is larger "<<
-		    "than expected, for row = "<<i_row<<endl;
-		  error=true;
-		}
+    ch_ptr = strtok(NULL, " , \t");
+    if (ch_ptr != NULL) {
+      cout << "error! number of columns in the D file is larger "
+           << "than expected, for row = " << i_row << endl;
+      error = true;
+    }
 
-		gsl_vector_set (eval, i_row, d);
+    gsl_vector_set(eval, i_row, d);
 
-		i_row++;
-	}
+    i_row++;
+  }
 
-	infile.close();
-	infile.clear();
+  infile.close();
+  infile.clear();
 
-	return;
+  return;
 }
 
 // Read bimbam mean genotype file and calculate kinship matrix.
-bool BimbamKin (const string &file_geno, vector<int> &indicator_snp,
-		const int k_mode, const int display_pace,
-		gsl_matrix *matrix_kin) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	size_t n_miss;
-	double d, geno_mean, geno_var;
-
-	size_t ni_total=matrix_kin->size1;
-	gsl_vector *geno=gsl_vector_alloc (ni_total);
-	gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	size_t ns_test=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		geno_mean=0.0; n_miss=0; geno_var=0.0;
-		gsl_vector_set_all(geno_miss, 0);
-		for (size_t i=0; i<ni_total; ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set(geno_miss, i, 0); n_miss++;
-			} else {
-				d=atof(ch_ptr);
-				gsl_vector_set (geno, i, d);
-				gsl_vector_set (geno_miss, i, 1);
-				geno_mean+=d;
-				geno_var+=d*d;
-			}
-		}
-
-		geno_mean/=(double)(ni_total-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_total;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_total; ++i) {
-			if (gsl_vector_get (geno_miss, i)==0) {
-			  gsl_vector_set(geno, i, geno_mean);
-			}
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		if (k_mode==2 && geno_var!=0) {
-		  gsl_vector_scale (geno, 1.0/sqrt(geno_var));
-		}
-		gsl_vector_view Xlarge_col=
-		  gsl_matrix_column (Xlarge, ns_test%msize);
-		gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
-		ns_test++;
-
-		if (ns_test%msize==0) {
-		  eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0,
-				  matrix_kin);
-		  gsl_matrix_set_zero(Xlarge);
-		}
-	}
-
-	if (ns_test%msize!=0) {
-	  eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
-	}
-	cout<<endl;
-
-	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-
-	for (size_t i=0; i<ni_total; ++i) {
-		for (size_t j=0; j<i; ++j) {
-			d=gsl_matrix_get (matrix_kin, j, i);
-			gsl_matrix_set (matrix_kin, i, j, d);
-		}
-	}
-
-	gsl_vector_free (geno);
-	gsl_vector_free (geno_miss);
-	gsl_matrix_free (Xlarge);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
+               const int k_mode, const int display_pace,
+               gsl_matrix *matrix_kin) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  size_t n_miss;
+  double d, geno_mean, geno_var;
+
+  size_t ni_total = matrix_kin->size1;
+  gsl_vector *geno = gsl_vector_alloc(ni_total);
+  gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  size_t ns_test = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    geno_mean = 0.0;
+    n_miss = 0;
+    geno_var = 0.0;
+    gsl_vector_set_all(geno_miss, 0);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(geno_miss, i, 0);
+        n_miss++;
+      } else {
+        d = atof(ch_ptr);
+        gsl_vector_set(geno, i, d);
+        gsl_vector_set(geno_miss, i, 1);
+        geno_mean += d;
+        geno_var += d * d;
+      }
+    }
+
+    geno_mean /= (double)(ni_total - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_total;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_total; ++i) {
+      if (gsl_vector_get(geno_miss, i) == 0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    if (k_mode == 2 && geno_var != 0) {
+      gsl_vector_scale(geno, 1.0 / sqrt(geno_var));
+    }
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+    ns_test++;
+
+    if (ns_test % msize == 0) {
+      eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+      gsl_matrix_set_zero(Xlarge);
+    }
+  }
+
+  if (ns_test % msize != 0) {
+    eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+  }
+  cout << endl;
+
+  gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
+
+  for (size_t i = 0; i < ni_total; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      d = gsl_matrix_get(matrix_kin, j, i);
+      gsl_matrix_set(matrix_kin, i, j, d);
+    }
+  }
+
+  gsl_vector_free(geno);
+  gsl_vector_free(geno_miss);
+  gsl_matrix_free(Xlarge);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
-bool PlinkKin (const string &file_bed, vector<int> &indicator_snp,
-	       const int k_mode, const int display_pace,
-	       gsl_matrix *matrix_kin) {
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return false;
-	}
-
-	char ch[1];
-	bitset<8> b;
-
-	size_t n_miss, ci_total;
-	double d, geno_mean, geno_var;
-
-	size_t ni_total=matrix_kin->size1;
-	gsl_vector *geno=gsl_vector_alloc (ni_total);
-
-	size_t ns_test=0;
-	int n_bit;
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	// Calculate n_bit and c, the number of bit for each snp.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	//print the first three magic numbers
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		geno_mean=0.0;	n_miss=0; ci_total=0; geno_var=0.0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-				if ((i==(n_bit-1)) && ci_total==ni_total) {
-				  break;
-				}
-
-				if (b[2*j]==0) {
-					if (b[2*j+1]==0) {
-					  gsl_vector_set(geno, ci_total, 2.0);
-					  geno_mean+=2.0;
-					  geno_var+=4.0;
-					}
-					else {
-					  gsl_vector_set(geno, ci_total, 1.0);
-					  geno_mean+=1.0;
-					  geno_var+=1.0;
-					}
-				}
-				else {
-					if (b[2*j+1]==1) {
-					  gsl_vector_set(geno,ci_total,0.0);
-					}
-					else {
-					  gsl_vector_set(geno,ci_total,-9.0);
-					  n_miss++;
-					}
-				}
-
-				ci_total++;
-			}
-		}
-
-		geno_mean/=(double)(ni_total-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_total;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_total; ++i) {
-			d=gsl_vector_get(geno,i);
-			if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		if (k_mode==2 && geno_var!=0) {
-		  gsl_vector_scale (geno, 1.0/sqrt(geno_var));
-		}
-		gsl_vector_view Xlarge_col=
-		  gsl_matrix_column (Xlarge, ns_test%msize);
-		gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
-		ns_test++;
-
-		if (ns_test%msize==0) {
-		  eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
-		  gsl_matrix_set_zero(Xlarge);
-		}
-	}
-
-	if (ns_test%msize!=0) {
-	  eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
-	}
-
-	cout<<endl;
-
-	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-
-	for (size_t i=0; i<ni_total; ++i) {
-		for (size_t j=0; j<i; ++j) {
-			d=gsl_matrix_get (matrix_kin, j, i);
-			gsl_matrix_set (matrix_kin, i, j, d);
-		}
-	}
-
-	gsl_vector_free (geno);
-	gsl_matrix_free (Xlarge);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
+              const int k_mode, const int display_pace,
+              gsl_matrix *matrix_kin) {
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false;
+  }
+
+  char ch[1];
+  bitset<8> b;
+
+  size_t n_miss, ci_total;
+  double d, geno_mean, geno_var;
+
+  size_t ni_total = matrix_kin->size1;
+  gsl_vector *geno = gsl_vector_alloc(ni_total);
+
+  size_t ns_test = 0;
+  int n_bit;
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  // Calculate n_bit and c, the number of bit for each snp.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // print the first three magic numbers
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    geno_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    geno_var = 0.0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == ni_total) {
+          break;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(geno, ci_total, 2.0);
+            geno_mean += 2.0;
+            geno_var += 4.0;
+          } else {
+            gsl_vector_set(geno, ci_total, 1.0);
+            geno_mean += 1.0;
+            geno_var += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(geno, ci_total, 0.0);
+          } else {
+            gsl_vector_set(geno, ci_total, -9.0);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+      }
+    }
+
+    geno_mean /= (double)(ni_total - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_total;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_total; ++i) {
+      d = gsl_vector_get(geno, i);
+      if (d == -9.0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    if (k_mode == 2 && geno_var != 0) {
+      gsl_vector_scale(geno, 1.0 / sqrt(geno_var));
+    }
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+    ns_test++;
+
+    if (ns_test % msize == 0) {
+      eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+      gsl_matrix_set_zero(Xlarge);
+    }
+  }
+
+  if (ns_test % msize != 0) {
+    eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+  }
+
+  cout << endl;
+
+  gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
+
+  for (size_t i = 0; i < ni_total; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      d = gsl_matrix_get(matrix_kin, j, i);
+      gsl_matrix_set(matrix_kin, i, j, d);
+    }
+  }
+
+  gsl_vector_free(geno);
+  gsl_matrix_free(Xlarge);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read bimbam mean genotype file, the second time, recode "mean"
 // genotype and calculate K.
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
-		    vector<int> &indicator_snp, gsl_matrix *UtX,
-		    gsl_matrix *K, const bool calc_K) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	if (calc_K==true) {gsl_matrix_set_zero (K);}
-
-	gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
-	gsl_vector *genotype_miss=gsl_vector_alloc (UtX->size1);
-	double geno, geno_mean;
-	size_t n_miss;
-
-	int ni_total=(int)indicator_idv.size();
-	int ns_total=(int)indicator_snp.size();
-	int ni_test=UtX->size1;
-	int ns_test=UtX->size2;
-
-	int c_idv=0, c_snp=0;
-
-	for (int i=0; i<ns_total; ++i) {
-		!safeGetline(infile, line).eof();
-		if (indicator_snp[i]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		c_idv=0; geno_mean=0; n_miss=0;
-		gsl_vector_set_zero (genotype_miss);
-		for (int j=0; j<ni_total; ++j) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[j]==0) {continue;}
-
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set (genotype_miss, c_idv, 1);
-			  n_miss++;
-			} else {
-				geno=atof(ch_ptr);
-				gsl_vector_set (genotype, c_idv, geno);
-				geno_mean+=geno;
-			}
-			c_idv++;
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<genotype->size; ++i) {
-			if (gsl_vector_get (genotype_miss, i)==1) {
-			  geno=0;
-			}
-			else {
-			  geno=gsl_vector_get (genotype, i);
-			  geno-=geno_mean;
-			}
-
-			gsl_vector_set (genotype, i, geno);
-			gsl_matrix_set (UtX, i, c_snp, geno);
-		}
-
-		if (calc_K==true) {
-		  gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
-		}
-
-		c_snp++;
-	}
-
-	if (calc_K==true) {
-		gsl_matrix_scale (K, 1.0/(double)ns_test);
-
-		for (size_t i=0; i<genotype->size; ++i) {
-			for (size_t j=0; j<i; ++j) {
-				geno=gsl_matrix_get (K, j, i);
-				gsl_matrix_set (K, i, j, geno);
-			}
-		}
-	}
-
-	gsl_vector_free (genotype);
-	gsl_vector_free (genotype_miss);
-
-	infile.clear();
-	infile.close();
-
-	return true;
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+                   const bool calc_K) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  if (calc_K == true) {
+    gsl_matrix_set_zero(K);
+  }
+
+  gsl_vector *genotype = gsl_vector_alloc(UtX->size1);
+  gsl_vector *genotype_miss = gsl_vector_alloc(UtX->size1);
+  double geno, geno_mean;
+  size_t n_miss;
+
+  int ni_total = (int)indicator_idv.size();
+  int ns_total = (int)indicator_snp.size();
+  int ni_test = UtX->size1;
+  int ns_test = UtX->size2;
+
+  int c_idv = 0, c_snp = 0;
+
+  for (int i = 0; i < ns_total; ++i) {
+    !safeGetline(infile, line).eof();
+    if (indicator_snp[i] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    c_idv = 0;
+    geno_mean = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(genotype_miss);
+    for (int j = 0; j < ni_total; ++j) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[j] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(genotype_miss, c_idv, 1);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+        gsl_vector_set(genotype, c_idv, geno);
+        geno_mean += geno;
+      }
+      c_idv++;
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < genotype->size; ++i) {
+      if (gsl_vector_get(genotype_miss, i) == 1) {
+        geno = 0;
+      } else {
+        geno = gsl_vector_get(genotype, i);
+        geno -= geno_mean;
+      }
+
+      gsl_vector_set(genotype, i, geno);
+      gsl_matrix_set(UtX, i, c_snp, geno);
+    }
+
+    if (calc_K == true) {
+      gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
+    }
+
+    c_snp++;
+  }
+
+  if (calc_K == true) {
+    gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+    for (size_t i = 0; i < genotype->size; ++i) {
+      for (size_t j = 0; j < i; ++j) {
+        geno = gsl_matrix_get(K, j, i);
+        gsl_matrix_set(K, i, j, geno);
+      }
+    }
+  }
+
+  gsl_vector_free(genotype);
+  gsl_vector_free(genotype_miss);
+
+  infile.clear();
+  infile.close();
+
+  return true;
 }
 
 // Compact version of the above function, using uchar instead of
 // gsl_matrix.
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
-		    vector<int> &indicator_snp,
-		    vector<vector<unsigned char> > &Xt,
-		    gsl_matrix *K, const bool calc_K, const size_t ni_test,
-		    const size_t ns_test) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return false;
-	}
-
-	Xt.clear();
-	vector<unsigned char> Xt_row;
-	for (size_t i=0; i<ni_test; i++) {
-	  Xt_row.push_back(0);
-	}
-
-	string line;
-	char *ch_ptr;
-
-	if (calc_K==true) {gsl_matrix_set_zero (K);}
-
-	gsl_vector *genotype=gsl_vector_alloc (ni_test);
-	gsl_vector *genotype_miss=gsl_vector_alloc (ni_test);
-	double geno, geno_mean;
-	size_t n_miss;
-
-	size_t ni_total= indicator_idv.size();
-	size_t ns_total= indicator_snp.size();
-
-	size_t c_idv=0, c_snp=0;
-
-	for (size_t i=0; i<ns_total; ++i) {
-		!safeGetline(infile, line).eof();
-		if (indicator_snp[i]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		c_idv=0; geno_mean=0; n_miss=0;
-		gsl_vector_set_zero (genotype_miss);
-		for (uint j=0; j<ni_total; ++j) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[j]==0) {continue;}
-
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set (genotype_miss, c_idv, 1);
-			  n_miss++;
-			} else {
-				geno=atof(ch_ptr);
-				gsl_vector_set (genotype, c_idv, geno);
-				geno_mean+=geno;
-			}
-			c_idv++;
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-
-		for (size_t j=0; j<genotype->size; ++j) {
-			if (gsl_vector_get (genotype_miss, j)==1) {
-			  geno=geno_mean;
-			} else {
-			  geno=gsl_vector_get (genotype, j);
-			}
-
-			Xt_row[j]=Double02ToUchar(geno);
-			gsl_vector_set (genotype, j, (geno-geno_mean));
-		}
-		Xt.push_back(Xt_row);
-
-		if (calc_K==true) {
-		  gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
-		}
-
-		c_snp++;
-	}
-
-	if (calc_K==true) {
-		gsl_matrix_scale (K, 1.0/(double)ns_test);
-
-		for (size_t i=0; i<genotype->size; ++i) {
-			for (size_t j=0; j<i; ++j) {
-				geno=gsl_matrix_get (K, j, i);
-				gsl_matrix_set (K, i, j, geno);
-			}
-		}
-	}
-
-	gsl_vector_free (genotype);
-	gsl_vector_free (genotype_miss);
-
-	infile.clear();
-	infile.close();
-
-	return true;
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp,
+                   vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+                   const bool calc_K, const size_t ni_test,
+                   const size_t ns_test) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  Xt.clear();
+  vector<unsigned char> Xt_row;
+  for (size_t i = 0; i < ni_test; i++) {
+    Xt_row.push_back(0);
+  }
+
+  string line;
+  char *ch_ptr;
+
+  if (calc_K == true) {
+    gsl_matrix_set_zero(K);
+  }
+
+  gsl_vector *genotype = gsl_vector_alloc(ni_test);
+  gsl_vector *genotype_miss = gsl_vector_alloc(ni_test);
+  double geno, geno_mean;
+  size_t n_miss;
+
+  size_t ni_total = indicator_idv.size();
+  size_t ns_total = indicator_snp.size();
+
+  size_t c_idv = 0, c_snp = 0;
+
+  for (size_t i = 0; i < ns_total; ++i) {
+    !safeGetline(infile, line).eof();
+    if (indicator_snp[i] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    c_idv = 0;
+    geno_mean = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(genotype_miss);
+    for (uint j = 0; j < ni_total; ++j) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[j] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(genotype_miss, c_idv, 1);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+        gsl_vector_set(genotype, c_idv, geno);
+        geno_mean += geno;
+      }
+      c_idv++;
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+
+    for (size_t j = 0; j < genotype->size; ++j) {
+      if (gsl_vector_get(genotype_miss, j) == 1) {
+        geno = geno_mean;
+      } else {
+        geno = gsl_vector_get(genotype, j);
+      }
+
+      Xt_row[j] = Double02ToUchar(geno);
+      gsl_vector_set(genotype, j, (geno - geno_mean));
+    }
+    Xt.push_back(Xt_row);
+
+    if (calc_K == true) {
+      gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
+    }
+
+    c_snp++;
+  }
+
+  if (calc_K == true) {
+    gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+    for (size_t i = 0; i < genotype->size; ++i) {
+      for (size_t j = 0; j < i; ++j) {
+        geno = gsl_matrix_get(K, j, i);
+        gsl_matrix_set(K, i, j, geno);
+      }
+    }
+  }
+
+  gsl_vector_free(genotype);
+  gsl_vector_free(genotype_miss);
+
+  infile.clear();
+  infile.close();
+
+  return true;
 }
 
 // Read bimbam mean genotype file, the second time, recode "mean"
 // genotype and calculate K.
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp, gsl_matrix *UtX,
-		   gsl_matrix *K, const bool calc_K) {
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return false;
-	}
-
-	char ch[1];
-	bitset<8> b;
-
-	size_t ni_total=indicator_idv.size();
-	size_t ns_total=indicator_snp.size();
-	size_t ni_test=UtX->size1;
-	size_t ns_test=UtX->size2;
-	int n_bit;
-
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1;}
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	if (calc_K==true) {gsl_matrix_set_zero (K);}
-
-	gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
-
-	double geno, geno_mean;
-	size_t n_miss;
-	size_t c_idv=0, c_snp=0, c=0;
-
-	// Start reading snps and doing association test.
-	for (size_t t=0; t<ns_total; ++t) {
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		c_idv=0; geno_mean=0.0; n_miss=0; c=0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-			  if ((i==(n_bit-1)) && c==ni_total) {break;}
-				if (indicator_idv[c]==0) {c++; continue;}
-				c++;
-
-				if (b[2*j]==0) {
-				  if (b[2*j+1]==0) {
-				    gsl_vector_set(genotype, c_idv, 2.0);
-				    geno_mean+=2.0;
-				  }
-				  else {
-				    gsl_vector_set(genotype, c_idv, 1.0);
-				    geno_mean+=1.0;
-				  }
-				}
-				else {
-				  if (b[2*j+1]==1) {
-				    gsl_vector_set(genotype, c_idv, 0.0);
-				    geno_mean+=0.0;
-				  }
-				  else {
-				    gsl_vector_set(genotype, c_idv, -9.0);
-				    n_miss++;
-				  }
-				}
-				c_idv++;
-			}
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<genotype->size; ++i) {
-			geno=gsl_vector_get (genotype, i);
-			if (geno==-9) {geno=0;}
-			else {geno-=geno_mean;}
-
-			gsl_vector_set (genotype, i, geno);
-			gsl_matrix_set (UtX, i, c_snp, geno);
-		}
-
-		if (calc_K==true) {
-		  gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
-		}
-
-		c_snp++;
-	}
-
-	if (calc_K==true) {
-		gsl_matrix_scale (K, 1.0/(double)ns_test);
-
-		for (size_t i=0; i<genotype->size; ++i) {
-			for (size_t j=0; j<i; ++j) {
-				geno=gsl_matrix_get (K, j, i);
-				gsl_matrix_set (K, i, j, geno);
-			}
-		}
-	}
-
-	gsl_vector_free (genotype);
-	infile.clear();
-	infile.close();
-
-	return true;
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+                  vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+                  const bool calc_K) { // Fills UtX (and K if calc_K) from a bed file.
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false; // only failure path; later reads are not checked
+  }
+
+  char ch[1];  // one-byte read buffer
+  bitset<8> b; // bits of the current byte, two bits per individual
+
+  size_t ni_total = indicator_idv.size();
+  size_t ns_total = indicator_snp.size();
+  size_t ni_test = UtX->size1; // rows of UtX
+  size_t ns_test = UtX->size2; // columns of UtX
+  int n_bit; // bytes per SNP record: ceil(ni_total / 4)
+
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Read and discard the first three bytes (the magic numbers).
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  if (calc_K == true) {
+    gsl_matrix_set_zero(K);
+  }
+
+  gsl_vector *genotype = gsl_vector_alloc(UtX->size1); // freed before return
+
+  double geno, geno_mean;
+  size_t n_miss;
+  size_t c_idv = 0, c_snp = 0, c = 0;
+
+  // Read every SNP with indicator_snp[t] != 0 and store it centred in UtX.
+  for (size_t t = 0; t < ns_total; ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // SNP t starts n_bit * t bytes after the 3 magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    c_idv = 0;       // index into genotype (kept individuals only)
+    geno_mean = 0.0; // running sum, turned into a mean after the loop
+    n_miss = 0;
+    c = 0; // index into indicator_idv (all individuals)
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && c == ni_total) { // unused bits of last byte
+          break;
+        }
+        if (indicator_idv[c] == 0) { // individual not selected
+          c++;
+          continue;
+        }
+        c++;
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(genotype, c_idv, 2.0);
+            geno_mean += 2.0;
+          } else {
+            gsl_vector_set(genotype, c_idv, 1.0);
+            geno_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(genotype, c_idv, 0.0);
+            geno_mean += 0.0;
+          } else {
+            gsl_vector_set(genotype, c_idv, -9.0); // -9 marks missing
+            n_miss++;
+          }
+        }
+        c_idv++;
+      }
+    }
+
+    geno_mean /= (double)(ni_test - n_miss); // NOTE(review): 0 divisor if all missing
+
+    for (size_t i = 0; i < genotype->size; ++i) {
+      geno = gsl_vector_get(genotype, i);
+      if (geno == -9) {
+        geno = 0; // missing values become 0 on the centred scale
+      } else {
+        geno -= geno_mean;
+      }
+
+      gsl_vector_set(genotype, i, geno);
+      gsl_matrix_set(UtX, i, c_snp, geno); // column c_snp = c_snp-th kept SNP
+    }
+
+    if (calc_K == true) {
+      gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); // K += g g^T, upper triangle
+    }
+
+    c_snp++;
+  }
+
+  if (calc_K == true) {
+    gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+    for (size_t i = 0; i < genotype->size; ++i) { // copy upper triangle to lower
+      for (size_t j = 0; j < i; ++j) {
+        geno = gsl_matrix_get(K, j, i);
+        gsl_matrix_set(K, i, j, geno);
+      }
+    }
+  }
+
+  gsl_vector_free(genotype);
+  infile.clear();
+  infile.close();
+
+  return true;
 }
 
 // Compact version of the above function, using uchar instead of gsl_matrix.
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp,
-		   vector<vector<unsigned char> > &Xt, gsl_matrix *K,
-		   const bool calc_K, const size_t ni_test,
-		   const size_t ns_test) {
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return false;
-	}
-
-	Xt.clear();
-	vector<unsigned char> Xt_row;
-	for (size_t i=0; i<ni_test; i++) {
-	  Xt_row.push_back(0);
-	}
-
-	char ch[1];
-	bitset<8> b;
-
-	size_t ni_total=indicator_idv.size();
-	size_t ns_total=indicator_snp.size();
-	int n_bit;
-
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1;}
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	if (calc_K==true) {gsl_matrix_set_zero (K);}
-
-	gsl_vector *genotype=gsl_vector_alloc (ni_test);
-
-	double geno, geno_mean;
-	size_t n_miss;
-	size_t c_idv=0, c_snp=0, c=0;
-
-	// Start reading SNPs and doing association test.
-	for (size_t t=0; t<ns_total; ++t) {
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		c_idv=0; geno_mean=0.0; n_miss=0; c=0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-			  if ((i==(n_bit-1)) && c==ni_total) {break;}
-				if (indicator_idv[c]==0) {c++; continue;}
-				c++;
-
-				if (b[2*j]==0) {
-				  if (b[2*j+1]==0) {
-				    gsl_vector_set(genotype, c_idv, 2.0);
-				    geno_mean+=2.0;
-				  }
-				  else {
-				    gsl_vector_set(genotype, c_idv, 1.0);
-				    geno_mean+=1.0;
-				  }
-				}
-				else {
-				  if (b[2*j+1]==1) {
-				    gsl_vector_set(genotype, c_idv, 0.0);
-				    geno_mean+=0.0;
-				  }
-				  else {
-				    gsl_vector_set(genotype, c_idv, -9.0);
-				    n_miss++;
-				  }
-				}
-				c_idv++;
-			}
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<genotype->size; ++i) {
-			geno=gsl_vector_get (genotype, i);
-			if (geno==-9) {geno=geno_mean;}
-
-			Xt_row[i]=Double02ToUchar(geno);
-
-			geno-=geno_mean;
-
-			gsl_vector_set (genotype, i, geno);
-		}
-		Xt.push_back(Xt_row);
-
-		if (calc_K==true) {
-		  gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
-		}
-
-		c_snp++;
-	}
-
-	if (calc_K==true) {
-		gsl_matrix_scale (K, 1.0/(double)ns_test);
-
-		for (size_t i=0; i<genotype->size; ++i) {
-			for (size_t j=0; j<i; ++j) {
-				geno=gsl_matrix_get (K, j, i);
-				gsl_matrix_set (K, i, j, geno);
-			}
-		}
-	}
-
-	gsl_vector_free (genotype);
-	infile.clear();
-	infile.close();
-
-	return true;
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+                  vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
+                  gsl_matrix *K, const bool calc_K, const size_t ni_test,
+                  const size_t ns_test) { // Fills Xt (and K if calc_K) from a bed file.
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false; // only failure path; later reads are not checked
+  }
+
+  Xt.clear();
+  vector<unsigned char> Xt_row; // reused buffer: one encoded SNP, ni_test entries
+  for (size_t i = 0; i < ni_test; i++) {
+    Xt_row.push_back(0);
+  }
+
+  char ch[1];  // one-byte read buffer
+  bitset<8> b; // bits of the current byte, two bits per individual
+
+  size_t ni_total = indicator_idv.size();
+  size_t ns_total = indicator_snp.size();
+  int n_bit; // bytes per SNP record: ceil(ni_total / 4)
+
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Read and discard the first three bytes (the magic numbers).
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  if (calc_K == true) {
+    gsl_matrix_set_zero(K);
+  }
+
+  gsl_vector *genotype = gsl_vector_alloc(ni_test); // freed before return
+
+  double geno, geno_mean;
+  size_t n_miss;
+  size_t c_idv = 0, c_snp = 0, c = 0; // c_snp is only incremented in this overload
+
+  // Read every SNP with indicator_snp[t] != 0 and append its encoded row to Xt.
+  for (size_t t = 0; t < ns_total; ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // SNP t starts n_bit * t bytes after the 3 magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    c_idv = 0;       // index into genotype (kept individuals only)
+    geno_mean = 0.0; // running sum, turned into a mean after the loop
+    n_miss = 0;
+    c = 0; // index into indicator_idv (all individuals)
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && c == ni_total) { // unused bits of last byte
+          break;
+        }
+        if (indicator_idv[c] == 0) { // individual not selected
+          c++;
+          continue;
+        }
+        c++;
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(genotype, c_idv, 2.0);
+            geno_mean += 2.0;
+          } else {
+            gsl_vector_set(genotype, c_idv, 1.0);
+            geno_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(genotype, c_idv, 0.0);
+            geno_mean += 0.0;
+          } else {
+            gsl_vector_set(genotype, c_idv, -9.0); // -9 marks missing
+            n_miss++;
+          }
+        }
+        c_idv++;
+      }
+    }
+
+    geno_mean /= (double)(ni_test - n_miss); // NOTE(review): 0 divisor if all missing
+
+    for (size_t i = 0; i < genotype->size; ++i) {
+      geno = gsl_vector_get(genotype, i);
+      if (geno == -9) {
+        geno = geno_mean; // missing values are replaced by the SNP mean
+      }
+
+      Xt_row[i] = Double02ToUchar(geno); // encoded before centring; helper defined elsewhere
+
+      geno -= geno_mean;
+
+      gsl_vector_set(genotype, i, geno);
+    }
+    Xt.push_back(Xt_row); // copies the buffer; Xt gains one row per kept SNP
+
+    if (calc_K == true) {
+      gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); // K += g g^T, upper triangle
+    }
+
+    c_snp++;
+  }
+
+  if (calc_K == true) {
+    gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+    for (size_t i = 0; i < genotype->size; ++i) { // copy upper triangle to lower
+      for (size_t j = 0; j < i; ++j) {
+        geno = gsl_matrix_get(K, j, i);
+        gsl_matrix_set(K, i, j, geno);
+      }
+    }
+  }
+
+  gsl_vector_free(genotype);
+  infile.clear();
+  infile.close();
+
+  return true;
 }
 
-bool ReadFile_est (const string &file_est, const vector<size_t> &est_column,
-		   map<string, double> &mapRS2est) {
-	mapRS2est.clear();
-
-	ifstream infile (file_est.c_str(), ifstream::in);
-	if (!infile) {
-	  cout<<"error opening estimated parameter file: "<<file_est<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	string rs;
-	double alpha, beta, gamma, d;
-
-	// Header.
-	getline(infile, line);
-
-	size_t n=*max_element(est_column.begin(), est_column.end());
-
-	while (getline(infile, line)) {
-		ch_ptr=strtok ((char *)line.c_str(), " \t");
-
-		alpha=0.0; beta=0.0; gamma=1.0;
-		for (size_t i=0; i<n+1; ++i) {
-			if (i==est_column[0]-1) {rs=ch_ptr;}
-			if (i==est_column[1]-1) {alpha=atof(ch_ptr);}
-			if (i==est_column[2]-1) {beta=atof(ch_ptr);}
-			if (i==est_column[3]-1) {gamma=atof(ch_ptr);}
-			if (i<n) {ch_ptr=strtok (NULL, " \t");}
-		}
-
-		d=alpha+beta*gamma;
-
-		if (mapRS2est.count(rs)==0) {
-			mapRS2est[rs]=d;
-		}
-		else {
-		  cout << "the same SNP occurs more than once in estimated "<<
-		    "parameter file: "<<rs<<endl;
-		  return false;
-		}
-	}
-
-	infile.clear();
-	infile.close();
-	return true;
+bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
+                  map<string, double> &mapRS2est) { // Maps each SNP id to alpha + beta * gamma.
+  mapRS2est.clear();
+
+  ifstream infile(file_est.c_str(), ifstream::in);
+  if (!infile) {
+    cout << "error opening estimated parameter file: " << file_est << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr; // current token returned by strtok
+
+  string rs;
+  double alpha, beta, gamma, d;
+
+  // Skip the header line.
+  getline(infile, line);
+
+  size_t n = *max_element(est_column.begin(), est_column.end()); // last column needed
+
+  while (getline(infile, line)) {
+    ch_ptr = strtok((char *)line.c_str(), " \t"); // tokenises line in place
+
+    alpha = 0.0;
+    beta = 0.0;
+    gamma = 1.0; // these defaults stay if a column index never matches
+    for (size_t i = 0; i < n + 1; ++i) { // est_column entries are 1-based
+      if (i == est_column[0] - 1) {
+        rs = ch_ptr; // NOTE(review): ch_ptr is not checked for NULL on short lines
+      }
+      if (i == est_column[1] - 1) {
+        alpha = atof(ch_ptr);
+      }
+      if (i == est_column[2] - 1) {
+        beta = atof(ch_ptr);
+      }
+      if (i == est_column[3] - 1) {
+        gamma = atof(ch_ptr);
+      }
+      if (i < n) {
+        ch_ptr = strtok(NULL, " \t");
+      }
+    }
+
+    d = alpha + beta * gamma;
+
+    if (mapRS2est.count(rs) == 0) {
+      mapRS2est[rs] = d;
+    } else { // a repeated SNP id aborts the read
+      cout << "the same SNP occurs more than once in estimated "
+           << "parameter file: " << rs << endl;
+      return false;
+    }
+  }
+
+  infile.clear();
+  infile.close();
+  return true;
 }
 
-bool CountFileLines (const string &file_input, size_t &n_lines) {
-	igzstream infile (file_input.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error! fail to open file: "<<file_input<<endl;
-	  return false;
-	}
+bool CountFileLines(const string &file_input, size_t &n_lines) { // Counts '\n' in file_input.
+  igzstream infile(file_input.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open file: " << file_input << endl;
+    return false;
+  }
 
-	n_lines=count(istreambuf_iterator<char>(infile), istreambuf_iterator<char>(), '\n');
-	infile.seekg (0, ios::beg);
+  n_lines = count(istreambuf_iterator<char>(infile),
+                  istreambuf_iterator<char>(), '\n'); // a last line without '\n' is not counted
+  infile.seekg(0, ios::beg); // acts on this local stream only
 
-	return true;
+  return true;
 }
 
 // Read gene expression file.
-bool ReadFile_gene (const string &file_gene, vector<double> &vec_read,
-		    vector<SNPINFO> &snpInfo, size_t &ng_total) {
-	vec_read.clear();
-	ng_total=0;
-
-	igzstream infile (file_gene.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error! fail to open gene expression file: "<<file_gene<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-	string rs;
-
-	size_t n_idv=0, t=0;
-
-	// Header.
-	getline(infile, line);
-
-	while (getline(infile, line)) {
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		rs=ch_ptr;
-
-		ch_ptr=strtok (NULL, " , \t");
-
-		t=0;
-		while (ch_ptr!=NULL) {
-			if (ng_total==0) {
-				vec_read.push_back(0);
-				t++;
-				n_idv++;
-			} else {
-				vec_read[t]+=atof(ch_ptr);
-				t++;
-			}
-
-			ch_ptr=strtok (NULL, " , \t");
-		}
-
-		if (t!=n_idv) {
-		  cout<<"error! number of columns doesn't match in row: "<<
-		    ng_total<<endl;
-		  return false;
-		}
-
-		SNPINFO sInfo={"-9",rs,-9,-9,"-9","-9",0,-9,-9,0,0,0};
-		snpInfo.push_back(sInfo);
-
-		ng_total++;
-	}
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
+                   vector<SNPINFO> &snpInfo, size_t &ng_total) { // Sums per-column values over rows.
+  vec_read.clear();
+  ng_total = 0; // number of data rows read so far
+
+  igzstream infile(file_gene.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error! fail to open gene expression file: " << file_gene << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr; // current token returned by strtok
+  string rs;
+
+  size_t n_idv = 0, t = 0; // n_idv: value columns in the first data row
+
+  // Skip the header line.
+  getline(infile, line);
+
+  while (getline(infile, line)) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t"); // delimiters: space, comma, tab
+    rs = ch_ptr; // first token is the row id; NOTE(review): NULL on an empty line
+
+    ch_ptr = strtok(NULL, " , \t");
+
+    t = 0;
+    while (ch_ptr != NULL) {
+      if (ng_total == 0) { // first data row only sizes vec_read; its values are not added
+        vec_read.push_back(0);
+        t++;
+        n_idv++;
+      } else {
+        vec_read[t] += atof(ch_ptr); // NOTE(review): t is not bounded before the check below
+        t++;
+      }
+
+      ch_ptr = strtok(NULL, " , \t");
+    }
+
+    if (t != n_idv) {
+      cout << "error! number of columns doesn't match in row: " << ng_total
+           << endl;
+      return false;
+    }
+
+    SNPINFO sInfo = {"-9", rs, -9, -9, "-9", "-9", 0, -9, -9, 0, 0, 0}; // only rs comes from the file
+    snpInfo.push_back(sInfo); // appended; snpInfo is not cleared by this function
+
+    ng_total++;
+  }
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // WJA Added
 // Read Oxford sample file.
-bool ReadFile_sample (const string &file_sample,
-		      vector<vector<int> > &indicator_pheno,
-		      vector<vector<double> > &pheno,
-		      const vector<size_t> &p_column,
-		      vector<int> &indicator_cvt,
-		      vector<vector<double> > &cvt, size_t &n_cvt) {
-	indicator_pheno.clear();
-	pheno.clear();
-	indicator_cvt.clear();
-
-	igzstream infile (file_sample.c_str(), igzstream::in);
-
-	if (!infile) {
-	  cout<<"error! fail to open sample file: "<<file_sample<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	string id;
-	double p,d;
-
-	vector<double> pheno_row;
-	vector<int> ind_pheno_row;
-	int flag_na=0;
-
-	size_t num_cols=0;
-	size_t num_p_in_file=0;
-	size_t num_cvt_in_file=0;
-
-	map<size_t, size_t> mapP2c;
-	for (size_t i=0; i<p_column.size(); i++) {
-		mapP2c[p_column[i]]=i;
-		pheno_row.push_back(-9);
-		ind_pheno_row.push_back(0);
-	}
-
-	// Read header line1.
-	if(!safeGetline(infile, line).eof()) {
-		ch_ptr=strtok((char *)line.c_str(), " \t");
-		if(strcmp(ch_ptr, "ID_1")!=0) {return false;}
-		ch_ptr=strtok(NULL, " \t");
-		if(strcmp(ch_ptr, "ID_2")!=0) {return false;}
-		ch_ptr=strtok(NULL, " \t");
-		if(strcmp(ch_ptr, "missing")!=0) {return false;}
-		while (ch_ptr!=NULL) {
-			num_cols++;
-			ch_ptr=strtok (NULL, " \t");
-
-		}
-		num_cols--;
-	}
-
-	vector<map<uint32_t, size_t> > cvt_factor_levels;
-
-	char col_type[num_cols];
-
-	// Read header line2.
-	if(!safeGetline(infile, line).eof()) {
-		ch_ptr=strtok ((char *)line.c_str(), " \t");
-		if(strcmp(ch_ptr, "0")!=0) {return false;}
-		ch_ptr=strtok(NULL, " \t");
-		if(strcmp(ch_ptr, "0")!=0) {return false;}
-		ch_ptr=strtok(NULL, " \t");
-		if(strcmp(ch_ptr, "0")!=0) {return false;}
-		size_t it=0;
-		ch_ptr=strtok (NULL, " \t");
-		if(ch_ptr!=NULL)
-		  while(ch_ptr!=NULL){
-		    col_type[it++]=ch_ptr[0];
-		    if(ch_ptr[0]=='D') {
-		      cvt_factor_levels.push_back(map<uint32_t,size_t>());
-		      num_cvt_in_file++;
-		    }
-		    if(ch_ptr[0]=='C') {num_cvt_in_file++;}
-		    if((ch_ptr[0]=='P')||(ch_ptr[0]=='B')) {
-		      num_p_in_file++;}
-		    ch_ptr=strtok(NULL, " \t");
-		  }
-
-	}
-
-	while (!safeGetline(infile, line).eof()) {
-
-		ch_ptr=strtok ((char *)line.c_str(), " \t");
-
-		for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}
-
-		size_t i=0;
-		size_t p_i=0;
-		size_t fac_cvt_i=0;
-
-		while (i<num_cols) {
-
-			if((col_type[i]=='P')||(col_type[i]=='B'))
-			{
-			  if (mapP2c.count(p_i+1)!=0) {
-			    if (strcmp(ch_ptr, "NA")==0) {
-			      ind_pheno_row[mapP2c[p_i+1]]=0;
-			      pheno_row[mapP2c[p_i+1]]=-9;
-			    }
-			    else {
-			      p=atof(ch_ptr);
-			      ind_pheno_row[mapP2c[p_i+1]]=1;
-			      pheno_row[mapP2c[p_i+1]]=p;
-			    }
-			  }
-			  p_i++;
-			}
-			if(col_type[i]=='D')
-			{
-
-			  // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
-			  // IS INTEGRAL i.e for atoi error.
-			  if (strcmp(ch_ptr, "NA")!=0) {
-			    uint32_t level=atoi(ch_ptr);
-			    if (cvt_factor_levels[fac_cvt_i].count(level)==0) {
-			      cvt_factor_levels[fac_cvt_i][level]=
-				cvt_factor_levels[fac_cvt_i].size();
-			    }
-			  }
-			  fac_cvt_i++;
-			}
-
-			ch_ptr=strtok (NULL, " \t");
-			i++;
-		}
-
-		indicator_pheno.push_back(ind_pheno_row);
-		pheno.push_back(pheno_row);
-
-	}
-
-	// Close and reopen the file.
- 	infile.close();
- 	infile.clear();
-
-	if(num_cvt_in_file>0) {
-		igzstream infile2 (file_sample.c_str(), igzstream::in);
-
-		if (!infile2) {
-		  cout<<"error! fail to open sample file: "<<
-		    file_sample<<endl;
-		  return false;
-		}
-
-		// Skip header.
-		safeGetline(infile2, line);
-		safeGetline(infile2, line);
-
-		// Pull in the covariates now we now the number of
-		// factor levels.
-		while (!safeGetline(infile2, line).eof()) {
-
-			vector<double> v_d; flag_na=0;
-			ch_ptr=strtok ((char *)line.c_str(), " \t");
-
-			for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}
-
-			size_t i=0;
-			size_t fac_cvt_i=0;
-			size_t num_fac_levels;
-			while (i<num_cols) {
-
-			  if(col_type[i]=='C') {
-			    if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
-			    else {d=atof(ch_ptr);}
-
-			    v_d.push_back(d);
-			  }
-
-			  if(col_type[i]=='D') {
-
-			    // NOTE THIS DOES NOT CHECK TO BE SURE
-			    // LEVEL IS INTEGRAL i.e for atoi error.
-			    num_fac_levels=cvt_factor_levels[fac_cvt_i].size();
-			    if(num_fac_levels>1) {
-			      if (strcmp(ch_ptr, "NA")==0) {
-				flag_na=1;
-				for(size_t it=0;it<num_fac_levels-1; it++) {
-				  v_d.push_back(-9);
-				}
-			      }
-			      else {
-				uint32_t level=atoi(ch_ptr);
-				for(size_t it=0;it<num_fac_levels-1;it++) {
-				  cvt_factor_levels[fac_cvt_i][level]==it+1 ?
-				    v_d.push_back(1.0) :
-				    v_d.push_back(0.0);
-				}
-			      }
-			    }
-			    fac_cvt_i++;
-			  }
-
-			  ch_ptr=strtok (NULL, " \t");
-			  i++;
-			}
-
-			if (flag_na==0) {
-			  indicator_cvt.push_back(1);
-			} else {
-			  indicator_cvt.push_back(0);
-			}
-			cvt.push_back(v_d);
-
-
-		}
-
-		if (indicator_cvt.empty()) {n_cvt=0;}
-		else {
-			flag_na=0;
-			for (vector<int>::size_type i=0;
-			     i<indicator_cvt.size();
-			     ++i) {
-				if (indicator_cvt[i]==0) {continue;}
-
-				if (flag_na==0) {
-				  flag_na=1;
-				  n_cvt=cvt[i].size();
-				}
-				if (flag_na!=0 && n_cvt!=cvt[i].size()) {
-				  cout<<"error! number of covariates in row "<<
-				    i<<" do not match other rows."<<endl;
-				  return false;
-				}
-			}
-		}
-
-		infile2.close();
-		infile2.clear();
-	}
- 	return true;
+bool ReadFile_sample(const string &file_sample,
+                     vector<vector<int>> &indicator_pheno,
+                     vector<vector<double>> &pheno,
+                     const vector<size_t> &p_column, vector<int> &indicator_cvt,
+                     vector<vector<double>> &cvt, size_t &n_cvt) { // Two passes: phenotypes, then covariates.
+  indicator_pheno.clear();
+  pheno.clear();
+  indicator_cvt.clear(); // NOTE(review): cvt itself is not cleared, only appended to
+
+  igzstream infile(file_sample.c_str(), igzstream::in);
+
+  if (!infile) {
+    cout << "error! fail to open sample file: " << file_sample << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr; // current token returned by strtok
+
+  string id; // not used in this function
+  double p, d;
+
+  vector<double> pheno_row;  // reused per row; one slot per entry of p_column
+  vector<int> ind_pheno_row; // 1 where the phenotype was present, 0 where "NA"
+  int flag_na = 0;
+
+  size_t num_cols = 0; // columns after ID_1, ID_2, missing
+  size_t num_p_in_file = 0;
+  size_t num_cvt_in_file = 0;
+
+  map<size_t, size_t> mapP2c; // 1-based phenotype number -> slot in pheno_row
+  for (size_t i = 0; i < p_column.size(); i++) {
+    mapP2c[p_column[i]] = i;
+    pheno_row.push_back(-9);
+    ind_pheno_row.push_back(0);
+  }
+
+  // Read header line 1: must begin with ID_1, ID_2, missing.
+  if (!safeGetline(infile, line).eof()) {
+    ch_ptr = strtok((char *)line.c_str(), " \t");
+    if (strcmp(ch_ptr, "ID_1") != 0) {
+      return false;
+    }
+    ch_ptr = strtok(NULL, " \t");
+    if (strcmp(ch_ptr, "ID_2") != 0) {
+      return false;
+    }
+    ch_ptr = strtok(NULL, " \t");
+    if (strcmp(ch_ptr, "missing") != 0) {
+      return false;
+    }
+    while (ch_ptr != NULL) { // counts "missing" plus every later column
+      num_cols++;
+      ch_ptr = strtok(NULL, " \t");
+    }
+    num_cols--; // drop the count for "missing" itself
+  }
+
+  vector<map<uint32_t, size_t>> cvt_factor_levels; // per 'D' column: level -> index
+
+  char col_type[num_cols]; // NOTE(review): variable-length array, a compiler extension in C++
+
+  // Read header line 2: three "0" fields, then one type code per column.
+  if (!safeGetline(infile, line).eof()) {
+    ch_ptr = strtok((char *)line.c_str(), " \t");
+    if (strcmp(ch_ptr, "0") != 0) {
+      return false;
+    }
+    ch_ptr = strtok(NULL, " \t");
+    if (strcmp(ch_ptr, "0") != 0) {
+      return false;
+    }
+    ch_ptr = strtok(NULL, " \t");
+    if (strcmp(ch_ptr, "0") != 0) {
+      return false;
+    }
+    size_t it = 0;
+    ch_ptr = strtok(NULL, " \t");
+    if (ch_ptr != NULL)
+      while (ch_ptr != NULL) {
+        col_type[it++] = ch_ptr[0]; // NOTE(review): it is not checked against num_cols
+        if (ch_ptr[0] == 'D') { // 'D': covariate handled as a factor
+          cvt_factor_levels.push_back(map<uint32_t, size_t>());
+          num_cvt_in_file++;
+        }
+        if (ch_ptr[0] == 'C') { // 'C': covariate read as a number
+          num_cvt_in_file++;
+        }
+        if ((ch_ptr[0] == 'P') || (ch_ptr[0] == 'B')) { // 'P'/'B': phenotype
+          num_p_in_file++;
+        }
+        ch_ptr = strtok(NULL, " \t");
+      }
+  }
+
+  while (!safeGetline(infile, line).eof()) { // pass 1: phenotypes and factor levels
+
+    ch_ptr = strtok((char *)line.c_str(), " \t");
+
+    for (int it = 0; it < 3; it++) { // advance to the fourth token
+      ch_ptr = strtok(NULL, " \t");
+    }
+
+    size_t i = 0;
+    size_t p_i = 0;       // phenotype columns seen so far in this row
+    size_t fac_cvt_i = 0; // 'D' columns seen so far in this row
+
+    while (i < num_cols) {
+
+      if ((col_type[i] == 'P') || (col_type[i] == 'B')) {
+        if (mapP2c.count(p_i + 1) != 0) { // keep only phenotypes listed in p_column
+          if (strcmp(ch_ptr, "NA") == 0) {
+            ind_pheno_row[mapP2c[p_i + 1]] = 0;
+            pheno_row[mapP2c[p_i + 1]] = -9;
+          } else {
+            p = atof(ch_ptr);
+            ind_pheno_row[mapP2c[p_i + 1]] = 1;
+            pheno_row[mapP2c[p_i + 1]] = p;
+          }
+        }
+        p_i++;
+      }
+      if (col_type[i] == 'D') {
+
+        // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
+        // IS INTEGRAL i.e for atoi error.
+        if (strcmp(ch_ptr, "NA") != 0) {
+          uint32_t level = atoi(ch_ptr);
+          if (cvt_factor_levels[fac_cvt_i].count(level) == 0) {
+            cvt_factor_levels[fac_cvt_i][level] = // NOTE(review): order vs size() fixed only since C++17
+                cvt_factor_levels[fac_cvt_i].size();
+          }
+        }
+        fac_cvt_i++;
+      }
+
+      ch_ptr = strtok(NULL, " \t");
+      i++;
+    }
+
+    indicator_pheno.push_back(ind_pheno_row);
+    pheno.push_back(pheno_row);
+  }
+
+  // Close the file; it is reopened below if there are covariate columns.
+  infile.close();
+  infile.clear();
+
+  if (num_cvt_in_file > 0) { // n_cvt is left untouched when there are none
+    igzstream infile2(file_sample.c_str(), igzstream::in);
+
+    if (!infile2) {
+      cout << "error! fail to open sample file: " << file_sample << endl;
+      return false;
+    }
+
+    // Skip the two header lines.
+    safeGetline(infile2, line);
+    safeGetline(infile2, line);
+
+    // Pull in the covariates now we know the number of
+    // factor levels.
+    while (!safeGetline(infile2, line).eof()) {
+
+      vector<double> v_d; // covariate values for this row
+      flag_na = 0;
+      ch_ptr = strtok((char *)line.c_str(), " \t");
+
+      for (int it = 0; it < 3; it++) { // advance to the fourth token
+        ch_ptr = strtok(NULL, " \t");
+      }
+
+      size_t i = 0;
+      size_t fac_cvt_i = 0;
+      size_t num_fac_levels;
+      while (i < num_cols) {
+
+        if (col_type[i] == 'C') {
+          if (strcmp(ch_ptr, "NA") == 0) {
+            flag_na = 1;
+            d = -9;
+          } else {
+            d = atof(ch_ptr);
+          }
+
+          v_d.push_back(d);
+        }
+
+        if (col_type[i] == 'D') {
+
+          // NOTE THIS DOES NOT CHECK TO BE SURE
+          // LEVEL IS INTEGRAL i.e for atoi error.
+          num_fac_levels = cvt_factor_levels[fac_cvt_i].size();
+          if (num_fac_levels > 1) { // a factor with one level adds no columns
+            if (strcmp(ch_ptr, "NA") == 0) {
+              flag_na = 1;
+              for (size_t it = 0; it < num_fac_levels - 1; it++) {
+                v_d.push_back(-9);
+              }
+            } else {
+              uint32_t level = atoi(ch_ptr);
+              for (size_t it = 0; it < num_fac_levels - 1; it++) { // one 0/1 column per index 1..levels-1
+                cvt_factor_levels[fac_cvt_i][level] == it + 1
+                    ? v_d.push_back(1.0)
+                    : v_d.push_back(0.0);
+              }
+            }
+          }
+          fac_cvt_i++;
+        }
+
+        ch_ptr = strtok(NULL, " \t");
+        i++;
+      }
+
+      if (flag_na == 0) { // 1 only when no covariate in this row was "NA"
+        indicator_cvt.push_back(1);
+      } else {
+        indicator_cvt.push_back(0);
+      }
+      cvt.push_back(v_d);
+    }
+
+    if (indicator_cvt.empty()) {
+      n_cvt = 0;
+    } else {
+      flag_na = 0; // reused here as "n_cvt has been set"
+      for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) {
+        if (indicator_cvt[i] == 0) {
+          continue;
+        }
+
+        if (flag_na == 0) {
+          flag_na = 1;
+          n_cvt = cvt[i].size();
+        }
+        if (flag_na != 0 && n_cvt != cvt[i].size()) {
+          cout << "error! number of covariates in row " << i
+               << " do not match other rows." << endl;
+          return false;
+        }
+      }
+    }
+
+    infile2.close();
+    infile2.clear();
+  }
+  return true;
 }
 
 // WJA Added.
 // Read bgen file, the first time.
 bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
-		   const gsl_matrix *W, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
-		   const double &maf_level, const double &miss_level,
-		   const double &hwe_level, const double &r2_level,
-		   size_t &ns_test) {
-
-	indicator_snp.clear();
-
-	ifstream infile (file_bgen.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bgen file:"<<file_bgen<<endl;
-	  return false;
-	}
-
-	gsl_vector *genotype=gsl_vector_alloc (W->size1);
-	gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	// Read in header.
-	uint32_t bgen_snp_block_offset;
-	uint32_t bgen_header_length;
-	uint32_t bgen_nsamples;
-	uint32_t bgen_nsnps;
-	uint32_t bgen_flags;
-	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
-	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
-	bgen_snp_block_offset-=4;
-	infile.ignore(4+bgen_header_length-20);
-	bgen_snp_block_offset-=4+bgen_header_length-20;
-	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
-	bgen_snp_block_offset-=4;
-	bool CompressedSNPBlocks=bgen_flags&0x1;
-	bool LongIds=bgen_flags&0x4;
-
-	if(!LongIds) {return false;}
-
-	infile.ignore(bgen_snp_block_offset);
-
-	ns_test=0;
-
-	size_t ns_total=static_cast<size_t>(bgen_nsnps);
-
-	snpInfo.clear();
-	string rs;
-	long int b_pos;
-	string chr;
-	string major;
-	string minor;
-	string id;
-
-	double v_x, v_w;
-	int c_idv=0;
-
-	double maf, geno, geno_old;
-	size_t n_miss;
-	size_t n_0, n_1, n_2;
-	int flag_poly;
-
-	double bgen_geno_prob_AA, bgen_geno_prob_AB;
-	double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
-	// Total number of samples in phenotype file.
-	size_t ni_total=indicator_idv.size();
-
-	// Number of samples to use in test.
-	size_t ni_test=0;
-
-	uint32_t bgen_N;
-	uint16_t bgen_LS;
-	uint16_t bgen_LR;
-	uint16_t bgen_LC;
-	uint32_t bgen_SNP_pos;
-	uint32_t bgen_LA;
-	std::string bgen_A_allele;
-	uint32_t bgen_LB;
-	std::string bgen_B_allele;
-	uint32_t bgen_P;
-	size_t unzipped_data_size;
-
-	for (size_t i=0; i<ni_total; ++i) {
-	  ni_test+=indicator_idv[i];
-	}
-
-	for (size_t t=0; t<ns_total; ++t) {
-
-		id.clear();
-		rs.clear();
-		chr.clear();
-		bgen_A_allele.clear();
-		bgen_B_allele.clear();
-
-		infile.read(reinterpret_cast<char*>(&bgen_N),4);
-		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
-		id.resize(bgen_LS);
-		infile.read(&id[0], bgen_LS);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
-		rs.resize(bgen_LR);
-		infile.read(&rs[0], bgen_LR);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
-		chr.resize(bgen_LC);
-		infile.read(&chr[0], bgen_LC);
-
-		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
-		bgen_A_allele.resize(bgen_LA);
-		infile.read(&bgen_A_allele[0], bgen_LA);
-
-
-		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
-		bgen_B_allele.resize(bgen_LB);
-		infile.read(&bgen_B_allele[0], bgen_LB);
-
-		// Should we switch according to MAF?
-		minor=bgen_B_allele;
-		major=bgen_A_allele;
-		b_pos=static_cast<long int>(bgen_SNP_pos);
-
-		uint16_t unzipped_data[3*bgen_N];
-
-		if (setSnps.size()!=0 && setSnps.count(rs)==0) {
-		  SNPINFO sInfo={"-9", rs, -9, -9, minor, major,
-				 static_cast<size_t>(-9), -9, (long int) -9};
-
-			snpInfo.push_back(sInfo);
-			indicator_snp.push_back(0);
-			if(CompressedSNPBlocks)
-			  infile.read(reinterpret_cast<char*>(&bgen_P),4);
-			else
-			  bgen_P=6*bgen_N;
-
-			infile.ignore(static_cast<size_t>(bgen_P));
-
-			continue;
-		}
-
-		if(CompressedSNPBlocks)
-		{
-			infile.read(reinterpret_cast<char*>(&bgen_P),4);
-			uint8_t zipped_data[bgen_P];
-
-			unzipped_data_size=6*bgen_N;
-
-			infile.read(reinterpret_cast<char*>(zipped_data),
-				    bgen_P);
-			int result=
-			  uncompress(reinterpret_cast<Bytef*>(unzipped_data),
-			    reinterpret_cast<uLongf*>(&unzipped_data_size),
-				     reinterpret_cast<Bytef*>(zipped_data),
-				     static_cast<uLong> (bgen_P));
-			assert(result == Z_OK);
-
-		}
-		else
-		{
-		  bgen_P=6*bgen_N;
-		  infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
-
-		}
-
-		maf=0; n_miss=0; flag_poly=0; geno_old=-9;
-		n_0=0; n_1=0; n_2=0;
-		c_idv=0;
-		gsl_vector_set_zero (genotype_miss);
-		for (size_t i=0; i<bgen_N; ++i) {
-
-			// CHECK this set correctly!
-			if (indicator_idv[i]==0) {continue;}
-
-			bgen_geno_prob_AA=
-			  static_cast<double>(unzipped_data[i*3])/32768.0;
-			bgen_geno_prob_AB=
-			  static_cast<double>(unzipped_data[i*3+1])/32768.0;
-			bgen_geno_prob_BB=
-			  static_cast<double>(unzipped_data[i*3+2])/32768.0;
-			bgen_geno_prob_non_miss=
-			  bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
-
-			//CHECK 0.1 OK.
-			if (bgen_geno_prob_non_miss<0.9) {
-			  gsl_vector_set (genotype_miss, c_idv, 1);
-			  n_miss++;
-			  c_idv++;
-			  continue;
-			}
-
-			bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
-			bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
-			bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
-			geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-			if (geno>=0 && geno<=0.5) {n_0++;}
-			if (geno>0.5 && geno<1.5) {n_1++;}
-			if (geno>=1.5 && geno<=2.0) {n_2++;}
-
-			gsl_vector_set (genotype, c_idv, geno);
-
-			// CHECK WHAT THIS DOES.
-			if (flag_poly==0) {geno_old=geno; flag_poly=2;}
-			if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
-
-			maf+=geno;
-
-			c_idv++;
-		}
-
-		maf/=2.0*static_cast<double>(ni_test-n_miss);
-
-		SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss,
-			       (double)n_miss/(double)ni_test, maf};
-		snpInfo.push_back(sInfo);
-
-		if ( (double)n_miss/(double)ni_test > miss_level) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if (flag_poly!=1) {
-		  indicator_snp.push_back(0);
-		  continue;
-		}
-
-		if (hwe_level!=0 && maf_level!=-1) {
-		  if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
-		    indicator_snp.push_back(0);
-		    continue;
-		  }
-		}
-
-		// Filter SNP if it is correlated with W
-		// unless W has only one column, of 1s.
-		for (size_t i=0; i<genotype->size; ++i) {
-			if (gsl_vector_get (genotype_miss, i)==1) {
-			  geno=maf*2.0;
-			  gsl_vector_set (genotype, i, geno);
-			}
-		}
-
-		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
-		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
-		gsl_blas_ddot (genotype, genotype, &v_x);
-		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
-		if (W->size2!=1 && v_w/v_x >= r2_level) {
-		  indicator_snp.push_back(0); continue;}
-
-		indicator_snp.push_back(1);
-		ns_test++;
-
-	}
-
-	return true;
+                   const gsl_matrix *W, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+                   const double &maf_level, const double &miss_level,
+                   const double &hwe_level, const double &r2_level,
+                   size_t &ns_test) {
+
+  indicator_snp.clear();
+
+  ifstream infile(file_bgen.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bgen file:" << file_bgen << endl;
+    return false;
+  }
+
+  gsl_vector *genotype = gsl_vector_alloc(W->size1);
+  gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  // Read in header.
+  uint32_t bgen_snp_block_offset;
+  uint32_t bgen_header_length;
+  uint32_t bgen_nsamples;
+  uint32_t bgen_nsnps;
+  uint32_t bgen_flags;
+  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+  bgen_snp_block_offset -= 4;
+  infile.ignore(4 + bgen_header_length - 20);
+  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+  bgen_snp_block_offset -= 4;
+  bool CompressedSNPBlocks = bgen_flags & 0x1;
+  bool LongIds = bgen_flags & 0x4;
+
+  if (!LongIds) {
+    return false;
+  }
+
+  infile.ignore(bgen_snp_block_offset);
+
+  ns_test = 0;
+
+  size_t ns_total = static_cast<size_t>(bgen_nsnps);
+
+  snpInfo.clear();
+  string rs;
+  long int b_pos;
+  string chr;
+  string major;
+  string minor;
+  string id;
+
+  double v_x, v_w;
+  int c_idv = 0;
+
+  double maf, geno, geno_old;
+  size_t n_miss;
+  size_t n_0, n_1, n_2;
+  int flag_poly;
+
+  double bgen_geno_prob_AA, bgen_geno_prob_AB;
+  double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+  // Total number of samples in phenotype file.
+  size_t ni_total = indicator_idv.size();
+
+  // Number of samples to use in test.
+  size_t ni_test = 0;
+
+  uint32_t bgen_N;
+  uint16_t bgen_LS;
+  uint16_t bgen_LR;
+  uint16_t bgen_LC;
+  uint32_t bgen_SNP_pos;
+  uint32_t bgen_LA;
+  std::string bgen_A_allele;
+  uint32_t bgen_LB;
+  std::string bgen_B_allele;
+  uint32_t bgen_P;
+  size_t unzipped_data_size;
+
+  for (size_t i = 0; i < ni_total; ++i) {
+    ni_test += indicator_idv[i];
+  }
+
+  for (size_t t = 0; t < ns_total; ++t) {
+
+    id.clear();
+    rs.clear();
+    chr.clear();
+    bgen_A_allele.clear();
+    bgen_B_allele.clear();
+
+    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+    id.resize(bgen_LS);
+    infile.read(&id[0], bgen_LS);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+    rs.resize(bgen_LR);
+    infile.read(&rs[0], bgen_LR);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+    chr.resize(bgen_LC);
+    infile.read(&chr[0], bgen_LC);
+
+    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+    bgen_A_allele.resize(bgen_LA);
+    infile.read(&bgen_A_allele[0], bgen_LA);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+    bgen_B_allele.resize(bgen_LB);
+    infile.read(&bgen_B_allele[0], bgen_LB);
+
+    // Should we switch according to MAF?
+    minor = bgen_B_allele;
+    major = bgen_A_allele;
+    b_pos = static_cast<long int>(bgen_SNP_pos);
+
+    uint16_t unzipped_data[3 * bgen_N];
+
+    if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
+      SNPINFO sInfo = {
+          "-9", rs,          -9, -9, minor, major, static_cast<size_t>(-9),
+          -9,   (long int)-9};
+
+      snpInfo.push_back(sInfo);
+      indicator_snp.push_back(0);
+      if (CompressedSNPBlocks)
+        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      else
+        bgen_P = 6 * bgen_N;
+
+      infile.ignore(static_cast<size_t>(bgen_P));
+
+      continue;
+    }
+
+    if (CompressedSNPBlocks) {
+      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      uint8_t zipped_data[bgen_P];
+
+      unzipped_data_size = 6 * bgen_N;
+
+      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+                              reinterpret_cast<uLongf *>(&unzipped_data_size),
+                              reinterpret_cast<Bytef *>(zipped_data),
+                              static_cast<uLong>(bgen_P));
+      assert(result == Z_OK);
+
+    } else {
+      bgen_P = 6 * bgen_N;
+      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+    }
+
+    maf = 0;
+    n_miss = 0;
+    flag_poly = 0;
+    geno_old = -9;
+    n_0 = 0;
+    n_1 = 0;
+    n_2 = 0;
+    c_idv = 0;
+    gsl_vector_set_zero(genotype_miss);
+    for (size_t i = 0; i < bgen_N; ++i) {
+
+      // CHECK this set correctly!
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+      bgen_geno_prob_AB =
+          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+      bgen_geno_prob_BB =
+          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+      bgen_geno_prob_non_miss =
+          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+
+      // CHECK 0.1 OK.
+      if (bgen_geno_prob_non_miss < 0.9) {
+        gsl_vector_set(genotype_miss, c_idv, 1);
+        n_miss++;
+        c_idv++;
+        continue;
+      }
+
+      bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+      bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+      bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+      geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+      if (geno >= 0 && geno <= 0.5) {
+        n_0++;
+      }
+      if (geno > 0.5 && geno < 1.5) {
+        n_1++;
+      }
+      if (geno >= 1.5 && geno <= 2.0) {
+        n_2++;
+      }
+
+      gsl_vector_set(genotype, c_idv, geno);
+
+      // CHECK WHAT THIS DOES.
+      if (flag_poly == 0) {
+        geno_old = geno;
+        flag_poly = 2;
+      }
+      if (flag_poly == 2 && geno != geno_old) {
+        flag_poly = 1;
+      }
+
+      maf += geno;
+
+      c_idv++;
+    }
+
+    maf /= 2.0 * static_cast<double>(ni_test - n_miss);
+
+    SNPINFO sInfo = {chr,   rs,    -9,     b_pos,
+                     minor, major, n_miss, (double)n_miss / (double)ni_test,
+                     maf};
+    snpInfo.push_back(sInfo);
+
+    if ((double)n_miss / (double)ni_test > miss_level) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if (flag_poly != 1) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    if (hwe_level != 0 && maf_level != -1) {
+      if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
+        indicator_snp.push_back(0);
+        continue;
+      }
+    }
+
+    // Filter SNP if it is correlated with W
+    // unless W has only one column, of 1s.
+    for (size_t i = 0; i < genotype->size; ++i) {
+      if (gsl_vector_get(genotype_miss, i) == 1) {
+        geno = maf * 2.0;
+        gsl_vector_set(genotype, i, geno);
+      }
+    }
+
+    gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+    gsl_blas_ddot(genotype, genotype, &v_x);
+    gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
+
+    if (W->size2 != 1 && v_w / v_x >= r2_level) {
+      indicator_snp.push_back(0);
+      continue;
+    }
+
+    indicator_snp.push_back(1);
+    ns_test++;
+  }
+
+  return true;
 }
 
 // Read oxford genotype file and calculate kinship matrix.
-bool bgenKin (const string &file_oxford, vector<int> &indicator_snp,
-	      const int k_mode, const int display_pace,
-	      gsl_matrix *matrix_kin) {
-	string file_bgen=file_oxford;
-	ifstream infile (file_bgen.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bgen file:"<<file_bgen<<endl;
-	  return false;
-	}
-
-	// Read in header.
-	uint32_t bgen_snp_block_offset;
-	uint32_t bgen_header_length;
-	uint32_t bgen_nsamples;
-	uint32_t bgen_nsnps;
-	uint32_t bgen_flags;
-	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
-	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
-	bgen_snp_block_offset-=4;
-	infile.ignore(4+bgen_header_length-20);
-	bgen_snp_block_offset-=4+bgen_header_length-20;
-	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
-	bgen_snp_block_offset-=4;
-	bool CompressedSNPBlocks=bgen_flags&0x1;
-
-	infile.ignore(bgen_snp_block_offset);
-
-	double bgen_geno_prob_AA, bgen_geno_prob_AB;
-	double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
-	uint32_t bgen_N;
-	uint16_t bgen_LS;
-	uint16_t bgen_LR;
-	uint16_t bgen_LC;
-	uint32_t bgen_SNP_pos;
-	uint32_t bgen_LA;
-	std::string bgen_A_allele;
-	uint32_t bgen_LB;
-	std::string bgen_B_allele;
-	uint32_t bgen_P;
-	size_t unzipped_data_size;
-	string id;
-	string rs;
-	string chr;
-	double genotype;
-
-	size_t n_miss;
-	double d, geno_mean, geno_var;
-
-	size_t ni_total=matrix_kin->size1;
-	gsl_vector *geno=gsl_vector_alloc (ni_total);
-	gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
-
-	size_t ns_test=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-
-		id.clear();
-		rs.clear();
-		chr.clear();
-		bgen_A_allele.clear();
-		bgen_B_allele.clear();
-
-		infile.read(reinterpret_cast<char*>(&bgen_N),4);
-		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
-		id.resize(bgen_LS);
-		infile.read(&id[0], bgen_LS);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
-		rs.resize(bgen_LR);
-		infile.read(&rs[0], bgen_LR);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
-		chr.resize(bgen_LC);
-		infile.read(&chr[0], bgen_LC);
-
-		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
-		bgen_A_allele.resize(bgen_LA);
-		infile.read(&bgen_A_allele[0], bgen_LA);
-
-
-		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
-		bgen_B_allele.resize(bgen_LB);
-		infile.read(&bgen_B_allele[0], bgen_LB);
-
-		uint16_t unzipped_data[3*bgen_N];
-
-		if (indicator_snp[t]==0) {
-			if(CompressedSNPBlocks)
-			  infile.read(reinterpret_cast<char*>(&bgen_P),4);
-			else
-			  bgen_P=6*bgen_N;
-
-			infile.ignore(static_cast<size_t>(bgen_P));
-
-			continue;
-		}
-
-		if(CompressedSNPBlocks)
-		{
-		  infile.read(reinterpret_cast<char*>(&bgen_P),4);
-		  uint8_t zipped_data[bgen_P];
-
-		  unzipped_data_size=6*bgen_N;
-
-		  infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
-
-		  int result=
-		    uncompress(reinterpret_cast<Bytef*>(unzipped_data),
-			       reinterpret_cast<uLongf*>(&unzipped_data_size),
-			       reinterpret_cast<Bytef*>(zipped_data),
-			       static_cast<uLong> (bgen_P));
-			assert(result == Z_OK);
-
-		}
-		else
-		{
-
-		  bgen_P=6*bgen_N;
-		  infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
-		}
-
-		geno_mean=0.0; n_miss=0; geno_var=0.0;
-		gsl_vector_set_all(geno_miss, 0);
-
-		for (size_t i=0; i<bgen_N; ++i) {
+bool bgenKin(const string &file_oxford, vector<int> &indicator_snp,
+             const int k_mode, const int display_pace, gsl_matrix *matrix_kin) {
+  string file_bgen = file_oxford;
+  ifstream infile(file_bgen.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bgen file:" << file_bgen << endl;
+    return false;
+  }
+
+  // Read in header.
+  uint32_t bgen_snp_block_offset;
+  uint32_t bgen_header_length;
+  uint32_t bgen_nsamples;
+  uint32_t bgen_nsnps;
+  uint32_t bgen_flags;
+  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+  bgen_snp_block_offset -= 4;
+  infile.ignore(4 + bgen_header_length - 20);
+  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+  bgen_snp_block_offset -= 4;
+  bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+  infile.ignore(bgen_snp_block_offset);
+
+  double bgen_geno_prob_AA, bgen_geno_prob_AB;
+  double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+  uint32_t bgen_N;
+  uint16_t bgen_LS;
+  uint16_t bgen_LR;
+  uint16_t bgen_LC;
+  uint32_t bgen_SNP_pos;
+  uint32_t bgen_LA;
+  std::string bgen_A_allele;
+  uint32_t bgen_LB;
+  std::string bgen_B_allele;
+  uint32_t bgen_P;
+  size_t unzipped_data_size;
+  string id;
+  string rs;
+  string chr;
+  double genotype;
+
+  size_t n_miss;
+  double d, geno_mean, geno_var;
+
+  size_t ni_total = matrix_kin->size1;
+  gsl_vector *geno = gsl_vector_alloc(ni_total);
+  gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
+
+  size_t ns_test = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+
+    id.clear();
+    rs.clear();
+    chr.clear();
+    bgen_A_allele.clear();
+    bgen_B_allele.clear();
+
+    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+    id.resize(bgen_LS);
+    infile.read(&id[0], bgen_LS);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+    rs.resize(bgen_LR);
+    infile.read(&rs[0], bgen_LR);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+    chr.resize(bgen_LC);
+    infile.read(&chr[0], bgen_LC);
+
+    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+    bgen_A_allele.resize(bgen_LA);
+    infile.read(&bgen_A_allele[0], bgen_LA);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+    bgen_B_allele.resize(bgen_LB);
+    infile.read(&bgen_B_allele[0], bgen_LB);
+
+    uint16_t unzipped_data[3 * bgen_N];
+
+    if (indicator_snp[t] == 0) {
+      if (CompressedSNPBlocks)
+        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      else
+        bgen_P = 6 * bgen_N;
+
+      infile.ignore(static_cast<size_t>(bgen_P));
+
+      continue;
+    }
 
+    if (CompressedSNPBlocks) {
+      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      uint8_t zipped_data[bgen_P];
 
-		  bgen_geno_prob_AA=
-		    static_cast<double>(unzipped_data[i*3])/32768.0;
-		  bgen_geno_prob_AB=
-		    static_cast<double>(unzipped_data[i*3+1])/32768.0;
-		  bgen_geno_prob_BB=
-		    static_cast<double>(unzipped_data[i*3+2])/32768.0;
-		  // WJA
-		  bgen_geno_prob_non_miss=bgen_geno_prob_AA +
-		    bgen_geno_prob_AB+bgen_geno_prob_BB;
-		  if (bgen_geno_prob_non_miss<0.9) {
-		    gsl_vector_set(geno_miss, i, 0.0);
-		    n_miss++;
-		  }
-		  else {
+      unzipped_data_size = 6 * bgen_N;
 
-		    bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
-		    bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
-		    bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
 
-		    genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+                              reinterpret_cast<uLongf *>(&unzipped_data_size),
+                              reinterpret_cast<Bytef *>(zipped_data),
+                              static_cast<uLong>(bgen_P));
+      assert(result == Z_OK);
 
-		    gsl_vector_set(geno, i, genotype);
-		    gsl_vector_set(geno_miss, i, 1.0);
-		    geno_mean+=genotype;
-		    geno_var+=genotype*genotype;
-		  }
+    } else {
 
-		}
+      bgen_P = 6 * bgen_N;
+      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+    }
 
+    geno_mean = 0.0;
+    n_miss = 0;
+    geno_var = 0.0;
+    gsl_vector_set_all(geno_miss, 0);
+
+    for (size_t i = 0; i < bgen_N; ++i) {
+
+      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+      bgen_geno_prob_AB =
+          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+      bgen_geno_prob_BB =
+          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+      // WJA
+      bgen_geno_prob_non_miss =
+          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+      if (bgen_geno_prob_non_miss < 0.9) {
+        gsl_vector_set(geno_miss, i, 0.0);
+        n_miss++;
+      } else {
 
-		geno_mean/=(double)(ni_total-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_total;
-		geno_var-=geno_mean*geno_mean;
+        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
 
-		for (size_t i=0; i<ni_total; ++i) {
-		  if (gsl_vector_get (geno_miss, i)==0) {
-		    gsl_vector_set(geno, i, geno_mean);
-		  }
-		}
+        genotype = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
 
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
+        gsl_vector_set(geno, i, genotype);
+        gsl_vector_set(geno_miss, i, 1.0);
+        geno_mean += genotype;
+        geno_var += genotype * genotype;
+      }
+    }
+
+    geno_mean /= (double)(ni_total - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_total;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_total; ++i) {
+      if (gsl_vector_get(geno_miss, i) == 0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
 
-		if (geno_var!=0) {
-		  if (k_mode==1) {
-		    gsl_blas_dsyr(CblasUpper,1.0,geno,matrix_kin);
-		  } else if (k_mode==2) {
-		    gsl_blas_dsyr(CblasUpper,1.0/geno_var,geno,matrix_kin);
-		  }
-		  else {
-		    cout<<"Unknown kinship mode."<<endl;
-		  }
-		}
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
 
-		ns_test++;
+    if (geno_var != 0) {
+      if (k_mode == 1) {
+        gsl_blas_dsyr(CblasUpper, 1.0, geno, matrix_kin);
+      } else if (k_mode == 2) {
+        gsl_blas_dsyr(CblasUpper, 1.0 / geno_var, geno, matrix_kin);
+      } else {
+        cout << "Unknown kinship mode." << endl;
+      }
     }
-	cout<<endl;
 
-	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
+    ns_test++;
+  }
+  cout << endl;
 
-	for (size_t i=0; i<ni_total; ++i) {
-		for (size_t j=0; j<i; ++j) {
-			d=gsl_matrix_get (matrix_kin, j, i);
-			gsl_matrix_set (matrix_kin, i, j, d);
-		}
-	}
+  gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
 
-	gsl_vector_free (geno);
-	gsl_vector_free (geno_miss);
+  for (size_t i = 0; i < ni_total; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      d = gsl_matrix_get(matrix_kin, j, i);
+      gsl_matrix_set(matrix_kin, i, j, d);
+    }
+  }
 
-	infile.close();
-	infile.clear();
+  gsl_vector_free(geno);
+  gsl_vector_free(geno_miss);
 
-	return true;
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read header to determine which column contains which item.
-bool ReadHeader_io (const string &line, HEADER &header)
-{
-  string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID",
-		   "rsid","RSID","MarkerName"};
-  set<string> rs_set(rs_ptr, rs_ptr+11);
-  string chr_ptr[]={"chr","CHR"};
-  set<string> chr_set(chr_ptr, chr_ptr+2);
-  string pos_ptr[]={"ps","PS","pos","POS","base_position",
-		    "BASE_POSITION", "bp", "BP"};
-  set<string> pos_set(pos_ptr, pos_ptr+8);
-  string cm_ptr[]={"cm","CM"};
-  set<string> cm_set(cm_ptr, cm_ptr+2);
-  string a1_ptr[]={"a1","A1","allele1","ALLELE1","Allele1","INC_ALLELE"};
-  set<string> a1_set(a1_ptr, a1_ptr+5);
-  string a0_ptr[]={"a0","A0","allele0","ALLELE0","Allele0","a2","A2",
-		   "allele2","ALLELE2","Allele2","DEC_ALLELE"};
-  set<string> a0_set(a0_ptr, a0_ptr+10);
-
-  string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
-  set<string> z_set(z_ptr, z_ptr+6);
-  string beta_ptr[]={"beta","BETA","b","B"};
-  set<string> beta_set(beta_ptr, beta_ptr+4);
-  string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
-  set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
-  string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
-  set<string> chisq_set(chisq_ptr, chisq_ptr+4);
-  string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
-  set<string> p_set(p_ptr, p_ptr+6);
-
-  string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
-  set<string> n_set(n_ptr, n_ptr+6);
-  string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
-  set<string> nmis_set(nmis_ptr, nmis_ptr+6);
-  string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
-  set<string> nobs_set(nobs_ptr, nobs_ptr+4);
-  string ncase_ptr[]={"ncase","NCASE","n_case","N_CASE"};
-  set<string> ncase_set(ncase_ptr, ncase_ptr+4);
-  string ncontrol_ptr[]={"ncontrol","NCONTROL","n_control","N_CONTROL"};
-  set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr+4);
-
-  string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq",
-		   "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY",
-		   "Freq.Allele1.HapMapCEU","FreqAllele1HapMapCEU",
-		   "Freq1.Hapmap"};
-  set<string> af_set(af_ptr, af_ptr+13);
-  string var_ptr[]={"var","VAR"};
-  set<string> var_set(var_ptr, var_ptr+2);
-
-  string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
-  set<string> ws_set(ws_ptr, ws_ptr+4);
-  string cor_ptr[]={"cor","COR","r","R"};
-  set<string> cor_set(cor_ptr, cor_ptr+4);
-
-  header.rs_col=0; header.chr_col=0; header.pos_col=0;
-  header.cm_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0;
-  header.beta_col=0; header.sebeta_col=0; header.chisq_col=0;
-  header.p_col=0; header.n_col=0; header.nmis_col=0;
-  header.nobs_col=0; header.ncase_col=0; header.ncontrol_col=0;
-  header.af_col=0; header.var_col=0; header.ws_col=0;
-  header.cor_col=0; header.coln=0;
+bool ReadHeader_io(const string &line, HEADER &header) {
+  string rs_ptr[] = {"rs",    "RS",    "snp",  "SNP",  "snps",      "SNPS",
+                     "snpid", "SNPID", "rsid", "RSID", "MarkerName"};
+  set<string> rs_set(rs_ptr, rs_ptr + 11);
+  string chr_ptr[] = {"chr", "CHR"};
+  set<string> chr_set(chr_ptr, chr_ptr + 2);
+  string pos_ptr[] = {
+      "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"};
+  set<string> pos_set(pos_ptr, pos_ptr + 8);
+  string cm_ptr[] = {"cm", "CM"};
+  set<string> cm_set(cm_ptr, cm_ptr + 2);
+  string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1", "Allele1", "INC_ALLELE"};
+  set<string> a1_set(a1_ptr, a1_ptr + 5);
+  string a0_ptr[] = {"a0", "A0",      "allele0", "ALLELE0", "Allele0",   "a2",
+                     "A2", "allele2", "ALLELE2", "Allele2", "DEC_ALLELE"};
+  set<string> a0_set(a0_ptr, a0_ptr + 10);
+
+  string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"};
+  set<string> z_set(z_ptr, z_ptr + 6);
+  string beta_ptr[] = {"beta", "BETA", "b", "B"};
+  set<string> beta_set(beta_ptr, beta_ptr + 4);
+  string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"};
+  set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4);
+  string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"};
+  set<string> chisq_set(chisq_ptr, chisq_ptr + 4);
+  string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"};
+  set<string> p_set(p_ptr, p_ptr + 6);
+
+  string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"};
+  set<string> n_set(n_ptr, n_ptr + 6);
+  string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"};
+  set<string> nmis_set(nmis_ptr, nmis_ptr + 6);
+  string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"};
+  set<string> nobs_set(nobs_ptr, nobs_ptr + 4);
+  string ncase_ptr[] = {"ncase", "NCASE", "n_case", "N_CASE"};
+  set<string> ncase_set(ncase_ptr, ncase_ptr + 4);
+  string ncontrol_ptr[] = {"ncontrol", "NCONTROL", "n_control", "N_CONTROL"};
+  set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr + 4);
+
+  string af_ptr[] = {"af",
+                     "AF",
+                     "maf",
+                     "MAF",
+                     "f",
+                     "F",
+                     "allele_freq",
+                     "ALLELE_FREQ",
+                     "allele_frequency",
+                     "ALLELE_FREQUENCY",
+                     "Freq.Allele1.HapMapCEU",
+                     "FreqAllele1HapMapCEU",
+                     "Freq1.Hapmap"};
+  set<string> af_set(af_ptr, af_ptr + 13);
+  string var_ptr[] = {"var", "VAR"};
+  set<string> var_set(var_ptr, var_ptr + 2);
+
+  string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"};
+  set<string> ws_set(ws_ptr, ws_ptr + 4);
+  string cor_ptr[] = {"cor", "COR", "r", "R"};
+  set<string> cor_set(cor_ptr, cor_ptr + 4);
+
+  header.rs_col = 0;
+  header.chr_col = 0;
+  header.pos_col = 0;
+  header.cm_col = 0;
+  header.a1_col = 0;
+  header.a0_col = 0;
+  header.z_col = 0;
+  header.beta_col = 0;
+  header.sebeta_col = 0;
+  header.chisq_col = 0;
+  header.p_col = 0;
+  header.n_col = 0;
+  header.nmis_col = 0;
+  header.nobs_col = 0;
+  header.ncase_col = 0;
+  header.ncontrol_col = 0;
+  header.af_col = 0;
+  header.var_col = 0;
+  header.ws_col = 0;
+  header.cor_col = 0;
+  header.coln = 0;
 
   char *ch_ptr;
   string type;
-  size_t n_error=0;
-
-  ch_ptr=strtok ((char *)line.c_str(), " , \t");
-  while (ch_ptr!=NULL) {
-    type=ch_ptr;
-    if (rs_set.count(type)!=0) {
-      if (header.rs_col==0) {
-	header.rs_col=header.coln+1;
+  size_t n_error = 0;
+
+  ch_ptr = strtok((char *)line.c_str(), " , \t");
+  while (ch_ptr != NULL) {
+    type = ch_ptr;
+    if (rs_set.count(type) != 0) {
+      if (header.rs_col == 0) {
+        header.rs_col = header.coln + 1;
       } else {
-	cout<<"error! more than two rs columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two rs columns in the file." << endl;
+        n_error++;
       }
-    } else if (chr_set.count(type)!=0) {
-      if (header.chr_col==0) {
-	header.chr_col=header.coln+1;
+    } else if (chr_set.count(type) != 0) {
+      if (header.chr_col == 0) {
+        header.chr_col = header.coln + 1;
       } else {
-	cout<<"error! more than two chr columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two chr columns in the file." << endl;
+        n_error++;
       }
-    } else if (pos_set.count(type)!=0) {
-      if (header.pos_col==0) {
-	header.pos_col=header.coln+1;
+    } else if (pos_set.count(type) != 0) {
+      if (header.pos_col == 0) {
+        header.pos_col = header.coln + 1;
       } else {
-	cout<<"error! more than two pos columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two pos columns in the file." << endl;
+        n_error++;
       }
-    } else if (cm_set.count(type)!=0) {
-      if (header.cm_col==0) {
-	header.cm_col=header.coln+1;
+    } else if (cm_set.count(type) != 0) {
+      if (header.cm_col == 0) {
+        header.cm_col = header.coln + 1;
       } else {
-	cout<<"error! more than two cm columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two cm columns in the file." << endl;
+        n_error++;
       }
-    } else if (a1_set.count(type)!=0) {
-      if (header.a1_col==0) {
-	header.a1_col=header.coln+1;
+    } else if (a1_set.count(type) != 0) {
+      if (header.a1_col == 0) {
+        header.a1_col = header.coln + 1;
       } else {
-	cout<<"error! more than two allele1 columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two allele1 columns in the file." << endl;
+        n_error++;
       }
-    } else if (a0_set.count(type)!=0) {
-      if (header.a0_col==0) {
-	header.a0_col=header.coln+1;
+    } else if (a0_set.count(type) != 0) {
+      if (header.a0_col == 0) {
+        header.a0_col = header.coln + 1;
       } else {
-	cout<<"error! more than two allele0 columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two allele0 columns in the file." << endl;
+        n_error++;
       }
-    } else if (z_set.count(type)!=0) {
-      if (header.z_col==0) {
-	header.z_col=header.coln+1;
+    } else if (z_set.count(type) != 0) {
+      if (header.z_col == 0) {
+        header.z_col = header.coln + 1;
       } else {
-	cout<<"error! more than two z columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two z columns in the file." << endl;
+        n_error++;
       }
-    } else if (beta_set.count(type)!=0) {
-      if (header.beta_col==0) {
-	header.beta_col=header.coln+1;
+    } else if (beta_set.count(type) != 0) {
+      if (header.beta_col == 0) {
+        header.beta_col = header.coln + 1;
       } else {
-	cout<<"error! more than two beta columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two beta columns in the file." << endl;
+        n_error++;
       }
-    } else if (sebeta_set.count(type)!=0) {
-      if (header.sebeta_col==0) {
-	header.sebeta_col=header.coln+1;
+    } else if (sebeta_set.count(type) != 0) {
+      if (header.sebeta_col == 0) {
+        header.sebeta_col = header.coln + 1;
       } else {
-	cout<<"error! more than two se_beta columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two se_beta columns in the file." << endl;
+        n_error++;
       }
-    } else if (chisq_set.count(type)!=0) {
-      if (header.chisq_col==0) {
-	header.chisq_col=header.coln+1;
+    } else if (chisq_set.count(type) != 0) {
+      if (header.chisq_col == 0) {
+        header.chisq_col = header.coln + 1;
       } else {
-	cout<<"error! more than two z columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two chisq columns in the file." << endl;
+        n_error++;
       }
-    } else if (p_set.count(type)!=0) {
-      if (header.p_col==0) {
-	header.p_col=header.coln+1;
+    } else if (p_set.count(type) != 0) {
+      if (header.p_col == 0) {
+        header.p_col = header.coln + 1;
       } else {
-	cout<<"error! more than two p columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two p columns in the file." << endl;
+        n_error++;
       }
-    } else if (n_set.count(type)!=0) {
-      if (header.n_col==0) {
-	header.n_col=header.coln+1;
+    } else if (n_set.count(type) != 0) {
+      if (header.n_col == 0) {
+        header.n_col = header.coln + 1;
       } else {
-	cout<<"error! more than two n_total columns in the file."<<endl;
-	n_error++;}
-    } else if (nmis_set.count(type)!=0) {
-      if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else {
-	cout<<"error! more than two n_mis columns in the file."<<endl;
-	n_error++;
-      }
-    } else if (nobs_set.count(type)!=0) {
-      if (header.nobs_col==0) {
-	header.nobs_col=header.coln+1;
+        cout << "error! more than two n_total columns in the file." << endl;
+        n_error++;
+      }
+    } else if (nmis_set.count(type) != 0) {
+      if (header.nmis_col == 0) {
+        header.nmis_col = header.coln + 1;
+      } else {
+        cout << "error! more than two n_mis columns in the file." << endl;
+        n_error++;
+      }
+    } else if (nobs_set.count(type) != 0) {
+      if (header.nobs_col == 0) {
+        header.nobs_col = header.coln + 1;
       } else {
-	cout<<"error! more than two n_obs columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two n_obs columns in the file." << endl;
+        n_error++;
       }
-    } else if (ncase_set.count(type)!=0) {
-      if (header.ncase_col==0) {
-	header.ncase_col=header.coln+1;
+    } else if (ncase_set.count(type) != 0) {
+      if (header.ncase_col == 0) {
+        header.ncase_col = header.coln + 1;
       } else {
-	cout<<"error! more than two n_case columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two n_case columns in the file." << endl;
+        n_error++;
       }
-    } else if (ncontrol_set.count(type)!=0) {
-      if (header.ncontrol_col==0) {
-	header.ncontrol_col=header.coln+1;
+    } else if (ncontrol_set.count(type) != 0) {
+      if (header.ncontrol_col == 0) {
+        header.ncontrol_col = header.coln + 1;
       } else {
-	cout<<"error! more than two n_control columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two n_control columns in the file." << endl;
+        n_error++;
       }
-    } else if (ws_set.count(type)!=0) {
-      if (header.ws_col==0) {
-	header.ws_col=header.coln+1;
+    } else if (ws_set.count(type) != 0) {
+      if (header.ws_col == 0) {
+        header.ws_col = header.coln + 1;
       } else {
-	cout<<"error! more than two window_size columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two window_size columns in the file." << endl;
+        n_error++;
       }
-    } else if (af_set.count(type)!=0) {
-      if (header.af_col==0) {
-	header.af_col=header.coln+1;
+    } else if (af_set.count(type) != 0) {
+      if (header.af_col == 0) {
+        header.af_col = header.coln + 1;
       } else {
-	cout<<"error! more than two af columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two af columns in the file." << endl;
+        n_error++;
       }
-    } else if (cor_set.count(type)!=0) {
-      if (header.cor_col==0) {
-	header.cor_col=header.coln+1;
+    } else if (cor_set.count(type) != 0) {
+      if (header.cor_col == 0) {
+        header.cor_col = header.coln + 1;
       } else {
-	cout<<"error! more than two cor columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two cor columns in the file." << endl;
+        n_error++;
       }
     } else {
       string str = ch_ptr;
-      string cat = str.substr(str.size()-2, 2);
+      string cat = str.size() >= 2 ? str.substr(str.size() - 2, 2) : string();
 
-      if(cat == "_c" || cat =="_C"){
+      if (cat == "_c" || cat == "_C") {
 
         // continuous
-	header.catc_col.insert(header.coln+1);
+        header.catc_col.insert(header.coln + 1);
       } else {
 
-	// discrete
-	header.catd_col.insert(header.coln+1);
+        // discrete
+        header.catd_col.insert(header.coln + 1);
       }
     }
 
-    ch_ptr=strtok (NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
     header.coln++;
   }
 
-  if (header.cor_col!=0 && header.cor_col!=header.coln) {
-    cout<<"error! the cor column should be the last column."<<endl;
+  if (header.cor_col != 0 && header.cor_col != header.coln) {
+    cout << "error! the cor column should be the last column." << endl;
     n_error++;
   }
 
-  if (header.rs_col==0) {
-    if (header.chr_col!=0 && header.pos_col!=0) {
-      cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
+  if (header.rs_col == 0) {
+    if (header.chr_col != 0 && header.pos_col != 0) {
+      cout << "missing an rs column. rs id will be replaced by chr:pos" << endl;
     } else {
-      cout<<"error! missing an rs column."<<endl; n_error++;
+      cout << "error! missing an rs column." << endl;
+      n_error++;
     }
   }
 
-  if (n_error==0) {
+  if (n_error == 0) {
     return true;
   } else {
     return false;
@@ -3026,13 +3210,13 @@ bool ReadHeader_io (const string &line, HEADER &header)
 // Read category file, record mapRS2 in the category file does not
 // contain a null category so if a snp has 0 for all categories, then
 // it is not included in the analysis.
-bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
-		   size_t &n_vc) {
+bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
+                  size_t &n_vc) {
   mapRS2cat.clear();
 
-  igzstream infile (file_cat.c_str(), igzstream::in);
+  igzstream infile(file_cat.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open category file: "<<file_cat<<endl;
+    cout << "error! fail to open category file: " << file_cat << endl;
     return false;
   }
 
@@ -3045,47 +3229,62 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
   // Read header.
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader_io(line, header);
 
   // Use the header to count the number of categories.
-  n_vc=header.coln;
-  if (header.rs_col!=0) {n_vc--;}
-  if (header.chr_col!=0) {n_vc--;}
-  if (header.pos_col!=0) {n_vc--;}
-  if (header.cm_col!=0) {n_vc--;}
-  if (header.a1_col!=0) {n_vc--;}
-  if (header.a0_col!=0) {n_vc--;}
+  n_vc = header.coln;
+  if (header.rs_col != 0) {
+    n_vc--;
+  }
+  if (header.chr_col != 0) {
+    n_vc--;
+  }
+  if (header.pos_col != 0) {
+    n_vc--;
+  }
+  if (header.cm_col != 0) {
+    n_vc--;
+  }
+  if (header.a1_col != 0) {
+    n_vc--;
+  }
+  if (header.a0_col != 0) {
+    n_vc--;
+  }
 
   // Read the following lines to record mapRS2cat.
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
-    i_cat=0;
-    for (size_t i=0; i<header.coln; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {
-	rs=ch_ptr;
-      } else if (header.chr_col!=0 && header.chr_col==i+1) {
-	chr=ch_ptr;
-      } else if (header.pos_col!=0 && header.pos_col==i+1) {
-	pos=ch_ptr;
-      } else if (header.cm_col!=0 && header.cm_col==i+1) {
-	cm=ch_ptr;
-      } else if (header.a1_col!=0 && header.a1_col==i+1) {
-	a1=ch_ptr;
-      } else if (header.a0_col!=0 && header.a0_col==i+1) {
-	a0=ch_ptr;
-      } else if (atoi(ch_ptr)==1 || atoi(ch_ptr)==0) {
-	if (i_cat==0) {
-	  if (header.rs_col==0) {
-	    rs=chr+":"+pos;
-	  }
-	}
-
-	if (atoi(ch_ptr)==1 && mapRS2cat.count(rs)==0) {mapRS2cat[rs]=i_cat;}
-	i_cat++;
-      } else {}
-
-      ch_ptr=strtok (NULL, " , \t");
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+    i_cat = 0;
+    for (size_t i = 0; i < header.coln; i++) {
+      // A blank row, or one with fewer fields than the header, runs out of
+      // tokens; stop before the NULL pointer reaches std::string or atoi.
+      if (ch_ptr == NULL) {
+        break;
+      }
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      } else if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      } else if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+      } else if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+      } else if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      } else if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
+      } else if (atoi(ch_ptr) == 1 || atoi(ch_ptr) == 0) {
+        if (i_cat == 0 && header.rs_col == 0) {
+          rs = chr + ":" + pos;
+        }
+        if (atoi(ch_ptr) == 1 && mapRS2cat.count(rs) == 0) {
+          mapRS2cat[rs] = i_cat;
+        }
+        i_cat++;
+      }
+      ch_ptr = strtok(NULL, " , \t");
     }
   }
 
@@ -3095,25 +3294,29 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
   return true;
 }
 
-bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat,
-		    size_t &n_vc) {
+bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat,
+                   size_t &n_vc) {
   mapRS2cat.clear();
 
-  igzstream infile (file_mcat.c_str(), igzstream::in);
+  igzstream infile(file_mcat.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open mcategory file: "<<file_mcat<<endl;
+    cout << "error! fail to open mcategory file: " << file_mcat << endl;
     return false;
   }
 
   string file_name;
   map<string, size_t> mapRS2cat_tmp;
-  size_t n_vc_tmp, t=0;
+  size_t n_vc_tmp, t = 0;
 
   while (!safeGetline(infile, file_name).eof()) {
     mapRS2cat_tmp.clear();
-    ReadFile_cat (file_name, mapRS2cat_tmp, n_vc_tmp);
+    if (!ReadFile_cat(file_name, mapRS2cat_tmp, n_vc_tmp)) { return false; }
     mapRS2cat.insert(mapRS2cat_tmp.begin(), mapRS2cat_tmp.end());
-    if (t==0) {n_vc=n_vc_tmp;} else {n_vc=max(n_vc, n_vc_tmp);}
+    if (t == 0) {
+      n_vc = n_vc_tmp;
+    } else {
+      n_vc = max(n_vc, n_vc_tmp);
+    }
     t++;
   }
 
@@ -3123,475 +3326,490 @@ bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat,
 // Read bimbam mean genotype file and calculate kinship matrix; this
 // time, the kinship matrix is not centered, and can contain multiple
 // K matrix.
-bool BimbamKin (const string &file_geno, const int display_pace,
-		const vector<int> &indicator_idv,
-		const vector<int> &indicator_snp,
-		const map<string, double> &mapRS2weight,
-		const map<string, size_t> &mapRS2cat,
-		const vector<SNPINFO> &snpInfo,
-		const gsl_matrix *W, gsl_matrix *matrix_kin,
-		gsl_vector *vector_ns) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	size_t n_miss;
-	double d, geno_mean, geno_var;
-
-	size_t ni_test=matrix_kin->size1;
-	gsl_vector *geno=gsl_vector_alloc (ni_test);
-	gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
-
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	size_t n_vc=matrix_kin->size2/ni_test, i_vc;
-	string rs;
-	vector<size_t> ns_vec;
-	for (size_t i=0; i<n_vc; i++) {
-	  ns_vec.push_back(0);
-	}
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
-	gsl_matrix_set_zero(Xlarge);
-
-	size_t ns_test=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		rs=snpInfo[t].rs_number; // This line is new.
-
-		geno_mean=0.0; n_miss=0; geno_var=0.0;
-		gsl_vector_set_all(geno_miss, 0);
-
-		size_t j=0;
-		for (size_t i=0; i<indicator_idv.size(); ++i) {
-		  if (indicator_idv[i]==0) {continue;}
-			ch_ptr=strtok (NULL, " , \t");
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set(geno_miss, i, 0); n_miss++;
-			}
-			else {
-			  d=atof(ch_ptr);
-			  gsl_vector_set (geno, j, d);
-			  gsl_vector_set (geno_miss, j, 1);
-			  geno_mean+=d;
-			  geno_var+=d*d;
-			}
-			j++;
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_test;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_test; ++i) {
-		  if (gsl_vector_get (geno_miss, i)==0) {
-		    gsl_vector_set(geno, i, geno_mean);
-		  }
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx);
-		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
-		gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
-		gsl_blas_ddot (geno, geno, &geno_var);
-		geno_var/=(double)ni_test;
-
-		if (geno_var!=0 && (mapRS2weight.size()==0 ||
-				    mapRS2weight.count(rs)!=0)) {
-		  if (mapRS2weight.size()==0) {
-		    d=1.0/geno_var;
-		  } else {
-		    d=mapRS2weight.at(rs)/geno_var;
-		  }
-
-		  gsl_vector_scale (geno, sqrt(d));
-		  if (n_vc==1 || mapRS2cat.size()==0 ) {
-		    gsl_vector_view Xlarge_col=
-		      gsl_matrix_column(Xlarge,ns_vec[0]%msize);
-		    gsl_vector_memcpy (&Xlarge_col.vector, geno);
-		    ns_vec[0]++;
-
-		    if (ns_vec[0]%msize==0) {
-		      eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
-		      gsl_matrix_set_zero(Xlarge);
-		    }
-		  } else if (mapRS2cat.count(rs)!=0) {
-		    i_vc=mapRS2cat.at(rs);
-
-		    gsl_vector_view Xlarge_col=
-		      gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize);
-		    gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
-		    ns_vec[i_vc]++;
-
-		    if (ns_vec[i_vc]%msize==0) {
-		      gsl_matrix_view X_sub=
-			gsl_matrix_submatrix(Xlarge,0,msize*i_vc,
-					     ni_test,msize);
-		      gsl_matrix_view kin_sub=
-			gsl_matrix_submatrix(matrix_kin,0,ni_test*i_vc,
-					     ni_test,ni_test);
-		      eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix,
-				      &X_sub.matrix, 1.0, &kin_sub.matrix);
-
-		      gsl_matrix_set_zero(&X_sub.matrix);
-		    }
-		  }
-
-		}
-		ns_test++;
-
-	}
-
-	for (size_t i_vc=0; i_vc<n_vc; i_vc++) {
-	  if (ns_vec[i_vc]%msize!=0) {
-	    gsl_matrix_view X_sub=
-	      gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
-	    gsl_matrix_view kin_sub=
-	      gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test,
-				   ni_test);
-	    eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix,
-			    1.0, &kin_sub.matrix);
-	  }
-	}
-
-	cout<<endl;
-
-	for (size_t t=0; t<n_vc; t++) {
-	  gsl_vector_set(vector_ns, t, ns_vec[t]);
-
-	  for (size_t i=0; i<ni_test; ++i) {
-	    for (size_t j=0; j<=i; ++j) {
-	      d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
-	      d/=(double)ns_vec[t];
-	      gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
-	      gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
-	    }
-	  }
-	}
-
-	gsl_vector_free (geno);
-	gsl_vector_free (geno_miss);
-
-	gsl_vector_free (Wtx);
-	gsl_matrix_free (WtW);
-	gsl_matrix_free (WtWi);
-	gsl_vector_free (WtWiWtx);
-	gsl_permutation_free (pmt);
-
-	gsl_matrix_free (Xlarge);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool BimbamKin(const string &file_geno, const int display_pace,
+               const vector<int> &indicator_idv,
+               const vector<int> &indicator_snp,
+               const map<string, double> &mapRS2weight,
+               const map<string, size_t> &mapRS2cat,
+               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+               gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  size_t n_miss;
+  double d, geno_mean, geno_var;
+  // geno and geno_miss have one entry per analyzed individual (ni_test).
+  size_t ni_test = matrix_kin->size1;
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+  gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+  // Workspace for regressing the covariates W out of each SNP.
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+  // matrix_kin packs n_vc blocks of ni_test x ni_test side by side.
+  size_t n_vc = matrix_kin->size2 / ni_test, i_vc;
+  string rs;
+  vector<size_t> ns_vec;
+  for (size_t i = 0; i < n_vc; i++) {
+    ns_vec.push_back(0);
+  }
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+  gsl_matrix_set_zero(Xlarge);
+
+  size_t ns_test = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    // Skip the first three fields of the row; the genotypes follow.
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    rs = snpInfo[t].rs_number; // This line is new.
+
+    geno_mean = 0.0;
+    n_miss = 0;
+    geno_var = 0.0;
+    gsl_vector_set_all(geno_miss, 0);
+    // i: all individuals (one token each, even if excluded); j: analyzed ones.
+    size_t j = 0;
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(geno_miss, j, 0);
+        n_miss++;
+      } else {
+        d = atof(ch_ptr);
+        gsl_vector_set(geno, j, d);
+        gsl_vector_set(geno_miss, j, 1);
+        geno_mean += d;
+        geno_var += d * d;
+      }
+      j++;
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_test;
+    geno_var -= geno_mean * geno_mean;
+    // Replace missing genotypes by the mean of the observed ones.
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(geno_miss, i) == 0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+    // Regress W out of geno (WtWi is presumably the inverse of W^T W).
+    gsl_blas_dgemv(CblasTrans, 1.0, W, geno, 0.0, Wtx);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+    gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
+    gsl_blas_ddot(geno, geno, &geno_var);
+    geno_var /= (double)ni_test;
+
+    if (geno_var != 0 &&
+        (mapRS2weight.size() == 0 || mapRS2weight.count(rs) != 0)) {
+      if (mapRS2weight.size() == 0) {
+        d = 1.0 / geno_var;
+      } else {
+        d = mapRS2weight.at(rs) / geno_var;
+      }
+
+      gsl_vector_scale(geno, sqrt(d));
+      if (n_vc == 1 || mapRS2cat.size() == 0) {
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, ns_vec[0] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+        ns_vec[0]++;
+
+        if (ns_vec[0] % msize == 0) {
+          eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+          gsl_matrix_set_zero(Xlarge);
+        }
+      } else if (mapRS2cat.count(rs) != 0) {
+        i_vc = mapRS2cat.at(rs);
+
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, msize * i_vc + ns_vec[i_vc] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+        ns_vec[i_vc]++;
+
+        if (ns_vec[i_vc] % msize == 0) {
+          gsl_matrix_view X_sub =
+              gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+          gsl_matrix_view kin_sub = gsl_matrix_submatrix(
+              matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+          eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                         &kin_sub.matrix);
+
+          gsl_matrix_set_zero(&X_sub.matrix);
+        }
+      }
+    }
+    ns_test++;
+  }
+  // Accumulate whatever is left in each category's partially filled buffer.
+  for (size_t i_vc = 0; i_vc < n_vc; i_vc++) {
+    if (ns_vec[i_vc] % msize != 0) {
+      gsl_matrix_view X_sub =
+          gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+      gsl_matrix_view kin_sub =
+          gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+      eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                     &kin_sub.matrix);
+    }
+  }
+
+  cout << endl;
+  // Divide each block by its SNP count; copy the upper triangle to the lower.
+  for (size_t t = 0; t < n_vc; t++) {
+    gsl_vector_set(vector_ns, t, ns_vec[t]);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      for (size_t j = 0; j <= i; ++j) {
+        d = gsl_matrix_get(matrix_kin, j, i + ni_test * t);
+        d /= (double)ns_vec[t];
+        gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+        gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
+      }
+    }
+  }
+
+  gsl_vector_free(geno);
+  gsl_vector_free(geno_miss);
+
+  gsl_vector_free(Wtx);
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(WtWiWtx);
+  gsl_permutation_free(pmt);
+
+  gsl_matrix_free(Xlarge);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
-bool PlinkKin (const string &file_bed, const int display_pace,
-	       const vector<int> &indicator_idv,
-	       const vector<int> &indicator_snp,
-	       const map<string, double> &mapRS2weight,
-	       const map<string, size_t> &mapRS2cat,
-	       const vector<SNPINFO> &snpInfo,
-	       const gsl_matrix *W, gsl_matrix *matrix_kin,
-	       gsl_vector *vector_ns) {
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return false;
-	}
-
-	char ch[1];
-	bitset<8> b;
-
-	size_t n_miss, ci_total, ci_test;
-	double d, geno_mean, geno_var;
-
-	size_t ni_test=matrix_kin->size1;
-	size_t ni_total=indicator_idv.size();
-	gsl_vector *geno=gsl_vector_alloc (ni_test);
-
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	size_t ns_test=0;
-	int n_bit;
-
-	size_t n_vc=matrix_kin->size2/ni_test, i_vc;
-	string rs;
-	vector<size_t> ns_vec;
-	for (size_t i=0; i<n_vc; i++) {
-	  ns_vec.push_back(0);
-	}
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
-	gsl_matrix_set_zero(Xlarge);
-
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers
-		infile.seekg(t*n_bit+3);
-
-		rs=snpInfo[t].rs_number; // This line is new.
-
-		// Read genotypes.
-		geno_mean=0.0;	n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
-		for (int i=0; i<n_bit; ++i) {
-		  infile.read(ch,1);
-		  b=ch[0];
-
-		  // Minor allele homozygous: 2.0; major: 0.0;
-		  for (size_t j=0; j<4; ++j) {
-		    if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
-		    if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
-		    if (b[2*j]==0) {
-		      if (b[2*j+1]==0) {
-			gsl_vector_set(geno, ci_test, 2.0);
-			geno_mean+=2.0; geno_var+=4.0;
-		      }
-		      else {
-			gsl_vector_set(geno, ci_test, 1.0);
-			geno_mean+=1.0;
-			geno_var+=1.0;
-		      }
-		    }
-		    else {
-		      if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
-		      else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
-		    }
-
-		    ci_test++;
-		    ci_total++;
-		  }
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_test;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_test; ++i) {
-			d=gsl_vector_get(geno,i);
-			if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx);
-		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
-		gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
-		gsl_blas_ddot (geno, geno, &geno_var);
-		geno_var/=(double)ni_test;
-
-		if (geno_var!=0 && (mapRS2weight.size()==0 ||
-				    mapRS2weight.count(rs)!=0)) {
-		  if (mapRS2weight.size()==0) {
-		    d=1.0/geno_var;
-		  } else {
-		    d=mapRS2weight.at(rs)/geno_var;
-		  }
-
-		  gsl_vector_scale (geno, sqrt(d));
-		  if (n_vc==1 || mapRS2cat.size()==0 ) {
-		    gsl_vector_view Xlarge_col=
-		      gsl_matrix_column (Xlarge, ns_vec[0]%msize);
-		    gsl_vector_memcpy (&Xlarge_col.vector, geno);
-		    ns_vec[0]++;
-
-		    if (ns_vec[0]%msize==0) {
-		      eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
-		      gsl_matrix_set_zero(Xlarge);
-		    }
-		  } else if (mapRS2cat.count(rs)!=0) {
-		    i_vc=mapRS2cat.at(rs);
-
-		    gsl_vector_view Xlarge_col=
-		      gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize);
-		    gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
-		    ns_vec[i_vc]++;
-
-		    if (ns_vec[i_vc]%msize==0) {
-		      gsl_matrix_view X_sub=
-			gsl_matrix_submatrix(Xlarge,0,msize*i_vc,ni_test,
-					     msize);
-		      gsl_matrix_view kin_sub=
-			gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc,
-					     ni_test, ni_test);
-		      eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix,
-				      &X_sub.matrix, 1.0, &kin_sub.matrix);
-
-		      gsl_matrix_set_zero(&X_sub.matrix);
-		    }
-		  }
-
-
-		}
-		ns_test++;
-	}
-
-	for (size_t i_vc=0; i_vc<n_vc; i_vc++) {
-	  if (ns_vec[i_vc]%msize!=0) {
-	    gsl_matrix_view X_sub=
-	      gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
-	    gsl_matrix_view kin_sub=
-	      gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc,
-				   ni_test, ni_test);
-	    eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix,
-			    1.0, &kin_sub.matrix);
-	  }
-	}
-
-	cout<<endl;
-
-	for (size_t t=0; t<n_vc; t++) {
-	  gsl_vector_set(vector_ns, t, ns_vec[t]);
-
-	  for (size_t i=0; i<ni_test; ++i) {
-	    for (size_t j=0; j<=i; ++j) {
-	      d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
-	      d/=(double)ns_vec[t];
-	      gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
-	      gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
-	    }
-	  }
-	}
-
-	gsl_vector_free (geno);
-
-	gsl_vector_free (Wtx);
-	gsl_matrix_free (WtW);
-	gsl_matrix_free (WtWi);
-	gsl_vector_free (WtWiWtx);
-	gsl_permutation_free (pmt);
-
-	gsl_matrix_free (Xlarge);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool PlinkKin(const string &file_bed, const int display_pace,
+              const vector<int> &indicator_idv,
+              const vector<int> &indicator_snp,
+              const map<string, double> &mapRS2weight,
+              const map<string, size_t> &mapRS2cat,
+              const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+              gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false;
+  }
+
+  char ch[1];
+  bitset<8> b;
+
+  size_t n_miss, ci_total, ci_test;
+  double d, geno_mean, geno_var;
+
+  size_t ni_test = matrix_kin->size1;
+  size_t ni_total = indicator_idv.size();
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  size_t ns_test = 0;
+  int n_bit;
+
+  size_t n_vc = matrix_kin->size2 / ni_test, i_vc;
+  string rs;
+  vector<size_t> ns_vec;
+  for (size_t i = 0; i < n_vc; i++) {
+    ns_vec.push_back(0);
+  }
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+  gsl_matrix_set_zero(Xlarge);
+
+  // Calculate n_bit and c, the number of bit for each SNP.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Print the first three magic numbers.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers
+    infile.seekg(t * n_bit + 3);
+
+    rs = snpInfo[t].rs_number; // This line is new.
+
+    // Read genotypes.
+    geno_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    geno_var = 0.0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0;
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(geno, ci_test, 2.0);
+            geno_mean += 2.0;
+            geno_var += 4.0;
+          } else {
+            gsl_vector_set(geno, ci_test, 1.0);
+            geno_mean += 1.0;
+            geno_var += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(geno, ci_test, 0.0);
+          } else {
+            gsl_vector_set(geno, ci_test, -9.0);
+            n_miss++;
+          }
+        }
+
+        ci_test++;
+        ci_total++;
+      }
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_test;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      d = gsl_vector_get(geno, i);
+      if (d == -9.0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    gsl_blas_dgemv(CblasTrans, 1.0, W, geno, 0.0, Wtx);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+    gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
+    gsl_blas_ddot(geno, geno, &geno_var);
+    geno_var /= (double)ni_test;
+
+    if (geno_var != 0 &&
+        (mapRS2weight.size() == 0 || mapRS2weight.count(rs) != 0)) {
+      if (mapRS2weight.size() == 0) {
+        d = 1.0 / geno_var;
+      } else {
+        d = mapRS2weight.at(rs) / geno_var;
+      }
+
+      gsl_vector_scale(geno, sqrt(d));
+      if (n_vc == 1 || mapRS2cat.size() == 0) {
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, ns_vec[0] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+        ns_vec[0]++;
+
+        if (ns_vec[0] % msize == 0) {
+          eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+          gsl_matrix_set_zero(Xlarge);
+        }
+      } else if (mapRS2cat.count(rs) != 0) {
+        i_vc = mapRS2cat.at(rs);
+
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, msize * i_vc + ns_vec[i_vc] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+        ns_vec[i_vc]++;
+
+        if (ns_vec[i_vc] % msize == 0) {
+          gsl_matrix_view X_sub =
+              gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+          gsl_matrix_view kin_sub = gsl_matrix_submatrix(
+              matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+          eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                         &kin_sub.matrix);
+
+          gsl_matrix_set_zero(&X_sub.matrix);
+        }
+      }
+    }
+    ns_test++;
+  }
+
+  for (size_t i_vc = 0; i_vc < n_vc; i_vc++) {
+    if (ns_vec[i_vc] % msize != 0) {
+      gsl_matrix_view X_sub =
+          gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+      gsl_matrix_view kin_sub =
+          gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+      eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                     &kin_sub.matrix);
+    }
+  }
+
+  cout << endl;
+
+  for (size_t t = 0; t < n_vc; t++) {
+    gsl_vector_set(vector_ns, t, ns_vec[t]);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      for (size_t j = 0; j <= i; ++j) {
+        d = gsl_matrix_get(matrix_kin, j, i + ni_test * t);
+        d /= (double)ns_vec[t];
+        gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+        gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
+      }
+    }
+  }
+
+  gsl_vector_free(geno);
+
+  gsl_vector_free(Wtx);
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(WtWiWtx);
+  gsl_permutation_free(pmt);
+
+  gsl_matrix_free(Xlarge);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
-bool MFILEKin (const size_t mfile_mode, const string &file_mfile,
-	       const int display_pace, const vector<int> &indicator_idv,
-	       const vector<vector<int> > &mindicator_snp,
-	       const map<string, double> &mapRS2weight,
-	       const map<string, size_t> &mapRS2cat,
-	       const vector<vector<SNPINFO> > &msnpInfo,
-	       const gsl_matrix *W, gsl_matrix *matrix_kin,
-	       gsl_vector *vector_ns) {
-  size_t n_vc=vector_ns->size, ni_test=matrix_kin->size1;
+bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
+              const int display_pace, const vector<int> &indicator_idv,
+              const vector<vector<int>> &mindicator_snp,
+              const map<string, double> &mapRS2weight,
+              const map<string, size_t> &mapRS2cat,
+              const vector<vector<SNPINFO>> &msnpInfo, const gsl_matrix *W,
+              gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
+  size_t n_vc = vector_ns->size, ni_test = matrix_kin->size1;
   gsl_matrix_set_zero(matrix_kin);
   gsl_vector_set_zero(vector_ns);
 
-  igzstream infile (file_mfile.c_str(), igzstream::in);
+  igzstream infile(file_mfile.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open mfile file: "<<file_mfile<<endl;
+    cout << "error! fail to open mfile file: " << file_mfile << endl;
     return false;
   }
 
   string file_name;
 
-  gsl_matrix *kin_tmp=gsl_matrix_alloc (matrix_kin->size1, matrix_kin->size2);
-  gsl_vector *ns_tmp=gsl_vector_alloc (vector_ns->size);
+  gsl_matrix *kin_tmp = gsl_matrix_alloc(matrix_kin->size1, matrix_kin->size2);
+  gsl_vector *ns_tmp = gsl_vector_alloc(vector_ns->size);
 
-  size_t l=0;
+  size_t l = 0;
   double d;
   while (!safeGetline(infile, file_name).eof()) {
     gsl_matrix_set_zero(kin_tmp);
     gsl_vector_set_zero(ns_tmp);
 
-    if (mfile_mode==1) {
-      file_name+=".bed";
-      PlinkKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
+    if (mfile_mode == 1) {
+      file_name += ".bed";
+      PlinkKin(file_name, display_pace, indicator_idv, mindicator_snp[l],
+               mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
     } else {
-      BimbamKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
+      BimbamKin(file_name, display_pace, indicator_idv, mindicator_snp[l],
+                mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
     }
 
     // Add ns.
     gsl_vector_add(vector_ns, ns_tmp);
 
     // Add kin.
-    for (size_t t=0; t<n_vc; t++) {
-      for (size_t i=0; i<ni_test; ++i) {
-	for (size_t j=0; j<=i; ++j) {
-	  d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)+gsl_matrix_get (kin_tmp, j, i+ni_test*t)*gsl_vector_get(ns_tmp, t);
-
-	  gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
-	  gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
-	}
+    for (size_t t = 0; t < n_vc; t++) {
+      for (size_t i = 0; i < ni_test; ++i) {
+        for (size_t j = 0; j <= i; ++j) {
+          d = gsl_matrix_get(matrix_kin, j, i + ni_test * t) +
+              gsl_matrix_get(kin_tmp, j, i + ni_test * t) *
+                  gsl_vector_get(ns_tmp, t);
+
+          gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+          gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
+        }
       }
     }
     l++;
   }
 
   // Renormalize kin.
-  for (size_t t=0; t<n_vc; t++) {
-    for (size_t i=0; i<ni_test; ++i) {
-      for (size_t j=0; j<=i; ++j) {
-	d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)/
-	  gsl_vector_get(vector_ns, t);
-
-	gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
-	gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
-
+  for (size_t t = 0; t < n_vc; t++) {
+    for (size_t i = 0; i < ni_test; ++i) {
+      for (size_t j = 0; j <= i; ++j) {
+        d = gsl_matrix_get(matrix_kin, j, i + ni_test * t) /
+            gsl_vector_get(vector_ns, t);
+
+        gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+        gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
       }
     }
   }
-  cout<<endl;
+  cout << endl;
 
   infile.close();
   infile.clear();
@@ -3602,15 +3820,13 @@ bool MFILEKin (const size_t mfile_mode, const string &file_mfile,
   return true;
 }
 
-
 // Read var file, store mapRS2wsnp.
-bool ReadFile_wsnp (const string &file_wsnp,
-		    map<string, double> &mapRS2weight) {
+bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2weight) {
   mapRS2weight.clear();
 
-  igzstream infile (file_wsnp.c_str(), igzstream::in);
+  igzstream infile(file_wsnp.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open snp weight file: "<<file_wsnp<<endl;
+    cout << "error! fail to open snp weight file: " << file_wsnp << endl;
     return false;
   }
 
@@ -3619,29 +3835,29 @@ bool ReadFile_wsnp (const string &file_wsnp,
   double weight;
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    rs=ch_ptr;
-    ch_ptr=strtok (NULL, " , \t");
-    weight=atof(ch_ptr);
-    mapRS2weight[rs]=weight;
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    rs = ch_ptr;
+    ch_ptr = strtok(NULL, " , \t");
+    weight = atof(ch_ptr);
+    mapRS2weight[rs] = weight;
   }
 
   return true;
 }
 
-bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
-		    map<string, vector<double> > &mapRS2wvector) {
+bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc,
+                   map<string, vector<double>> &mapRS2wvector) {
   mapRS2wvector.clear();
 
-  igzstream infile (file_wcat.c_str(), igzstream::in);
+  igzstream infile(file_wcat.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open snp weight file: "<<file_wcat<<endl;
+    cout << "error! fail to open snp weight file: " << file_wcat << endl;
     return false;
   }
 
   char *ch_ptr;
   vector<double> weight;
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     weight.push_back(0.0);
   }
 
@@ -3650,43 +3866,52 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
   // Read header.
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader_io(line, header);
 
   while (!safeGetline(infile, line).eof()) {
-    if (isBlankLine(line)) {continue;}
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
-    size_t t=0;
-    for (size_t i=0; i<header.coln; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
-      else if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr; }
-      else if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr; }
-      else if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr; }
-      else if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr; }
-      else if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr; }
-      else {
-	weight[t]=atof(ch_ptr); t++;
-	if (t>n_vc) {
-	  cout<<"error! Number of columns in the wcat file does not "<<
-	    "match that of cat file.";
-	  return false;
-	}
-      }
-
-      ch_ptr=strtok (NULL, " , \t");
-    }
-
-    if (t!=n_vc) {
-      cout<<"error! Number of columns in the wcat file does not "<<
-	"match that of cat file.";
+    if (isBlankLine(line)) {
+      continue;
+    }
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+    size_t t = 0;
+    for (size_t i = 0; i < header.coln; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      } else if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      } else if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+      } else if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+      } else if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      } else if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
+      } else {
+        weight[t] = atof(ch_ptr);
+        t++;
+        if (t > n_vc) {
+          cout << "error! Number of columns in the wcat file does not "
+               << "match that of cat file.";
+          return false;
+        }
+      }
+
+      ch_ptr = strtok(NULL, " , \t");
+    }
+
+    if (t != n_vc) {
+      cout << "error! Number of columns in the wcat file does not "
+           << "match that of cat file.";
       return false;
     }
 
-    if (header.rs_col==0) {
-      rs=chr+":"+pos;
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
     }
 
-    mapRS2wvector[rs]=weight;
+    mapRS2wvector[rs] = weight;
   }
 
   return true;
@@ -3700,18 +3925,23 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
 // the beta file for the second time, compute q, and Vq based on block
 // jacknife use the mapRS2var to select snps (and to ), calculate q do
 // a block-wise jacknife, and compute Vq
-void ReadFile_beta (const string &file_beta,
-		    const map<string, size_t> &mapRS2cat,
-		    const map<string, double> &mapRS2wA,
-		    vector<size_t> &vec_cat, vector<size_t> &vec_ni,
-		    vector<double> &vec_weight, vector<double> &vec_z2,
-		    size_t &ni_total, size_t &ns_total, size_t &ns_test) {
-  vec_cat.clear(); vec_ni.clear(); vec_weight.clear(); vec_z2.clear();
-  ni_total=0; ns_total=0; ns_test=0;
-
-  igzstream infile (file_beta.c_str(), igzstream::in);
+void ReadFile_beta(const string &file_beta,
+                   const map<string, size_t> &mapRS2cat,
+                   const map<string, double> &mapRS2wA, vector<size_t> &vec_cat,
+                   vector<size_t> &vec_ni, vector<double> &vec_weight,
+                   vector<double> &vec_z2, size_t &ni_total, size_t &ns_total,
+                   size_t &ns_test) {
+  vec_cat.clear();
+  vec_ni.clear();
+  vec_weight.clear();
+  vec_z2.clear();
+  ni_total = 0;
+  ns_total = 0;
+  ns_test = 0;
+
+  igzstream infile(file_beta.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open beta file: "<<file_beta<<endl;
+    cout << "error! fail to open beta file: " << file_beta << endl;
     return;
   }
 
@@ -3720,110 +3950,158 @@ void ReadFile_beta (const string &file_beta,
   string type;
 
   string rs, chr, a1, a0, pos, cm;
-  double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
-  size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;
+  double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0,
+         af = 0, var_x = 0;
+  size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0;
 
   // Read header.
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader_io(line, header);
 
-  if (header.n_col==0 ) {
-    if ( (header.nobs_col==0 && header.nmis_col==0) &&
-	 (header.ncase_col==0 && header.ncontrol_col==0) ) {
-      cout<<"error! missing sample size in the beta file."<<endl;
+  if (header.n_col == 0) {
+    if ((header.nobs_col == 0 && header.nmis_col == 0) &&
+        (header.ncase_col == 0 && header.ncontrol_col == 0)) {
+      cout << "error! missing sample size in the beta file." << endl;
     } else {
-      cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+      cout << "total sample size will be replaced by obs/mis sample size."
+           << endl;
     }
   }
 
-  if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) &&
-      header.chisq_col==0 && header.p_col==0) {
-    cout<<"error! missing z scores in the beta file."<<endl;
+  if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) &&
+      header.chisq_col == 0 && header.p_col == 0) {
+    cout << "error! missing z scores in the beta file." << endl;
   }
 
   while (!safeGetline(infile, line).eof()) {
-    if (isBlankLine(line)) {continue;}
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
-    z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
-    n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; af=0; var_x=0;
-    for (size_t i=0; i<header.coln; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
-      if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
-      if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
-      if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
-      if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
-      if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+    if (isBlankLine(line)) {
+      continue;
+    }
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+    z = 0;
+    beta = 0;
+    se_beta = 0;
+    chisq = 0;
+    pvalue = 0;
+    n_total = 0;
+    n_mis = 0;
+    n_obs = 0;
+    n_case = 0;
+    n_control = 0;
+    af = 0;
+    var_x = 0;
+    for (size_t i = 0; i < header.coln; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      }
+      if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      }
+      if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+      }
+      if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+      }
+      if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      }
+      if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
+      }
 
-      if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
-      if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
-      if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
-	se_beta=atof(ch_ptr);
+      if (header.z_col != 0 && header.z_col == i + 1) {
+        z = atof(ch_ptr);
+      }
+      if (header.beta_col != 0 && header.beta_col == i + 1) {
+        beta = atof(ch_ptr);
+      }
+      if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
+        se_beta = atof(ch_ptr);
+      }
+      if (header.chisq_col != 0 && header.chisq_col == i + 1) {
+        chisq = atof(ch_ptr);
+      }
+      if (header.p_col != 0 && header.p_col == i + 1) {
+        pvalue = atof(ch_ptr);
       }
-      if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
-      if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
 
-      if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
-      if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
-      if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
-      if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);}
-      if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {
-	n_control=atoi(ch_ptr);
+      if (header.n_col != 0 && header.n_col == i + 1) {
+        n_total = atoi(ch_ptr);
+      }
+      if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+        n_mis = atoi(ch_ptr);
+      }
+      if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+        n_obs = atoi(ch_ptr);
+      }
+      if (header.ncase_col != 0 && header.ncase_col == i + 1) {
+        n_case = atoi(ch_ptr);
+      }
+      if (header.ncontrol_col != 0 && header.ncontrol_col == i + 1) {
+        n_control = atoi(ch_ptr);
+      }
+      if (header.af_col != 0 && header.af_col == i + 1) {
+        af = atof(ch_ptr);
+      }
+      if (header.var_col != 0 && header.var_col == i + 1) {
+        var_x = atof(ch_ptr);
       }
-      if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
-      if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
 
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
 
-    if (header.rs_col==0) {
-      rs=chr+":"+pos;
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
     }
 
-    if (header.n_col==0) {
-      if (header.nmis_col!=0 && header.nobs_col!=0) {
-	n_total=n_mis+n_obs;
+    if (header.n_col == 0) {
+      if (header.nmis_col != 0 && header.nobs_col != 0) {
+        n_total = n_mis + n_obs;
       } else {
-	n_total=n_case+n_control;
+        n_total = n_case + n_control;
       }
     }
 
     // Both z values and beta/se_beta have directions, while
     // chisq/pvalue do not.
-    if (header.z_col!=0) {
-      zsquare=z*z;
-    } else if (header.beta_col!=0 && header.sebeta_col!=0) {
-      z=beta/se_beta;
-      zsquare=z*z;
-    } else if (header.chisq_col!=0) {
-      zsquare=chisq;
-    } else if (header.p_col!=0) {
-      zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
-    } else {zsquare=0;}
+    if (header.z_col != 0) {
+      zsquare = z * z;
+    } else if (header.beta_col != 0 && header.sebeta_col != 0) {
+      z = beta / se_beta;
+      zsquare = z * z;
+    } else if (header.chisq_col != 0) {
+      zsquare = chisq;
+    } else if (header.p_col != 0) {
+      zsquare = gsl_cdf_chisq_Qinv(pvalue, 1);
+    } else {
+      zsquare = 0;
+    }
 
     // Obtain var_x.
-    if (header.var_col==0 && header.af_col!=0) {
-      var_x=2.0*af*(1.0-af);
+    if (header.var_col == 0 && header.af_col != 0) {
+      var_x = 2.0 * af * (1.0 - af);
     }
 
     // If the SNP is also present in cor file, then do calculations.
-    if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) &&
-	 (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) && zsquare!=0) {
-      if (mapRS2cat.size()!=0) {
-	vec_cat.push_back(mapRS2cat.at(rs));
+    if ((mapRS2wA.size() == 0 || mapRS2wA.count(rs) != 0) &&
+        (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0) && zsquare != 0) {
+      if (mapRS2cat.size() != 0) {
+        vec_cat.push_back(mapRS2cat.at(rs));
       } else {
-	vec_cat.push_back(0);
+        vec_cat.push_back(0);
       }
       vec_ni.push_back(n_total);
-      if (mapRS2wA.size()==0) {
-	vec_weight.push_back(1);
+      if (mapRS2wA.size() == 0) {
+        vec_weight.push_back(1);
       } else {
-	vec_weight.push_back(mapRS2wA.at(rs));
+        vec_weight.push_back(mapRS2wA.at(rs));
       }
       vec_z2.push_back(zsquare);
 
-      ni_total=max(ni_total, n_total);
+      ni_total = max(ni_total, n_total);
       ns_test++;
     }
 
@@ -3836,15 +4114,15 @@ void ReadFile_beta (const string &file_beta,
   return;
 }
 
-void ReadFile_beta (const string &file_beta,
-		    const map<string, double> &mapRS2wA,
-		    map<string, string> &mapRS2A1,
-		    map<string, double> &mapRS2z) {
-  mapRS2A1.clear(); mapRS2z.clear();
+void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
+                   map<string, string> &mapRS2A1,
+                   map<string, double> &mapRS2z) {
+  mapRS2A1.clear();
+  mapRS2z.clear();
 
-  igzstream infile (file_beta.c_str(), igzstream::in);
+  igzstream infile(file_beta.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open beta file: "<<file_beta<<endl;
+    cout << "error! fail to open beta file: " << file_beta << endl;
     return;
   }
 
@@ -3853,92 +4131,137 @@ void ReadFile_beta (const string &file_beta,
   string type;
 
   string rs, chr, a1, a0, pos, cm;
-  double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, af=0, var_x=0;
-  size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;
-  size_t ni_total=0, ns_total=0, ns_test=0;
+  double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, af = 0, var_x = 0;
+  size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0;
+  size_t ni_total = 0, ns_total = 0, ns_test = 0;
 
   // Read header.
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_io (line, header);
+  ReadHeader_io(line, header);
 
-  if (header.n_col==0 ) {
-    if ((header.nobs_col==0 && header.nmis_col==0) &&
-	(header.ncase_col==0 && header.ncontrol_col==0)) {
-      cout<<"error! missing sample size in the beta file."<<endl;
+  if (header.n_col == 0) {
+    if ((header.nobs_col == 0 && header.nmis_col == 0) &&
+        (header.ncase_col == 0 && header.ncontrol_col == 0)) {
+      cout << "error! missing sample size in the beta file." << endl;
     } else {
-      cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+      cout << "total sample size will be replaced by obs/mis sample size."
+           << endl;
     }
   }
 
-  if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0)) {
-    cout<<"error! missing z scores in the beta file."<<endl;
+  if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0)) {
+    cout << "error! missing z scores in the beta file." << endl;
   }
 
   while (!safeGetline(infile, line).eof()) {
-    if (isBlankLine(line)) {continue;}
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
-    z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
-    n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; af=0; var_x=0;
-    for (size_t i=0; i<header.coln; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
-      if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
-      if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
-      if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
-      if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
-      if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+    if (isBlankLine(line)) {
+      continue;
+    }
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+    z = 0;
+    beta = 0;
+    se_beta = 0;
+    chisq = 0;
+    pvalue = 0;
+    n_total = 0;
+    n_mis = 0;
+    n_obs = 0;
+    n_case = 0;
+    n_control = 0;
+    af = 0;
+    var_x = 0;
+    for (size_t i = 0; i < header.coln; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      }
+      if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      }
+      if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+      }
+      if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+      }
+      if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      }
+      if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
+      }
 
-      if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
-      if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
-      if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
-	se_beta=atof(ch_ptr);
+      if (header.z_col != 0 && header.z_col == i + 1) {
+        z = atof(ch_ptr);
+      }
+      if (header.beta_col != 0 && header.beta_col == i + 1) {
+        beta = atof(ch_ptr);
+      }
+      if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
+        se_beta = atof(ch_ptr);
+      }
+      if (header.chisq_col != 0 && header.chisq_col == i + 1) {
+        chisq = atof(ch_ptr);
+      }
+      if (header.p_col != 0 && header.p_col == i + 1) {
+        pvalue = atof(ch_ptr);
       }
-      if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
-      if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
 
-      if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
-      if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
-      if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
-      if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);}
-      if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {
-	n_control=atoi(ch_ptr);
+      if (header.n_col != 0 && header.n_col == i + 1) {
+        n_total = atoi(ch_ptr);
+      }
+      if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+        n_mis = atoi(ch_ptr);
+      }
+      if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+        n_obs = atoi(ch_ptr);
+      }
+      if (header.ncase_col != 0 && header.ncase_col == i + 1) {
+        n_case = atoi(ch_ptr);
+      }
+      if (header.ncontrol_col != 0 && header.ncontrol_col == i + 1) {
+        n_control = atoi(ch_ptr);
       }
 
-      if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
-      if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+      if (header.af_col != 0 && header.af_col == i + 1) {
+        af = atof(ch_ptr);
+      }
+      if (header.var_col != 0 && header.var_col == i + 1) {
+        var_x = atof(ch_ptr);
+      }
 
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
 
-    if (header.rs_col==0) {
-      rs=chr+":"+pos;
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
     }
 
-    if (header.n_col==0) {
-      if (header.nmis_col!=0 && header.nobs_col!=0) {
-	n_total=n_mis+n_obs;
+    if (header.n_col == 0) {
+      if (header.nmis_col != 0 && header.nobs_col != 0) {
+        n_total = n_mis + n_obs;
       } else {
-	n_total=n_case+n_control;
+        n_total = n_case + n_control;
       }
     }
 
     // Both z values and beta/se_beta have directions, while
     // chisq/pvalue do not.
-    if (header.z_col!=0) {
-      z=z;
-    } else if (header.beta_col!=0 && header.sebeta_col!=0) {
-      z=beta/se_beta;
+    if (header.z_col != 0) {
+      z = z;
+    } else if (header.beta_col != 0 && header.sebeta_col != 0) {
+      z = beta / se_beta;
     } else {
-      z=0;
+      z = 0;
     }
 
     // If the snp is also present in cor file, then do calculations.
-    if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) ) {
-      mapRS2z[rs]=z;
-      mapRS2A1[rs]=a1;
+    if ((mapRS2wA.size() == 0 || mapRS2wA.count(rs) != 0)) {
+      mapRS2z[rs] = z;
+      mapRS2A1[rs] = a1;
 
-      ni_total=max(ni_total, n_total);
+      ni_total = max(ni_total, n_total);
       ns_test++;
     }
 
@@ -3951,139 +4274,155 @@ void ReadFile_beta (const string &file_beta,
   return;
 }
 
-void Calcq (const size_t n_block, const vector<size_t> &vec_cat,
-	    const vector<size_t> &vec_ni, const vector<double> &vec_weight,
-	    const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q,
-	    gsl_vector *s) {
-  gsl_matrix_set_zero (Vq);
-  gsl_vector_set_zero (q);
-  gsl_vector_set_zero (s);
+void Calcq(const size_t n_block, const vector<size_t> &vec_cat,
+           const vector<size_t> &vec_ni, const vector<double> &vec_weight,
+           const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q,
+           gsl_vector *s) {
+  gsl_matrix_set_zero(Vq);
+  gsl_vector_set_zero(q);
+  gsl_vector_set_zero(s);
 
   size_t cat, n_total;
   double w, zsquare;
 
   vector<double> vec_q, vec_s, n_snps;
-  for (size_t i=0; i<q->size; i++) {
+  for (size_t i = 0; i < q->size; i++) {
     vec_q.push_back(0.0);
     vec_s.push_back(0.0);
     n_snps.push_back(0.0);
   }
 
-  vector<vector<double> > mat_q, mat_s;
-  for (size_t i=0; i<n_block; i++) {
+  vector<vector<double>> mat_q, mat_s;
+  for (size_t i = 0; i < n_block; i++) {
     mat_q.push_back(vec_q);
     mat_s.push_back(vec_s);
   }
 
   // Compute q and s.
-  for (size_t i=0; i<vec_cat.size(); i++) {
+  for (size_t i = 0; i < vec_cat.size(); i++) {
 
     // Extract quantities.
-    cat=vec_cat[i];
-    n_total=vec_ni[i];
-    w=vec_weight[i];
-    zsquare=vec_z2[i];
+    cat = vec_cat[i];
+    n_total = vec_ni[i];
+    w = vec_weight[i];
+    zsquare = vec_z2[i];
 
     // Compute q and s.
-    vec_q[cat]+=(zsquare-1.0)*w/(double)n_total;
-    vec_s[cat]+=w;
+    vec_q[cat] += (zsquare - 1.0) * w / (double)n_total;
+    vec_s[cat] += w;
     n_snps[cat]++;
   }
 
   // Update q; vec_q is used again for computing Vq below.
-  for (size_t i=0; i<q->size; i++) {
-    if (vec_s[i]!=0) {
-      gsl_vector_set(q, i, vec_q[i]/vec_s[i]);
+  for (size_t i = 0; i < q->size; i++) {
+    if (vec_s[i] != 0) {
+      gsl_vector_set(q, i, vec_q[i] / vec_s[i]);
     }
     gsl_vector_set(s, i, vec_s[i]);
   }
 
   // Compute Vq; divide SNPs in each category into evenly distributed
   // blocks.
-  size_t t=0, b=0, n_snp=0;
+  size_t t = 0, b = 0, n_snp = 0;
   double d, m, n;
-  for (size_t l=0; l<q->size; l++) {
-    n_snp=floor(n_snps[l]/n_block); t=0; b=0;
-    if (n_snp==0) {continue;}
+  for (size_t l = 0; l < q->size; l++) {
+    n_snp = floor(n_snps[l] / n_block);
+    t = 0;
+    b = 0;
+    if (n_snp == 0) {
+      continue;
+    }
 
     // Initiate everything to zero.
-    for (size_t i=0; i<n_block; i++) {
-      for (size_t j=0; j<q->size; j++) {
-	mat_q[i][j]=0;
-	mat_s[i][j]=0;
+    for (size_t i = 0; i < n_block; i++) {
+      for (size_t j = 0; j < q->size; j++) {
+        mat_q[i][j] = 0;
+        mat_s[i][j] = 0;
       }
     }
 
     // Record values.
-    for (size_t i=0; i<vec_cat.size(); i++) {
+    for (size_t i = 0; i < vec_cat.size(); i++) {
 
       // Extract quantities.
-      cat=vec_cat[i];
-      n_total=vec_ni[i];
-      w=vec_weight[i];
-      zsquare=vec_z2[i];
+      cat = vec_cat[i];
+      n_total = vec_ni[i];
+      w = vec_weight[i];
+      zsquare = vec_z2[i];
 
       // Save quantities for computing Vq (which is not divided by
       // n_total).
-      mat_q[b][cat]+=(zsquare-1.0)*w;
-      mat_s[b][cat]+=w;
-
-      if (cat==l) {
-	if (b<n_block-1) {
-	  if (t<n_snp-1) {t++;}  else {b++; t=0;}
-	} else {
-	  t++;
-	}
+      mat_q[b][cat] += (zsquare - 1.0) * w;
+      mat_s[b][cat] += w;
+
+      if (cat == l) {
+        if (b < n_block - 1) {
+          if (t < n_snp - 1) {
+            t++;
+          } else {
+            b++;
+            t = 0;
+          }
+        } else {
+          t++;
+        }
       }
     }
 
     // Center mat_q.
-    for (size_t i=0; i<q->size; i++) {
-      m=0; n=0;
-      for (size_t k=0; k<n_block; k++) {
-	if (mat_s[k][i]!=0 && vec_s[i]!=mat_s[k][i]) {
-	  d=(vec_q[i]-mat_q[k][i])/(vec_s[i]-mat_s[k][i]);
-	  mat_q[k][i]=d;
-	  m+=d;
-	  n++;
-	}
+    for (size_t i = 0; i < q->size; i++) {
+      m = 0;
+      n = 0;
+      for (size_t k = 0; k < n_block; k++) {
+        if (mat_s[k][i] != 0 && vec_s[i] != mat_s[k][i]) {
+          d = (vec_q[i] - mat_q[k][i]) / (vec_s[i] - mat_s[k][i]);
+          mat_q[k][i] = d;
+          m += d;
+          n++;
+        }
+      }
+      if (n != 0) {
+        m /= n;
       }
-      if (n!=0) {m/=n;}
 
-      for (size_t k=0; k<n_block; k++) {
-	if (mat_q[k][i]!=0) {
-	  mat_q[k][i]-=m;
-	}
+      for (size_t k = 0; k < n_block; k++) {
+        if (mat_q[k][i] != 0) {
+          mat_q[k][i] -= m;
+        }
       }
     }
 
     // Compute Vq for l'th row and l'th column only.
-    for (size_t i=0; i<q->size; i++) {
-      d=0; n=0;
-      for (size_t k=0; k<n_block; k++) {
-	if (mat_q[k][l]!=0 && mat_q[k][i]!=0) {
-	  d+=mat_q[k][l]*mat_q[k][i];
-	  n++;
-	}
-      }
-      if (n!=0) {
-	d/=n;
-	d*=n-1;
-      }
-      d+=gsl_matrix_get(Vq, i, l);
+    for (size_t i = 0; i < q->size; i++) {
+      d = 0;
+      n = 0;
+      for (size_t k = 0; k < n_block; k++) {
+        if (mat_q[k][l] != 0 && mat_q[k][i] != 0) {
+          d += mat_q[k][l] * mat_q[k][i];
+          n++;
+        }
+      }
+      if (n != 0) {
+        d /= n;
+        d *= n - 1;
+      }
+      d += gsl_matrix_get(Vq, i, l);
       gsl_matrix_set(Vq, i, l, d);
-      if (i!=l) {gsl_matrix_set(Vq, l, i, d);}
+      if (i != l) {
+        gsl_matrix_set(Vq, l, i, d);
+      }
     }
-
   }
 
-  //divide the off diagonal elements of Vq by 2
-  for (size_t i=0; i<q->size; i++) {
-    for (size_t j=i; j<q->size; j++) {
-      if (i==j) {continue;}
-      d=gsl_matrix_get(Vq, i, j);
-      gsl_matrix_set(Vq, i, j, d/2);
-      gsl_matrix_set(Vq, j, i, d/2);
+  // Divide the off-diagonal elements of Vq by 2.
+  for (size_t i = 0; i < q->size; i++) {
+    for (size_t j = i; j < q->size; j++) {
+      if (i == j) {
+        continue;
+      }
+      d = gsl_matrix_get(Vq, i, j);
+      gsl_matrix_set(Vq, i, j, d / 2);
+      gsl_matrix_set(Vq, j, i, d / 2);
     }
   }
 
@@ -4091,20 +4430,19 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat,
 }
 
 // Read vector file.
-void ReadFile_vector (const string &file_vec, gsl_vector *vec)
-{
-  igzstream infile (file_vec.c_str(), igzstream::in);
+void ReadFile_vector(const string &file_vec, gsl_vector *vec) {
+  igzstream infile(file_vec.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open vector file: "<<file_vec<<endl;
+    cout << "error! fail to open vector file: " << file_vec << endl;
     return;
   }
 
   string line;
   char *ch_ptr;
 
-  for (size_t i=0; i<vec->size; i++) {
+  for (size_t i = 0; i < vec->size; i++) {
     !safeGetline(infile, line).eof();
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
     gsl_vector_set(vec, i, atof(ch_ptr));
   }
 
@@ -4114,22 +4452,22 @@ void ReadFile_vector (const string &file_vec, gsl_vector *vec)
   return;
 }
 
-void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) {
-  igzstream infile (file_mat.c_str(), igzstream::in);
+void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) {
+  igzstream infile(file_mat.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open matrix file: "<<file_mat<<endl;
+    cout << "error! fail to open matrix file: " << file_mat << endl;
     return;
   }
 
   string line;
   char *ch_ptr;
 
-  for (size_t i=0; i<mat->size1; i++) {
+  for (size_t i = 0; i < mat->size1; i++) {
     !safeGetline(infile, line).eof();
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    for (size_t j=0; j<mat->size2; j++) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    for (size_t j = 0; j < mat->size2; j++) {
       gsl_matrix_set(mat, i, j, atof(ch_ptr));
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
   }
 
@@ -4139,32 +4477,32 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) {
   return;
 }
 
-void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1,
-		      gsl_matrix *mat2) {
-  igzstream infile (file_mat.c_str(), igzstream::in);
+void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1,
+                     gsl_matrix *mat2) {
+  igzstream infile(file_mat.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open matrix file: "<<file_mat<<endl;
+    cout << "error! fail to open matrix file: " << file_mat << endl;
     return;
   }
 
   string line;
   char *ch_ptr;
 
-  for (size_t i=0; i<mat1->size1; i++) {
+  for (size_t i = 0; i < mat1->size1; i++) {
     !safeGetline(infile, line).eof();
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    for (size_t j=0; j<mat1->size2; j++) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    for (size_t j = 0; j < mat1->size2; j++) {
       gsl_matrix_set(mat1, i, j, atof(ch_ptr));
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
   }
 
-  for (size_t i=0; i<mat2->size1; i++) {
+  for (size_t i = 0; i < mat2->size1; i++) {
     !safeGetline(infile, line).eof();
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    for (size_t j=0; j<mat2->size2; j++) {
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    for (size_t j = 0; j < mat2->size2; j++) {
       gsl_matrix_set(mat2, i, j, atof(ch_ptr));
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
   }
 
@@ -4175,24 +4513,24 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1,
 }
 
 // Read study file.
-void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat,
-		     gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
-  string Vqfile=file_study+".Vq.txt";
-  string sfile=file_study+".size.txt";
-  string qfile=file_study+".q.txt";
+void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat,
+                    gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
+  string Vqfile = file_study + ".Vq.txt";
+  string sfile = file_study + ".size.txt";
+  string qfile = file_study + ".q.txt";
 
-  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
 
   ReadFile_matrix(Vqfile, Vq_mat);
   ReadFile_vector(sfile, s);
   ReadFile_vector(qfile, q_vec);
 
   double d;
-  for (size_t i=0; i<s_vec->size; i++) {
-    d=gsl_vector_get (s, i);
-    gsl_vector_set (s_vec, i, d);
+  for (size_t i = 0; i < s_vec->size; i++) {
+    d = gsl_vector_get(s, i);
+    gsl_vector_set(s_vec, i, d);
   }
-  ni=gsl_vector_get (s, s_vec->size);
+  ni = gsl_vector_get(s, s_vec->size);
 
   gsl_vector_free(s);
 
@@ -4200,22 +4538,22 @@ void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat,
 }
 
 // Read reference file.
-void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat,
-		   gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
-  string sfile=file_ref+".size.txt";
-  string Sfile=file_ref+".S.txt";
+void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat,
+                  gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
+  string sfile = file_ref + ".size.txt";
+  string Sfile = file_ref + ".S.txt";
 
-  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
 
   ReadFile_vector(sfile, s);
   ReadFile_matrix(Sfile, S_mat, Svar_mat);
 
   double d;
-  for (size_t i=0; i<s_vec->size; i++) {
-    d=gsl_vector_get (s, i);
-    gsl_vector_set (s_vec, i, d);
+  for (size_t i = 0; i < s_vec->size; i++) {
+    d = gsl_vector_get(s, i);
+    gsl_vector_set(s_vec, i, d);
   }
-  ni=gsl_vector_get (s, s_vec->size);
+  ni = gsl_vector_get(s, s_vec->size);
 
   gsl_vector_free(s);
 
@@ -4223,20 +4561,20 @@ void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat,
 }
 
 // Read mstudy file.
-void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat,
-		      gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
+void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat,
+                     gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
   gsl_matrix_set_zero(Vq_mat);
   gsl_vector_set_zero(q_vec);
   gsl_vector_set_zero(s_vec);
-  ni=0;
+  ni = 0;
 
-  gsl_matrix *Vq_sub=gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2);
-  gsl_vector *q_sub=gsl_vector_alloc(q_vec->size);
-  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+  gsl_matrix *Vq_sub = gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2);
+  gsl_vector *q_sub = gsl_vector_alloc(q_vec->size);
+  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
 
-  igzstream infile (file_mstudy.c_str(), igzstream::in);
+  igzstream infile(file_mstudy.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open mstudy file: "<<file_mstudy<<endl;
+    cout << "error! fail to open mstudy file: " << file_mstudy << endl;
     return;
   }
 
@@ -4244,51 +4582,64 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat,
   double d1, d2, d;
 
   while (!safeGetline(infile, file_name).eof()) {
-    string Vqfile=file_name+".Vq.txt";
-    string sfile=file_name+".size.txt";
-    string qfile=file_name+".q.txt";
+    string Vqfile = file_name + ".Vq.txt";
+    string sfile = file_name + ".size.txt";
+    string qfile = file_name + ".q.txt";
 
     ReadFile_matrix(Vqfile, Vq_sub);
     ReadFile_vector(sfile, s);
     ReadFile_vector(qfile, q_sub);
 
-    ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size));
+    ni = max(ni, (size_t)gsl_vector_get(s, s_vec->size));
 
-    for (size_t i=0; i<s_vec->size; i++) {
-      d1=gsl_vector_get (s, i);
-      if (d1==0) {continue;}
+    for (size_t i = 0; i < s_vec->size; i++) {
+      d1 = gsl_vector_get(s, i);
+      if (d1 == 0) {
+        continue;
+      }
 
-      d=gsl_vector_get(q_vec, i)+gsl_vector_get(q_sub, i)*d1;
+      d = gsl_vector_get(q_vec, i) + gsl_vector_get(q_sub, i) * d1;
       gsl_vector_set(q_vec, i, d);
 
-      d=gsl_vector_get(s_vec, i)+d1;
+      d = gsl_vector_get(s_vec, i) + d1;
       gsl_vector_set(s_vec, i, d);
 
-      for (size_t j=i; j<s_vec->size; j++) {
-	d2=gsl_vector_get (s, j);
-	if (d2==0) {continue;}
+      for (size_t j = i; j < s_vec->size; j++) {
+        d2 = gsl_vector_get(s, j);
+        if (d2 == 0) {
+          continue;
+        }
 
-	d=gsl_matrix_get(Vq_mat, i, j)+gsl_matrix_get(Vq_sub, i, j)*d1*d2;
-	gsl_matrix_set(Vq_mat, i, j, d);
-	if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);}
+        d = gsl_matrix_get(Vq_mat, i, j) +
+            gsl_matrix_get(Vq_sub, i, j) * d1 * d2;
+        gsl_matrix_set(Vq_mat, i, j, d);
+        if (i != j) {
+          gsl_matrix_set(Vq_mat, j, i, d);
+        }
       }
     }
   }
 
-  for (size_t i=0; i<s_vec->size; i++) {
-    d1=gsl_vector_get (s_vec, i);
-    if (d1==0) {continue;}
+  for (size_t i = 0; i < s_vec->size; i++) {
+    d1 = gsl_vector_get(s_vec, i);
+    if (d1 == 0) {
+      continue;
+    }
 
-    d=gsl_vector_get (q_vec, i);
-    gsl_vector_set (q_vec, i, d/d1);
+    d = gsl_vector_get(q_vec, i);
+    gsl_vector_set(q_vec, i, d / d1);
 
-    for (size_t j=i; j<s_vec->size; j++) {
-      d2=gsl_vector_get (s_vec, j);
-      if (d2==0) {continue;}
+    for (size_t j = i; j < s_vec->size; j++) {
+      d2 = gsl_vector_get(s_vec, j);
+      if (d2 == 0) {
+        continue;
+      }
 
-      d=gsl_matrix_get (Vq_mat, i, j)/(d1*d2);
-      gsl_matrix_set (Vq_mat, i, j, d);
-      if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);}
+      d = gsl_matrix_get(Vq_mat, i, j) / (d1 * d2);
+      gsl_matrix_set(Vq_mat, i, j, d);
+      if (i != j) {
+        gsl_matrix_set(Vq_mat, j, i, d);
+      }
     }
   }
 
@@ -4300,20 +4651,20 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat,
 }
 
 // Read reference file.
-void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat,
-		    gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
+void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
+                   gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
   gsl_matrix_set_zero(S_mat);
   gsl_matrix_set_zero(Svar_mat);
   gsl_vector_set_zero(s_vec);
-  ni=0;
+  ni = 0;
 
-  gsl_matrix *S_sub=gsl_matrix_alloc (S_mat->size1, S_mat->size2);
-  gsl_matrix *Svar_sub=gsl_matrix_alloc (Svar_mat->size1, Svar_mat->size2);
-  gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+  gsl_matrix *S_sub = gsl_matrix_alloc(S_mat->size1, S_mat->size2);
+  gsl_matrix *Svar_sub = gsl_matrix_alloc(Svar_mat->size1, Svar_mat->size2);
+  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
 
-  igzstream infile (file_mref.c_str(), igzstream::in);
+  igzstream infile(file_mref.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open mref file: "<<file_mref<<endl;
+    cout << "error! fail to open mref file: " << file_mref << endl;
     return;
   }
 
@@ -4321,51 +4672,59 @@ void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat,
   double d1, d2, d;
 
   while (!safeGetline(infile, file_name).eof()) {
-    string sfile=file_name+".size.txt";
-    string Sfile=file_name+".S.txt";
+    string sfile = file_name + ".size.txt";
+    string Sfile = file_name + ".S.txt";
 
     ReadFile_vector(sfile, s);
     ReadFile_matrix(Sfile, S_sub, Svar_sub);
 
     // Update s_vec and ni.
-    for (size_t i=0; i<s_vec->size; i++) {
-      d=gsl_vector_get (s, i)+gsl_vector_get (s_vec, i);
-      gsl_vector_set (s_vec, i, d);
+    for (size_t i = 0; i < s_vec->size; i++) {
+      d = gsl_vector_get(s, i) + gsl_vector_get(s_vec, i);
+      gsl_vector_set(s_vec, i, d);
     }
-    ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size));
+    ni = max(ni, (size_t)gsl_vector_get(s, s_vec->size));
 
     // Update S and Svar from each file.
-    for (size_t i=0; i<S_mat->size1; i++) {
-      d1=gsl_vector_get(s, i);
-      for (size_t j=0; j<S_mat->size2; j++) {
-	d2=gsl_vector_get(s, j);
-
-	d=gsl_matrix_get(S_sub, i, j)*d1*d2;
-	gsl_matrix_set(S_sub, i, j, d);
-	d=gsl_matrix_get(Svar_sub, i, j)*d1*d2*d1*d2;
-	gsl_matrix_set(Svar_sub, i, j, d);
+    for (size_t i = 0; i < S_mat->size1; i++) {
+      d1 = gsl_vector_get(s, i);
+      for (size_t j = 0; j < S_mat->size2; j++) {
+        d2 = gsl_vector_get(s, j);
+
+        d = gsl_matrix_get(S_sub, i, j) * d1 * d2;
+        gsl_matrix_set(S_sub, i, j, d);
+        d = gsl_matrix_get(Svar_sub, i, j) * d1 * d2 * d1 * d2;
+        gsl_matrix_set(Svar_sub, i, j, d);
       }
     }
 
-    gsl_matrix_add (S_mat, S_sub);
-    gsl_matrix_add (Svar_mat, Svar_sub);
+    gsl_matrix_add(S_mat, S_sub);
+    gsl_matrix_add(Svar_mat, Svar_sub);
   }
 
   // Final: update S and Svar.
-  for (size_t i=0; i<S_mat->size1; i++) {
-    d1=gsl_vector_get(s_vec, i);
-    if (d1==0) {continue;}
-    for (size_t j=i; j<S_mat->size2; j++) {
-      d2=gsl_vector_get(s_vec, j);
-      if (d2==0) {continue;}
-
-      d=gsl_matrix_get(S_mat, i, j)/(d1*d2);
+  for (size_t i = 0; i < S_mat->size1; i++) {
+    d1 = gsl_vector_get(s_vec, i);
+    if (d1 == 0) {
+      continue;
+    }
+    for (size_t j = i; j < S_mat->size2; j++) {
+      d2 = gsl_vector_get(s_vec, j);
+      if (d2 == 0) {
+        continue;
+      }
+
+      d = gsl_matrix_get(S_mat, i, j) / (d1 * d2);
       gsl_matrix_set(S_mat, i, j, d);
-      if (i!=j) {gsl_matrix_set(S_mat, j, i, d);}
+      if (i != j) {
+        gsl_matrix_set(S_mat, j, i, d);
+      }
 
-      d=gsl_matrix_get(Svar_mat, i, j)/(d1*d2*d1*d2);
+      d = gsl_matrix_get(Svar_mat, i, j) / (d1 * d2 * d1 * d2);
       gsl_matrix_set(Svar_mat, i, j, d);
-      if (i!=j) {gsl_matrix_set(Svar_mat, j, i, d);}
+      if (i != j) {
+        gsl_matrix_set(Svar_mat, j, i, d);
+      }
     }
   }
 
diff --git a/src/io.h b/src/io.h
index 9d6f8cc..3e1145a 100644
--- a/src/io.h
+++ b/src/io.h
@@ -19,195 +19,172 @@
 #ifndef __IO_H__
 #define __IO_H__
 
-#include <vector>
-#include <map>
-#include <algorithm>
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+#include <algorithm>
+#include <map>
+#include <vector>
 
 #include "gzstream.h"
 #include "param.h"
 
 using namespace std;
 
-void ProgressBar (string str, double p, double total);
-void ProgressBar (string str, double p, double total, double ratio);
-std::istream& safeGetline(std::istream& is, std::string& t);
-
-bool ReadFile_snps (const string &file_snps, set<string> &setSnps);
-bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps);
-bool ReadFile_log (const string &file_log, double &pheno_mean);
-
-bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo);
-bool ReadFile_fam (const string &file_fam,
-		   vector<vector<int> > &indicator_pheno,
-		   vector<vector<double> > &pheno,
-		   map<string, int> &mapID2num,
-		   const vector<size_t> &p_column);
-
-bool ReadFile_cvt (const string &file_cvt,
-		   vector<int> &indicator_cvt,
-		   vector<vector<double> > &cvt,
-		   size_t &n_cvt);
-bool ReadFile_anno (const string &file_bim, map<string, string> &mapRS2chr,
-		    map<string, long int> &mapRS2bp,
-		    map<string, double> &mapRS2cM);
-bool ReadFile_pheno (const string &file_pheno,
-		     vector<vector<int> > &indicator_pheno,
-		     vector<vector<double> > &pheno,
-		     const vector<size_t> &p_column);
-bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv,
-		      vector<double> &pheno, const int &p_column);
-
-bool ReadFile_geno (const string &file_geno, const set<string> &setSnps,
-		    const gsl_matrix *W, vector<int> &indicator_idv,
-		    vector<int> &indicator_snp, const double &maf_level,
-		    const double &miss_level, const double &hwe_level,
-		    const double &r2_level, map<string, string> &mapRS2chr,
-		    map<string, long int> &mapRS2bp,
-		    map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
-		    size_t &ns_test);
-bool ReadFile_bed (const string &file_bed, const set<string> &setSnps,
-		   const gsl_matrix *W, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
-		   const double &maf_level, const double &miss_level,
-		   const double &hwe_level, const double &r2_level,
-		   size_t &ns_test);
-bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv,
-			igzstream &infile, gsl_vector *geno,
-			double &geno_mean);
-void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv,
-		       ifstream &infile, gsl_vector *geno, double &geno_mean);
-
-void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv,
-		   map<string, int> &mapID2num, const size_t k_mode,
-		   bool &error, gsl_matrix *G);
-void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv,
-		  map<string, int> &mapID2num, const size_t k_mode,
-		  bool &error, gsl_matrix *G);
-void ReadFile_eigenU (const string &file_u, bool &error, gsl_matrix *U);
-void ReadFile_eigenD (const string &file_d, bool &error, gsl_vector *eval);
-
-bool BimbamKin (const string &file_geno, vector<int> &indicator_snp,
-		const int k_mode, const int display_pace,
-		gsl_matrix *matrix_kin);
-bool PlinkKin (const string &file_bed, vector<int> &indicator_snp,
-	       const int k_mode, const int display_pace,
-	       gsl_matrix *matrix_kin);
-
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
-		    vector<int> &indicator_snp, gsl_matrix *UtX,
-		    gsl_matrix *K, const bool calc_K);
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp, gsl_matrix *UtX,
-		   gsl_matrix *K, const bool calc_K);
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
-		    vector<int> &indicator_snp,
-		    vector<vector<unsigned char> > &Xt, gsl_matrix *K,
-		    const bool calc_K, const size_t ni_test,
-		    const size_t ns_test);
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp,
-		   vector<vector<unsigned char> > &Xt, gsl_matrix *K,
-		   const bool calc_K, const size_t ni_test,
-		   const size_t ns_test);
-
-bool ReadFile_est (const string &file_est, const vector<size_t> &est_column,
-		   map<string, double> &mapRS2est);
-
-bool CountFileLines (const string &file_input, size_t &n_lines);
-
-bool ReadFile_gene (const string &file_gene, vector<double> &vec_read,
-		    vector<SNPINFO> &snpInfo, size_t &ng_total);
-
-bool ReadHeader_io (const string &line, HEADER &header);
-bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
-		   size_t &n_vc);
-bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat,
-		    size_t &n_vc);
-
-bool ReadFile_catc (const string &file_cat,
-		    map<string, vector<double> > &mapRS2catc,
-		    size_t &n_cat);
-bool ReadFile_mcatc (const string &file_mcat, map<string,
-		     vector<double> > &mapRS2catc, size_t &n_cat);
-
-bool BimbamKin (const string &file_geno, const int display_pace,
-		const vector<int> &indicator_idv,
-		const vector<int> &indicator_snp,
-		const map<string, double> &mapRS2weight,
-		const map<string, size_t> &mapRS2cat,
-		const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
-		gsl_matrix *matrix_kin, gsl_vector *vector_ns);
-bool PlinkKin (const string &file_bed, const int display_pace,
-	       const vector<int> &indicator_idv,
-	       const vector<int> &indicator_snp,
-	       const map<string, double> &mapRS2weight,
-	       const map<string, size_t> &mapRS2cat,
-	       const vector<SNPINFO> &snpInfo,
-	       const gsl_matrix *W, gsl_matrix *matrix_kin,
-	       gsl_vector *vector_ns);
-bool MFILEKin (const size_t mfile_mode, const string &file_mfile,
-	       const int display_pace, const vector<int> &indicator_idv,
-	       const vector<vector<int> > &mindicator_snp,
-	       const map<string, double> &mapRS2weight,
-	       const map<string, size_t> &mapRS2cat,
-	       const vector<vector<SNPINFO> > &msnpInfo,
-	       const gsl_matrix *W, gsl_matrix *matrix_kin,
-	       gsl_vector *vector_ns);
-
-bool ReadFile_wsnp (const string &file_wsnp,
-		    map<string, double> &mapRS2double);
-bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
-		    map<string, vector<double> > &mapRS2vector);
-
-void ReadFile_beta (const string &file_beta,
-		    const map<string, size_t> &mapRS2cat,
-		    const map<string, double> &mapRS2wA,
-		    vector<size_t> &vec_cat, vector<size_t> &vec_ni,
-		    vector<double> &vec_weight, vector<double> &vec_z2,
-		    size_t &ni_total, size_t &ns_total, size_t &ns_test);
-void ReadFile_beta (const string &file_beta,
-		    const map<string, double> &mapRS2wA,
-		    map<string, string> &mapRS2A1,
-		    map<string, double> &mapRS2z);
-void Calcq (const size_t n_block, const vector<size_t> &vec_cat,
-	    const vector<size_t> &vec_ni,
-	    const vector<double> &vec_weight, const vector<double> &vec_z2,
-	    gsl_matrix *Vq, gsl_vector *q, gsl_vector *s);
-
-void ReadFile_study (const string &file_study, gsl_matrix *Vq,
-		     gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
-void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat,
-		   gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
-void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq,
-		      gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
-void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat,
-		    gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
+void ProgressBar(string str, double p, double total);
+void ProgressBar(string str, double p, double total, double ratio);
+std::istream &safeGetline(std::istream &is, std::string &t);
+
+bool ReadFile_snps(const string &file_snps, set<string> &setSnps);
+bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps);
+bool ReadFile_log(const string &file_log, double &pheno_mean);
+
+bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo);
+bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
+                  vector<vector<double>> &pheno, map<string, int> &mapID2num,
+                  const vector<size_t> &p_column);
+
+bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
+                  vector<vector<double>> &cvt, size_t &n_cvt);
+bool ReadFile_anno(const string &file_bim, map<string, string> &mapRS2chr,
+                   map<string, long int> &mapRS2bp,
+                   map<string, double> &mapRS2cM);
+bool ReadFile_pheno(const string &file_pheno,
+                    vector<vector<int>> &indicator_pheno,
+                    vector<vector<double>> &pheno,
+                    const vector<size_t> &p_column);
+bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
+                     vector<double> &pheno, const int &p_column);
+
+bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
+                   const gsl_matrix *W, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp, const double &maf_level,
+                   const double &miss_level, const double &hwe_level,
+                   const double &r2_level, map<string, string> &mapRS2chr,
+                   map<string, long int> &mapRS2bp,
+                   map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
+                   size_t &ns_test);
+bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
+                  const gsl_matrix *W, vector<int> &indicator_idv,
+                  vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+                  const double &maf_level, const double &miss_level,
+                  const double &hwe_level, const double &r2_level,
+                  size_t &ns_test);
+bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
+                       igzstream &infile, gsl_vector *geno, double &geno_mean);
+void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv,
+                      ifstream &infile, gsl_vector *geno, double &geno_mean);
+
+void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
+                  map<string, int> &mapID2num, const size_t k_mode, bool &error,
+                  gsl_matrix *G);
+void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
+                 map<string, int> &mapID2num, const size_t k_mode, bool &error,
+                 gsl_matrix *G);
+void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U);
+void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval);
+
+bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
+               const int k_mode, const int display_pace,
+               gsl_matrix *matrix_kin);
+bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
+              const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
+
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+                   const bool calc_K);
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+                  vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+                  const bool calc_K);
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp,
+                   vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+                   const bool calc_K, const size_t ni_test,
+                   const size_t ns_test);
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+                  vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
+                  gsl_matrix *K, const bool calc_K, const size_t ni_test,
+                  const size_t ns_test);
+
+bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
+                  map<string, double> &mapRS2est);
+
+bool CountFileLines(const string &file_input, size_t &n_lines);
+
+bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
+                   vector<SNPINFO> &snpInfo, size_t &ng_total);
+
+bool ReadHeader_io(const string &line, HEADER &header);
+bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
+                  size_t &n_vc);
+bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat,
+                   size_t &n_vc);
+
+bool ReadFile_catc(const string &file_cat,
+                   map<string, vector<double>> &mapRS2catc, size_t &n_cat);
+bool ReadFile_mcatc(const string &file_mcat,
+                    map<string, vector<double>> &mapRS2catc, size_t &n_cat);
+
+bool BimbamKin(const string &file_geno, const int display_pace,
+               const vector<int> &indicator_idv,
+               const vector<int> &indicator_snp,
+               const map<string, double> &mapRS2weight,
+               const map<string, size_t> &mapRS2cat,
+               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+               gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+bool PlinkKin(const string &file_bed, const int display_pace,
+              const vector<int> &indicator_idv,
+              const vector<int> &indicator_snp,
+              const map<string, double> &mapRS2weight,
+              const map<string, size_t> &mapRS2cat,
+              const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+              gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
+              const int display_pace, const vector<int> &indicator_idv,
+              const vector<vector<int>> &mindicator_snp,
+              const map<string, double> &mapRS2weight,
+              const map<string, size_t> &mapRS2cat,
+              const vector<vector<SNPINFO>> &msnpInfo, const gsl_matrix *W,
+              gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+
+bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2double);
+bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc,
+                   map<string, vector<double>> &mapRS2vector);
+
+void ReadFile_beta(const string &file_beta,
+                   const map<string, size_t> &mapRS2cat,
+                   const map<string, double> &mapRS2wA, vector<size_t> &vec_cat,
+                   vector<size_t> &vec_ni, vector<double> &vec_weight,
+                   vector<double> &vec_z2, size_t &ni_total, size_t &ns_total,
+                   size_t &ns_test);
+void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
+                   map<string, string> &mapRS2A1, map<string, double> &mapRS2z);
+void Calcq(const size_t n_block, const vector<size_t> &vec_cat,
+           const vector<size_t> &vec_ni, const vector<double> &vec_weight,
+           const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q,
+           gsl_vector *s);
+
+void ReadFile_study(const string &file_study, gsl_matrix *Vq, gsl_vector *q_vec,
+                    gsl_vector *s_vec, size_t &ni);
+void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat,
+                  gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
+void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq,
+                     gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
+void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
+                   gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
 
 // WJA added.
-bool bgenKin (const string &file_geno, vector<int> &indicator_snp,
-	      const int k_mode, const int display_pace,
-	      gsl_matrix *matrix_kin);
+bool bgenKin(const string &file_geno, vector<int> &indicator_snp,
+             const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
 bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
-		   const gsl_matrix *W, vector<int> &indicator_idv,
-		   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
-		   const double &maf_level, const double &miss_level,
-		   const double &hwe_level, const double &r2_level,
-		   size_t &ns_test);
+                   const gsl_matrix *W, vector<int> &indicator_idv,
+                   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+                   const double &maf_level, const double &miss_level,
+                   const double &hwe_level, const double &r2_level,
+                   size_t &ns_test);
 bool ReadFile_sample(const string &file_sample,
-		     vector<vector<int> > &indicator_pheno,
-		     vector<vector<double> > &pheno,
-		     const vector<size_t> &p_column,
-		     vector<int> &indicator_cvt,
-		     vector<vector<double> > &cvt,
-		     size_t &n_cvt);
+                     vector<vector<int>> &indicator_pheno,
+                     vector<vector<double>> &pheno,
+                     const vector<size_t> &p_column, vector<int> &indicator_cvt,
+                     vector<vector<double>> &cvt, size_t &n_cvt);
 
 #endif
-
-
-
-
-
-
-
diff --git a/src/lapack.cpp b/src/lapack.cpp
index 05b85f4..8f6e8ff 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -16,614 +16,612 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 #include <cmath>
+#include <iostream>
 #include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 
 using namespace std;
 
 extern "C" void sgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K,
-		       float *ALPHA, float *A, int *LDA, float *B, int *LDB,
-		       float *BETA, float *C, int *LDC);
+                       float *ALPHA, float *A, int *LDA, float *B, int *LDB,
+                       float *BETA, float *C, int *LDC);
 extern "C" void spotrf_(char *UPLO, int *N, float *A, int *LDA, int *INFO);
 extern "C" void spotrs_(char *UPLO, int *N, int *NRHS, float *A, int *LDA,
-			float *B, int *LDB, int *INFO);
-extern "C" void ssyev_(char* JOBZ, char* UPLO, int *N, float *A, int *LDA,
-		       float *W, float *WORK, int *LWORK, int *INFO);
-extern "C" void ssyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N,
-			float *A, int *LDA, float *VL, float *VU, int *IL,
-			int *IU, float *ABSTOL, int *M, float *W, float *Z,
-			int *LDZ, int *ISUPPZ, float *WORK, int *LWORK,
-			int *IWORK, int *LIWORK, int *INFO);
+                        float *B, int *LDB, int *INFO);
+extern "C" void ssyev_(char *JOBZ, char *UPLO, int *N, float *A, int *LDA,
+                       float *W, float *WORK, int *LWORK, int *INFO);
+extern "C" void ssyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, float *A,
+                        int *LDA, float *VL, float *VU, int *IL, int *IU,
+                        float *ABSTOL, int *M, float *W, float *Z, int *LDZ,
+                        int *ISUPPZ, float *WORK, int *LWORK, int *IWORK,
+                        int *LIWORK, int *INFO);
 extern "C" double sdot_(int *N, float *DX, int *INCX, float *DY, int *INCY);
 
 extern "C" void dgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K,
-		       double *ALPHA, double *A, int *LDA, double *B,
-		       int *LDB, double *BETA, double *C, int *LDC);
+                       double *ALPHA, double *A, int *LDA, double *B, int *LDB,
+                       double *BETA, double *C, int *LDC);
 extern "C" void dpotrf_(char *UPLO, int *N, double *A, int *LDA, int *INFO);
 extern "C" void dpotrs_(char *UPLO, int *N, int *NRHS, double *A, int *LDA,
-			double *B, int *LDB, int *INFO);
-extern "C" void dsyev_(char* JOBZ, char* UPLO, int *N, double *A, int *LDA,
-		       double *W, double *WORK, int *LWORK, int *INFO);
-extern "C" void dsyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N,
-			double *A, int *LDA, double *VL, double *VU,
-			int *IL, int *IU, double *ABSTOL, int *M,
-			double *W, double *Z, int *LDZ, int *ISUPPZ,
-			double *WORK, int *LWORK, int *IWORK,
-			int *LIWORK, int *INFO);
+                        double *B, int *LDB, int *INFO);
+extern "C" void dsyev_(char *JOBZ, char *UPLO, int *N, double *A, int *LDA,
+                       double *W, double *WORK, int *LWORK, int *INFO);
+extern "C" void dsyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, double *A,
+                        int *LDA, double *VL, double *VU, int *IL, int *IU,
+                        double *ABSTOL, int *M, double *W, double *Z, int *LDZ,
+                        int *ISUPPZ, double *WORK, int *LWORK, int *IWORK,
+                        int *LIWORK, int *INFO);
 extern "C" double ddot_(int *N, double *DX, int *INCX, double *DY, int *INCY);
 
 // Cholesky decomposition, A is destroyed.
-void lapack_float_cholesky_decomp (gsl_matrix_float *A) {
-	int N=A->size1, LDA=A->size1, INFO;
-	char UPLO='L';
-
-	if (N!=(int)A->size2) {
-	  cout << "Matrix needs to be symmetric and same dimension in " <<
-	    "lapack_cholesky_decomp." << endl;
-	  return;
-	}
-
-	spotrf_(&UPLO, &N, A->data, &LDA, &INFO);
-	if (INFO!=0) {
-	  cout << "Cholesky decomposition unsuccessful in " <<
-	    "lapack_cholesky_decomp." << endl;
-	  return;
-	}
-
-	return;
+void lapack_float_cholesky_decomp(gsl_matrix_float *A) {
+  int N = A->size1, LDA = A->size1, INFO;
+  char UPLO = 'L';
+
+  if (N != (int)A->size2) {
+    cout << "Matrix needs to be symmetric and same dimension in "
+         << "lapack_cholesky_decomp." << endl;
+    return;
+  }
+
+  spotrf_(&UPLO, &N, A->data, &LDA, &INFO);
+  if (INFO != 0) {
+    cout << "Cholesky decomposition unsuccessful in "
+         << "lapack_cholesky_decomp." << endl;
+    return;
+  }
+
+  return;
 }
 
 // Cholesky decomposition, A is destroyed.
-void lapack_cholesky_decomp (gsl_matrix *A) {
-	int N=A->size1, LDA=A->size1, INFO;
-	char UPLO='L';
-
-	if (N!=(int)A->size2) {
-	  cout << "Matrix needs to be symmetric and same dimension in " <<
-	    "lapack_cholesky_decomp." << endl;
-	  return;
-	}
-
-	dpotrf_(&UPLO, &N, A->data, &LDA, &INFO);
-	if (INFO!=0) {
-	  cout << "Cholesky decomposition unsuccessful in " <<
-	    "lapack_cholesky_decomp."<<endl;
-	  return;
-	}
-
-	return;
+void lapack_cholesky_decomp(gsl_matrix *A) {
+  int N = A->size1, LDA = A->size1, INFO;
+  char UPLO = 'L';
+
+  if (N != (int)A->size2) {
+    cout << "Matrix needs to be symmetric and same dimension in "
+         << "lapack_cholesky_decomp." << endl;
+    return;
+  }
+
+  dpotrf_(&UPLO, &N, A->data, &LDA, &INFO);
+  if (INFO != 0) {
+    cout << "Cholesky decomposition unsuccessful in "
+         << "lapack_cholesky_decomp." << endl;
+    return;
+  }
+
+  return;
 }
 
 // Cholesky solve, A is decomposed.
-void lapack_float_cholesky_solve (gsl_matrix_float *A,
-				  const gsl_vector_float *b,
-				  gsl_vector_float *x) {
-	int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO;
-	char UPLO='L';
-
-
-	if (N!=(int)A->size2 || N!=LDB) {
-	  cout << "Matrix needs to be symmetric and same dimension in " <<
-	    "lapack_cholesky_solve." << endl;
-	  return;
-	}
-
-	gsl_vector_float_memcpy (x, b);
-	spotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
-	if (INFO!=0) {
-	  cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." <<
-	    endl;
-	  return;
-	}
-
-	return;
+void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b,
+                                 gsl_vector_float *x) {
+  int N = A->size1, NRHS = 1, LDA = A->size1, LDB = b->size, INFO;
+  char UPLO = 'L';
+
+  if (N != (int)A->size2 || N != LDB) {
+    cout << "Matrix needs to be symmetric and same dimension in "
+         << "lapack_cholesky_solve." << endl;
+    return;
+  }
+
+  gsl_vector_float_memcpy(x, b);
+  spotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
+  if (INFO != 0) {
+    cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << endl;
+    return;
+  }
+
+  return;
 }
 
 // Cholesky solve, A is decomposed.
-void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b,
-			    gsl_vector *x) {
-	int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO;
-	char UPLO='L';
-
-	if (N!=(int)A->size2 || N!=LDB) {
-	  cout << "Matrix needs to be symmetric and same dimension in " <<
-	    "lapack_cholesky_solve." << endl;
-	  return;
-	}
-
-	gsl_vector_memcpy (x, b);
-	dpotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
-	if (INFO!=0) {
-	  cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." <<
-	    endl;
-	  return;
-	}
-
-	return;
-}
+void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x) {
+  int N = A->size1, NRHS = 1, LDA = A->size1, LDB = b->size, INFO;
+  char UPLO = 'L';
+
+  if (N != (int)A->size2 || N != LDB) {
+    cout << "Matrix needs to be symmetric and same dimension in "
+         << "lapack_cholesky_solve." << endl;
+    return;
+  }
+
+  gsl_vector_memcpy(x, b);
+  dpotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
+  if (INFO != 0) {
+    cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << endl;
+    return;
+  }
 
-void lapack_sgemm (char *TransA, char *TransB, float alpha,
-		   const gsl_matrix_float *A, const gsl_matrix_float *B,
-		   float beta, gsl_matrix_float *C) {
-	int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2;
-
-	if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;}
-	else if (*TransA=='T' || *TransA=='t') {M=A->size2; K1=A->size1;}
-	else {cout<<"need 'N' or 'T' in lapack_sgemm"<<endl; return;}
-
-	if (*TransB=='N' || *TransB=='n') {N=B->size2; K2=B->size1;}
-	else if (*TransB=='T' || *TransB=='t')  {N=B->size1; K2=B->size2;}
-	else {cout<<"need 'N' or 'T' in lapack_sgemm"<<endl;  return;}
-
-	if (K1!=K2) {
-	  cout<<"A and B not compatible in lapack_sgemm"<<endl;
-	  return;
-	}
-	if (C->size1!=(size_t)M || C->size2!=(size_t)N) {
-	  cout<<"C not compatible in lapack_sgemm"<<endl;
-	  return;
-	}
-
-	gsl_matrix_float *A_t=gsl_matrix_float_alloc (A->size2, A->size1);
-	gsl_matrix_float_transpose_memcpy (A_t, A);
-	gsl_matrix_float *B_t=gsl_matrix_float_alloc (B->size2, B->size1);
-	gsl_matrix_float_transpose_memcpy (B_t, B);
-	gsl_matrix_float *C_t=gsl_matrix_float_alloc (C->size2, C->size1);
-	gsl_matrix_float_transpose_memcpy (C_t, C);
-
-	sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA,
-	       B_t->data, &LDB, &beta, C_t->data, &LDC);
-	gsl_matrix_float_transpose_memcpy (C, C_t);
-
-	gsl_matrix_float_free (A_t);
-	gsl_matrix_float_free (B_t);
-	gsl_matrix_float_free (C_t);
-	return;
+  return;
 }
 
+void lapack_sgemm(char *TransA, char *TransB, float alpha,
+                  const gsl_matrix_float *A, const gsl_matrix_float *B,
+                  float beta, gsl_matrix_float *C) {
+  int M, N, K1, K2, LDA = A->size1, LDB = B->size1, LDC = C->size2;
+
+  if (*TransA == 'N' || *TransA == 'n') {
+    M = A->size1;
+    K1 = A->size2;
+  } else if (*TransA == 'T' || *TransA == 't') {
+    M = A->size2;
+    K1 = A->size1;
+  } else {
+    cout << "need 'N' or 'T' in lapack_sgemm" << endl;
+    return;
+  }
 
+  if (*TransB == 'N' || *TransB == 'n') {
+    N = B->size2;
+    K2 = B->size1;
+  } else if (*TransB == 'T' || *TransB == 't') {
+    N = B->size1;
+    K2 = B->size2;
+  } else {
+    cout << "need 'N' or 'T' in lapack_sgemm" << endl;
+    return;
+  }
 
-void lapack_dgemm (char *TransA, char *TransB, double alpha,
-		   const gsl_matrix *A, const gsl_matrix *B,
-		   double beta, gsl_matrix *C) {
-	int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2;
+  if (K1 != K2) {
+    cout << "A and B not compatible in lapack_sgemm" << endl;
+    return;
+  }
+  if (C->size1 != (size_t)M || C->size2 != (size_t)N) {
+    cout << "C not compatible in lapack_sgemm" << endl;
+    return;
+  }
 
-	if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;}
-	else if (*TransA=='T' || *TransA=='t') {M=A->size2; K1=A->size1;}
-	else {cout<<"need 'N' or 'T' in lapack_dgemm"<<endl; return;}
+  gsl_matrix_float *A_t = gsl_matrix_float_alloc(A->size2, A->size1);
+  gsl_matrix_float_transpose_memcpy(A_t, A);
+  gsl_matrix_float *B_t = gsl_matrix_float_alloc(B->size2, B->size1);
+  gsl_matrix_float_transpose_memcpy(B_t, B);
+  gsl_matrix_float *C_t = gsl_matrix_float_alloc(C->size2, C->size1);
+  gsl_matrix_float_transpose_memcpy(C_t, C);
+
+  sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB,
+         &beta, C_t->data, &LDC);
+  gsl_matrix_float_transpose_memcpy(C, C_t);
+
+  gsl_matrix_float_free(A_t);
+  gsl_matrix_float_free(B_t);
+  gsl_matrix_float_free(C_t);
+  return;
+}
 
-	if (*TransB=='N' || *TransB=='n') {N=B->size2; K2=B->size1;}
-	else if (*TransB=='T' || *TransB=='t')  {N=B->size1; K2=B->size2;}
-	else {cout<<"need 'N' or 'T' in lapack_dgemm"<<endl;  return;}
+void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
+                  const gsl_matrix *B, double beta, gsl_matrix *C) {
+  int M, N, K1, K2, LDA = A->size1, LDB = B->size1, LDC = C->size2;
+
+  if (*TransA == 'N' || *TransA == 'n') {
+    M = A->size1;
+    K1 = A->size2;
+  } else if (*TransA == 'T' || *TransA == 't') {
+    M = A->size2;
+    K1 = A->size1;
+  } else {
+    cout << "need 'N' or 'T' in lapack_dgemm" << endl;
+    return;
+  }
+
+  if (*TransB == 'N' || *TransB == 'n') {
+    N = B->size2;
+    K2 = B->size1;
+  } else if (*TransB == 'T' || *TransB == 't') {
+    N = B->size1;
+    K2 = B->size2;
+  } else {
+    cout << "need 'N' or 'T' in lapack_dgemm" << endl;
+    return;
+  }
 
-	if (K1!=K2) {
-	  cout << "A and B not compatible in lapack_dgemm"<<endl;
-	  return;
-	}
-	if (C->size1!=(size_t)M || C->size2!=(size_t)N) {
-	  cout<<"C not compatible in lapack_dgemm"<<endl;
-	  return;
-	}
+  if (K1 != K2) {
+    cout << "A and B not compatible in lapack_dgemm" << endl;
+    return;
+  }
+  if (C->size1 != (size_t)M || C->size2 != (size_t)N) {
+    cout << "C not compatible in lapack_dgemm" << endl;
+    return;
+  }
 
-	gsl_matrix *A_t=gsl_matrix_alloc (A->size2, A->size1);
-	gsl_matrix_transpose_memcpy (A_t, A);
-	gsl_matrix *B_t=gsl_matrix_alloc (B->size2, B->size1);
-	gsl_matrix_transpose_memcpy (B_t, B);
-	gsl_matrix *C_t=gsl_matrix_alloc (C->size2, C->size1);
-	gsl_matrix_transpose_memcpy (C_t, C);
+  gsl_matrix *A_t = gsl_matrix_alloc(A->size2, A->size1);
+  gsl_matrix_transpose_memcpy(A_t, A);
+  gsl_matrix *B_t = gsl_matrix_alloc(B->size2, B->size1);
+  gsl_matrix_transpose_memcpy(B_t, B);
+  gsl_matrix *C_t = gsl_matrix_alloc(C->size2, C->size1);
+  gsl_matrix_transpose_memcpy(C_t, C);
 
-	dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA,
-	       B_t->data, &LDB, &beta, C_t->data, &LDC);
+  dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB,
+         &beta, C_t->data, &LDC);
 
-	gsl_matrix_transpose_memcpy (C, C_t);
+  gsl_matrix_transpose_memcpy(C, C_t);
 
-	gsl_matrix_free (A_t);
-	gsl_matrix_free (B_t);
-	gsl_matrix_free (C_t);
-	return;
+  gsl_matrix_free(A_t);
+  gsl_matrix_free(B_t);
+  gsl_matrix_free(C_t);
+  return;
 }
 
 // Eigen value decomposition, matrix A is destroyed, float seems to
 // have problem with large matrices (in mac).
-void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
-			       gsl_matrix_float *evec,
-			       const size_t flag_largematrix) {
-	if (flag_largematrix==1) {
-		int N=A->size1, LDA=A->size1, INFO, LWORK=-1;
-		char JOBZ='V', UPLO='L';
-
-		if (N!=(int)A->size2 || N!=(int)eval->size) {
-		  cout << "Matrix needs to be symmetric and same " <<
-		    "dimension in lapack_eigen_symmv."<<endl;
-		  return;
-		}
-
-		LWORK=3*N;
-		float *WORK=new float [LWORK];
-		ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK,
-		       &LWORK, &INFO);
-		if (INFO!=0) {
-		  cout << "Eigen decomposition unsuccessful in " <<
-		    "lapack_eigen_symmv."<<endl;
-		  return;
-		}
-
-		gsl_matrix_float_view A_sub =
-		  gsl_matrix_float_submatrix(A, 0, 0, N, N);
-		gsl_matrix_float_memcpy (evec, &A_sub.matrix);
-		gsl_matrix_float_transpose (evec);
-
-		delete [] WORK;
-	} else {
-		int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO,
-		  LWORK=-1, LIWORK=-1;
-		char JOBZ='V', UPLO='L', RANGE='A';
-		float ABSTOL=1.0E-7;
-
-		// VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
-		float VL=0.0, VU=0.0;
-		int IL=0, IU=0, M;
-
-		if (N!=(int)A->size2 || N!=(int)eval->size) {
-		  cout << "Matrix needs to be symmetric and same " <<
-		    "dimension in lapack_float_eigen_symmv." << endl;
-		  return;
-		}
-
-		int *ISUPPZ=new int [2*N];
-
-		float WORK_temp[1];
-		int IWORK_temp[1];
-		ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL,
-			&VU, &IL, &IU, &ABSTOL, &M, eval->data,
-			evec->data, &LDZ, ISUPPZ, WORK_temp, &LWORK,
-			IWORK_temp, &LIWORK, &INFO);
-		if (INFO!=0) {
-		  cout << "Work space estimate unsuccessful in " <<
-		    "lapack_float_eigen_symmv." << endl;
-		  return;
-		}
-		LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0];
-
-		float *WORK=new float [LWORK];
-		int *IWORK=new int [LIWORK];
-
-		ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL,
-			&VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data,
-			&LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO);
-		if (INFO!=0) {
-		  cout << "Eigen decomposition unsuccessful in " <<
-		    "lapack_float_eigen_symmv." << endl;
-		  return;
-		}
-
-		gsl_matrix_float_transpose (evec);
-
-		delete [] ISUPPZ;
-		delete [] WORK;
-		delete [] IWORK;
-	}
-
-
-	return;
-}
-
+void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval,
+                              gsl_matrix_float *evec,
+                              const size_t flag_largematrix) {
+  if (flag_largematrix == 1) {
+    int N = A->size1, LDA = A->size1, INFO, LWORK = -1;
+    char JOBZ = 'V', UPLO = 'L';
+
+    if (N != (int)A->size2 || N != (int)eval->size) {
+      cout << "Matrix needs to be symmetric and same "
+           << "dimension in lapack_eigen_symmv." << endl;
+      return;
+    }
+
+    LWORK = 3 * N;
+    float *WORK = new float[LWORK];
+    ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO);
+    if (INFO != 0) {
+      cout << "Eigen decomposition unsuccessful in "
+           << "lapack_eigen_symmv." << endl;
+      return;
+    }
+
+    gsl_matrix_float_view A_sub = gsl_matrix_float_submatrix(A, 0, 0, N, N);
+    gsl_matrix_float_memcpy(evec, &A_sub.matrix);
+    gsl_matrix_float_transpose(evec);
+
+    delete[] WORK;
+  } else {
+    int N = A->size1, LDA = A->size1, LDZ = A->size1, INFO, LWORK = -1,
+        LIWORK = -1;
+    char JOBZ = 'V', UPLO = 'L', RANGE = 'A';
+    float ABSTOL = 1.0E-7;
+
+    // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
+    float VL = 0.0, VU = 0.0;
+    int IL = 0, IU = 0, M;
+
+    if (N != (int)A->size2 || N != (int)eval->size) {
+      cout << "Matrix needs to be symmetric and same "
+           << "dimension in lapack_float_eigen_symmv." << endl;
+      return;
+    }
+
+    int *ISUPPZ = new int[2 * N];
+
+    float WORK_temp[1];
+    int IWORK_temp[1];
+    ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+            &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp,
+            &LWORK, IWORK_temp, &LIWORK, &INFO);
+    if (INFO != 0) {
+      cout << "Work space estimate unsuccessful in "
+           << "lapack_float_eigen_symmv." << endl;
+      return;
+    }
+    LWORK = (int)WORK_temp[0];
+    LIWORK = (int)IWORK_temp[0];
+
+    float *WORK = new float[LWORK];
+    int *IWORK = new int[LIWORK];
+
+    ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+            &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK,
+            IWORK, &LIWORK, &INFO);
+    if (INFO != 0) {
+      cout << "Eigen decomposition unsuccessful in "
+           << "lapack_float_eigen_symmv." << endl;
+      return;
+    }
+
+    gsl_matrix_float_transpose(evec);
+
+    delete[] ISUPPZ;
+    delete[] WORK;
+    delete[] IWORK;
+  }
 
+  return;
+}
 
 // Eigenvalue decomposition, matrix A is destroyed.
-void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
-			 const size_t flag_largematrix) {
-	if (flag_largematrix==1) {
-		int N=A->size1, LDA=A->size1, INFO, LWORK=-1;
-		char JOBZ='V', UPLO='L';
-
-		if (N!=(int)A->size2 || N!=(int)eval->size) {
-		  cout << "Matrix needs to be symmetric and same " <<
-		    "dimension in lapack_eigen_symmv." << endl;
-		  return;
-		}
-
-		LWORK=3*N;
-		double *WORK=new double [LWORK];
-		dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK,
-		       &LWORK, &INFO);
-		if (INFO!=0) {
-		  cout<<"Eigen decomposition unsuccessful in " <<
-		    "lapack_eigen_symmv." << endl;
-		  return;
-		}
-
-		gsl_matrix_view A_sub=gsl_matrix_submatrix(A, 0, 0, N, N);
-		gsl_matrix_memcpy (evec, &A_sub.matrix);
-		gsl_matrix_transpose (evec);
-
-		delete [] WORK;
-	} else {
-   	        int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO;
-		int LWORK=-1, LIWORK=-1;
-		char JOBZ='V', UPLO='L', RANGE='A';
-		double ABSTOL=1.0E-7;
-
-		// VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
-		double VL=0.0, VU=0.0;
-		int IL=0, IU=0, M;
-
-		if (N!=(int)A->size2 || N!=(int)eval->size) {
-		  cout << "Matrix needs to be symmetric and same " <<
-		    "dimension in lapack_eigen_symmv." << endl;
-		  return;
-		}
-
-		int *ISUPPZ=new int [2*N];
-
-		double WORK_temp[1];
-		int IWORK_temp[1];
-
-		dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU,
-			&IL, &IU, &ABSTOL, &M, eval->data, evec->data,
-			&LDZ, ISUPPZ, WORK_temp, &LWORK, IWORK_temp,
-			&LIWORK, &INFO);
-		if (INFO!=0) {
-		  cout << "Work space estimate unsuccessful in " <<
-		    "lapack_eigen_symmv." << endl;
-		  return;
-		}
-		LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0];
-
-		double *WORK=new double [LWORK];
-		int *IWORK=new int [LIWORK];
-
-		dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU,
-			&IL, &IU, &ABSTOL, &M, eval->data, evec->data,
-			&LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO);
-		if (INFO!=0) {
-		  cout << "Eigen decomposition unsuccessful in " <<
-		    "lapack_eigen_symmv." << endl;
-		  return;
-		}
-
-		gsl_matrix_transpose (evec);
-
-		delete [] ISUPPZ;
-		delete [] WORK;
-		delete [] IWORK;
-	}
-
-	return;
+void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
+                        const size_t flag_largematrix) {
+  if (flag_largematrix == 1) {
+    int N = A->size1, LDA = A->size1, INFO, LWORK = -1;
+    char JOBZ = 'V', UPLO = 'L';
+
+    if (N != (int)A->size2 || N != (int)eval->size) {
+      cout << "Matrix needs to be symmetric and same "
+           << "dimension in lapack_eigen_symmv." << endl;
+      return;
+    }
+
+    LWORK = 3 * N;
+    double *WORK = new double[LWORK];
+    dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO);
+    if (INFO != 0) {
+      cout << "Eigen decomposition unsuccessful in "
+           << "lapack_eigen_symmv." << endl;
+      return;
+    }
+
+    gsl_matrix_view A_sub = gsl_matrix_submatrix(A, 0, 0, N, N);
+    gsl_matrix_memcpy(evec, &A_sub.matrix);
+    gsl_matrix_transpose(evec);
+
+    delete[] WORK;
+  } else {
+    int N = A->size1, LDA = A->size1, LDZ = A->size1, INFO;
+    int LWORK = -1, LIWORK = -1;
+    char JOBZ = 'V', UPLO = 'L', RANGE = 'A';
+    double ABSTOL = 1.0E-7;
+
+    // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
+    double VL = 0.0, VU = 0.0;
+    int IL = 0, IU = 0, M;
+
+    if (N != (int)A->size2 || N != (int)eval->size) {
+      cout << "Matrix needs to be symmetric and same "
+           << "dimension in lapack_eigen_symmv." << endl;
+      return;
+    }
+
+    int *ISUPPZ = new int[2 * N];
+
+    double WORK_temp[1];
+    int IWORK_temp[1];
+
+    dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+            &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp,
+            &LWORK, IWORK_temp, &LIWORK, &INFO);
+    if (INFO != 0) {
+      cout << "Work space estimate unsuccessful in "
+           << "lapack_eigen_symmv." << endl;
+      return;
+    }
+    LWORK = (int)WORK_temp[0];
+    LIWORK = (int)IWORK_temp[0];
+
+    double *WORK = new double[LWORK];
+    int *IWORK = new int[LIWORK];
+
+    dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+            &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK,
+            IWORK, &LIWORK, &INFO);
+    if (INFO != 0) {
+      cout << "Eigen decomposition unsuccessful in "
+           << "lapack_eigen_symmv." << endl;
+      return;
+    }
+
+    gsl_matrix_transpose(evec);
+
+    delete[] ISUPPZ;
+    delete[] WORK;
+    delete[] IWORK;
+  }
+
+  return;
 }
 
 // DO NOT set eigenvalues to be positive.
-double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
-		    const size_t flag_largematrix) {
-	lapack_eigen_symmv (G, eval, U, flag_largematrix);
-
-	// Calculate track_G=mean(diag(G)).
-	double d=0.0;
-	for (size_t i=0; i<eval->size; ++i) {
-		d+=gsl_vector_get(eval, i);
-	}
-	d/=(double)eval->size;
-
-	return d;
-}
+double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
+                   const size_t flag_largematrix) {
+  lapack_eigen_symmv(G, eval, U, flag_largematrix);
+
+  // Calculate trace_G=mean(diag(G)), obtained as the mean of the eigenvalues.
+  double d = 0.0;
+  for (size_t i = 0; i < eval->size; ++i) {
+    d += gsl_vector_get(eval, i);
+  }
+  d /= (double)eval->size;
 
+  return d;
+}
 
 // DO NOT set eigen values to be positive.
-double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U,
-		    gsl_vector_float *eval, const size_t flag_largematrix) {
-	lapack_float_eigen_symmv (G, eval, U, flag_largematrix);
-
-	// Calculate track_G=mean(diag(G)).
-	double d = 0.0;
-	for (size_t i=0; i<eval->size; ++i) {
-		d+=gsl_vector_float_get(eval, i);
-	}
-	d/=(double)eval->size;
-
-	return d;
-}
+double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
+                   gsl_vector_float *eval, const size_t flag_largematrix) {
+  lapack_float_eigen_symmv(G, eval, U, flag_largematrix);
+
+  // Calculate trace_G=mean(diag(G)), obtained as the mean of the eigenvalues.
+  double d = 0.0;
+  for (size_t i = 0; i < eval->size; ++i) {
+    d += gsl_vector_float_get(eval, i);
+  }
+  d /= (double)eval->size;
 
+  return d;
+}
 
 double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) {
-	double logdet_O=0.0;
+  double logdet_O = 0.0;
 
-	lapack_cholesky_decomp(Omega);
-	for (size_t i=0; i<Omega->size1; ++i) {
-		logdet_O+=log(gsl_matrix_get (Omega, i, i));
-	}
-	logdet_O*=2.0;
-	lapack_cholesky_solve(Omega, Xty, OiXty);
+  lapack_cholesky_decomp(Omega);
+  for (size_t i = 0; i < Omega->size1; ++i) {
+    logdet_O += log(gsl_matrix_get(Omega, i, i));
+  }
+  logdet_O *= 2.0;
+  lapack_cholesky_solve(Omega, Xty, OiXty);
 
-	return logdet_O;
+  return logdet_O;
 }
 
-
 double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
-		     gsl_vector_float *OiXty) {
-	double logdet_O=0.0;
+                     gsl_vector_float *OiXty) {
+  double logdet_O = 0.0;
 
-	lapack_float_cholesky_decomp(Omega);
-	for (size_t i=0; i<Omega->size1; ++i) {
-		logdet_O+=log(gsl_matrix_float_get (Omega, i, i));
-	}
-	logdet_O*=2.0;
-	lapack_float_cholesky_solve(Omega, Xty, OiXty);
+  lapack_float_cholesky_decomp(Omega);
+  for (size_t i = 0; i < Omega->size1; ++i) {
+    logdet_O += log(gsl_matrix_float_get(Omega, i, i));
+  }
+  logdet_O *= 2.0;
+  lapack_float_cholesky_solve(Omega, Xty, OiXty);
 
-	return logdet_O;
+  return logdet_O;
 }
 
-
 // LU decomposition.
-void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum) {
-	gsl_linalg_LU_decomp (LU, p, signum);
-	return;
+void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) {
+  gsl_linalg_LU_decomp(LU, p, signum);
+  return;
 }
 
-void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum) {
-	gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
-
-	// Copy float matrix to double.
-	for (size_t i=0; i<LU->size1; i++) {
-		for (size_t j=0; j<LU->size2; j++) {
-			gsl_matrix_set (LU_double, i, j,
-					gsl_matrix_float_get(LU, i, j));
-		}
-	}
-
-	// LU decomposition.
-	gsl_linalg_LU_decomp (LU_double, p, signum);
-
-	// Copy float matrix to double.
-	for (size_t i=0; i<LU->size1; i++) {
-		for (size_t j=0; j<LU->size2; j++) {
-			gsl_matrix_float_set (LU, i, j,
-					      gsl_matrix_get(LU_double, i, j));
-		}
-	}
-
-	// Free matrix.
-	gsl_matrix_free (LU_double);
-	return;
-}
+void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) {
+  gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+
+  // Copy float matrix to double.
+  for (size_t i = 0; i < LU->size1; i++) {
+    for (size_t j = 0; j < LU->size2; j++) {
+      gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+    }
+  }
 
+  // LU decomposition.
+  gsl_linalg_LU_decomp(LU_double, p, signum);
+
+  // Copy double matrix back to float.
+  for (size_t i = 0; i < LU->size1; i++) {
+    for (size_t j = 0; j < LU->size2; j++) {
+      gsl_matrix_float_set(LU, i, j, gsl_matrix_get(LU_double, i, j));
+    }
+  }
+
+  // Free matrix.
+  gsl_matrix_free(LU_double);
+  return;
+}
 
 // LU invert.
-void LUInvert (const gsl_matrix *LU, const gsl_permutation *p,
-	       gsl_matrix *inverse) {
-	gsl_linalg_LU_invert (LU, p, inverse);
-	return;
+void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
+              gsl_matrix *inverse) {
+  gsl_linalg_LU_invert(LU, p, inverse);
+  return;
 }
 
-void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p,
-	       gsl_matrix_float *inverse) {
-	gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
-	gsl_matrix *inverse_double=gsl_matrix_alloc (inverse->size1,
-						     inverse->size2);
-
-	// Copy float matrix to double.
-	for (size_t i=0; i<LU->size1; i++) {
-		for (size_t j=0; j<LU->size2; j++) {
-			gsl_matrix_set (LU_double, i, j,
-					gsl_matrix_float_get(LU, i, j));
-		}
-	}
-
-	// LU decomposition.
-	gsl_linalg_LU_invert (LU_double, p, inverse_double);
-
-	// Copy float matrix to double.
-	for (size_t i=0; i<inverse->size1; i++) {
-		for (size_t j=0; j<inverse->size2; j++) {
-			gsl_matrix_float_set (inverse, i, j,
-					      gsl_matrix_get(inverse_double,
-							     i, j));
-		}
-	}
-
-	// Free matrix.
-	gsl_matrix_free (LU_double);
-	gsl_matrix_free (inverse_double);
-	return;
+void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
+              gsl_matrix_float *inverse) {
+  gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+  gsl_matrix *inverse_double = gsl_matrix_alloc(inverse->size1, inverse->size2);
+
+  // Copy float matrix to double.
+  for (size_t i = 0; i < LU->size1; i++) {
+    for (size_t j = 0; j < LU->size2; j++) {
+      gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+    }
+  }
+
+  // LU inversion (LU is expected to be already decomposed).
+  gsl_linalg_LU_invert(LU_double, p, inverse_double);
+
+  // Copy double inverse back to float.
+  for (size_t i = 0; i < inverse->size1; i++) {
+    for (size_t j = 0; j < inverse->size2; j++) {
+      gsl_matrix_float_set(inverse, i, j, gsl_matrix_get(inverse_double, i, j));
+    }
+  }
+
+  // Free matrix.
+  gsl_matrix_free(LU_double);
+  gsl_matrix_free(inverse_double);
+  return;
 }
 
 // LU lndet.
-double LULndet (gsl_matrix *LU) {
-	double d;
-	d=gsl_linalg_LU_lndet (LU);
-	return d;
+double LULndet(gsl_matrix *LU) {
+  double d;
+  d = gsl_linalg_LU_lndet(LU);
+  return d;
 }
 
-double LULndet (gsl_matrix_float *LU) {
-	gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
-	double d;
+double LULndet(gsl_matrix_float *LU) {
+  gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+  double d;
 
-	// Copy float matrix to double.
-	for (size_t i=0; i<LU->size1; i++) {
-		for (size_t j=0; j<LU->size2; j++) {
-			gsl_matrix_set (LU_double, i, j, gsl_matrix_float_get(LU, i, j));
-		}
-	}
+  // Copy float matrix to double.
+  for (size_t i = 0; i < LU->size1; i++) {
+    for (size_t j = 0; j < LU->size2; j++) {
+      gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+    }
+  }
 
-	// LU decomposition.
-	d=gsl_linalg_LU_lndet (LU_double);
+  // Log of the absolute determinant from the LU factors.
+  d = gsl_linalg_LU_lndet(LU_double);
 
-	// Free matrix
-	gsl_matrix_free (LU_double);
-	return d;
+  // Free matrix
+  gsl_matrix_free(LU_double);
+  return d;
 }
 
-
 // LU solve.
-void LUSolve (const gsl_matrix *LU, const gsl_permutation *p,
-	      const gsl_vector *b, gsl_vector *x) {
-	gsl_linalg_LU_solve (LU, p, b, x);
-	return;
+void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
+             const gsl_vector *b, gsl_vector *x) {
+  gsl_linalg_LU_solve(LU, p, b, x);
+  return;
 }
 
-void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p,
-	      const gsl_vector_float *b, gsl_vector_float *x) {
-	gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
-	gsl_vector *b_double=gsl_vector_alloc (b->size);
-	gsl_vector *x_double=gsl_vector_alloc (x->size);
-
-	// Copy float matrix to double.
-	for (size_t i=0; i<LU->size1; i++) {
-		for (size_t j=0; j<LU->size2; j++) {
-			gsl_matrix_set (LU_double, i, j,
-					gsl_matrix_float_get(LU, i, j));
-		}
-	}
-
-	for (size_t i=0; i<b->size; i++) {
-		gsl_vector_set (b_double, i, gsl_vector_float_get(b, i));
-	}
-
-	for (size_t i=0; i<x->size; i++) {
-		gsl_vector_set (x_double, i, gsl_vector_float_get(x, i));
-	}
-
-	// LU decomposition.
-	gsl_linalg_LU_solve (LU_double, p, b_double, x_double);
-
-	// Copy float matrix to double.
-	for (size_t i=0; i<x->size; i++) {
-		gsl_vector_float_set (x, i, gsl_vector_get(x_double, i));
-	}
-
-	// Free matrix.
-	gsl_matrix_free (LU_double);
-	gsl_vector_free (b_double);
-	gsl_vector_free (x_double);
-	return;
-}
+void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p,
+             const gsl_vector_float *b, gsl_vector_float *x) {
+  gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+  gsl_vector *b_double = gsl_vector_alloc(b->size);
+  gsl_vector *x_double = gsl_vector_alloc(x->size);
+
+  // Copy float matrix to double.
+  for (size_t i = 0; i < LU->size1; i++) {
+    for (size_t j = 0; j < LU->size2; j++) {
+      gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+    }
+  }
 
+  for (size_t i = 0; i < b->size; i++) {
+    gsl_vector_set(b_double, i, gsl_vector_float_get(b, i));
+  }
+
+  for (size_t i = 0; i < x->size; i++) {
+    gsl_vector_set(x_double, i, gsl_vector_float_get(x, i));
+  }
+
+  // LU solve (LU is expected to be already decomposed).
+  gsl_linalg_LU_solve(LU_double, p, b_double, x_double);
+
+  // Copy double solution vector back to float.
+  for (size_t i = 0; i < x->size; i++) {
+    gsl_vector_float_set(x, i, gsl_vector_get(x_double, i));
+  }
+
+  // Free matrix.
+  gsl_matrix_free(LU_double);
+  gsl_vector_free(b_double);
+  gsl_vector_free(x_double);
+  return;
+}
 
 bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) {
-  bool flag=false;
-  int incx=1;
-  int incy=1;
-  int n=(int)x.size();
-  if (x.size()==y.size()) {
-    v=ddot_(&n, &x[0], &incx, &y[0], &incy);
-    flag=true;
+  bool flag = false;
+  int incx = 1;
+  int incy = 1;
+  int n = (int)x.size();
+  if (x.size() == y.size()) {
+    v = ddot_(&n, &x[0], &incx, &y[0], &incy);
+    flag = true;
   }
 
   return flag;
 }
 
-
 bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) {
-  bool flag=false;
-  int incx=1;
-  int incy=1;
-  int n=(int)x.size();
-  if (x.size()==y.size()) {
-    v=sdot_(&n, &x[0], &incx, &y[0], &incy);
-    flag=true;
+  bool flag = false;
+  int incx = 1;
+  int incy = 1;
+  int n = (int)x.size();
+  if (x.size() == y.size()) {
+    v = sdot_(&n, &x[0], &incx, &y[0], &incy);
+    flag = true;
   }
 
   return flag;
diff --git a/src/lapack.h b/src/lapack.h
index 5e1db35..ff02b96 100644
--- a/src/lapack.h
+++ b/src/lapack.h
@@ -23,45 +23,43 @@
 
 using namespace std;
 
-void lapack_float_cholesky_decomp (gsl_matrix_float *A);
-void lapack_cholesky_decomp (gsl_matrix *A);
-void lapack_float_cholesky_solve (gsl_matrix_float *A,
-				  const gsl_vector_float *b,
-				  gsl_vector_float *x);
-void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b, gsl_vector *x);
-void lapack_sgemm (char *TransA, char *TransB, float alpha,
-		   const gsl_matrix_float *A, const gsl_matrix_float *B,
-		   float beta, gsl_matrix_float *C);
-void lapack_dgemm (char *TransA, char *TransB, double alpha,
-		   const gsl_matrix *A, const gsl_matrix *B,
-		   double beta, gsl_matrix *C);
-void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
-			       gsl_matrix_float *evec,
-			       const size_t flag_largematrix);
-void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
-			 const size_t flag_largematrix);
+void lapack_float_cholesky_decomp(gsl_matrix_float *A);
+void lapack_cholesky_decomp(gsl_matrix *A);
+void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b,
+                                 gsl_vector_float *x);
+void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x);
+void lapack_sgemm(char *TransA, char *TransB, float alpha,
+                  const gsl_matrix_float *A, const gsl_matrix_float *B,
+                  float beta, gsl_matrix_float *C);
+void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
+                  const gsl_matrix *B, double beta, gsl_matrix *C);
+void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval,
+                              gsl_matrix_float *evec,
+                              const size_t flag_largematrix);
+void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
+                        const size_t flag_largematrix);
 
-double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
-		    const size_t flag_largematrix);
-double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U,
-		    gsl_vector_float *eval, const size_t flag_largematrix);
+double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
+                   const size_t flag_largematrix);
+double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
+                   gsl_vector_float *eval, const size_t flag_largematrix);
 
 double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty);
 double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
-		     gsl_vector_float *OiXty);
+                     gsl_vector_float *OiXty);
 
-void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum);
-void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum);
-void LUInvert (const gsl_matrix *LU, const gsl_permutation *p,
-	       gsl_matrix *inverse);
-void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p,
-	       gsl_matrix_float *inverse);
-double LULndet (gsl_matrix *LU);
-double LULndet (gsl_matrix_float *LU);
-void LUSolve (const gsl_matrix *LU, const gsl_permutation *p,
-	      const gsl_vector *b, gsl_vector *x);
-void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p,
-	      const gsl_vector_float *b, gsl_vector_float *x);
+void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum);
+void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum);
+void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
+              gsl_matrix *inverse);
+void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
+              gsl_matrix_float *inverse);
+double LULndet(gsl_matrix *LU);
+double LULndet(gsl_matrix_float *LU);
+void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
+             const gsl_vector *b, gsl_vector *x);
+void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p,
+             const gsl_vector_float *b, gsl_vector_float *x);
 
 bool lapack_ddot(vector<double> &x, vector<double> &y, double &v);
 bool lapack_sdot(vector<float> &x, vector<float> &y, double &v);
diff --git a/src/ldr.cpp b/src/ldr.cpp
index f0a1b37..3554efa 100644
--- a/src/ldr.cpp
+++ b/src/ldr.cpp
@@ -16,67 +16,65 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <algorithm>
 #include <cmath>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
 #include <iostream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <ctime>
-#include <cstring>
-#include <algorithm>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
+#include "Eigen/Dense"
 #include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
 #include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
 #include "gsl/gsl_randist.h"
-#include "gsl/gsl_cdf.h"
 #include "gsl/gsl_roots.h"
-#include "Eigen/Dense"
+#include "gsl/gsl_vector.h"
 
 #include "lapack.h"
-#include "param.h"
 #include "ldr.h"
 #include "lm.h"
 #include "mathfunc.h"
+#include "param.h"
 
 using namespace std;
 using namespace Eigen;
 
-void LDR::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
+void LDR::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
+  d_pace = cPar.d_pace;
 
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
 
-	ni_total=cPar.ni_total;
-	ns_total=cPar.ns_total;
-	ni_test=cPar.ni_test;
-	ns_test=cPar.ns_test;
-	n_cvt=cPar.n_cvt;
+  ni_total = cPar.ni_total;
+  ns_total = cPar.ns_total;
+  ni_test = cPar.ni_test;
+  ns_test = cPar.ns_test;
+  n_cvt = cPar.n_cvt;
 
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
+  indicator_idv = cPar.indicator_idv;
+  indicator_snp = cPar.indicator_snp;
+  snpInfo = cPar.snpInfo;
 
-	return;
+  return;
 }
 
-void LDR::CopyToParam (PARAM &cPar) {
-	return;
-}
+void LDR::CopyToParam(PARAM &cPar) { return; }
 
-//X is a p by n matrix.
-void LDR::VB (const vector<vector<unsigned char> > &Xt,
-	      const gsl_matrix *W_gsl, const gsl_vector *y_gsl) {
+// X is a p by n matrix.
+void LDR::VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
+             const gsl_vector *y_gsl) {
 
   // Save gsl_vector and gsl_matrix into Eigen library formats.
   MatrixXd W(W_gsl->size1, W_gsl->size2);
@@ -84,20 +82,21 @@ void LDR::VB (const vector<vector<unsigned char> > &Xt,
   VectorXd x_col(y_gsl->size);
 
   double d;
-  for (size_t i=0; i<W_gsl->size1; i++) {
-    d=gsl_vector_get(y_gsl, i);
-    y(i)=d;
-    for (size_t j=0; j<W_gsl->size2; j++) {
-      W(i,j)=gsl_matrix_get(W_gsl, i, j);
+  for (size_t i = 0; i < W_gsl->size1; i++) {
+    d = gsl_vector_get(y_gsl, i);
+    y(i) = d;
+    for (size_t j = 0; j < W_gsl->size2; j++) {
+      W(i, j) = gsl_matrix_get(W_gsl, i, j);
     }
   }
 
   // Initial VB values by lm.
-  cout<<indicator_snp[0]<<" "<<indicator_snp[1]<<" "<<indicator_snp[2]<<endl;
-  uchar_matrix_get_row (Xt, 0, x_col);
+  cout << indicator_snp[0] << " " << indicator_snp[1] << " " << indicator_snp[2]
+       << endl;
+  uchar_matrix_get_row(Xt, 0, x_col);
 
-  for (size_t j=0; j<10; j++) {
-    cout<<x_col(j)<<endl;
+  for (size_t j = 0; j < 10; j++) {
+    cout << x_col(j) << endl;
   }
 
   // Run VB iterations.
diff --git a/src/ldr.h b/src/ldr.h
index ab55fe2..6720689 100644
--- a/src/ldr.h
+++ b/src/ldr.h
@@ -19,53 +19,51 @@
 #ifndef __LDR_H__
 #define __LDR_H__
 
-#include <vector>
-#include <map>
-#include <gsl/gsl_rng.h>
-#include <gsl/gsl_randist.h>
 #include "param.h"
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_rng.h>
+#include <map>
+#include <vector>
 
 using namespace std;
 
 class LDR {
 
 public:
-	// IO-related parameters.
-	int a_mode;
-	size_t d_pace;
+  // IO-related parameters.
+  int a_mode;
+  size_t d_pace;
 
-	string file_bfile;
-	string file_geno;
-	string file_out;
-	string path_out;
+  string file_bfile;
+  string file_geno;
+  string file_out;
+  string path_out;
 
-	// Summary statistics.
-	size_t ni_total, ns_total; // Total number of individuals & SNPs.
-	size_t ni_test, ns_test;   // Number of individuals & SNPs used
-                                   // for analysis
-	size_t n_cvt;		   // Number of covariates.
+  // Summary statistics.
+  size_t ni_total, ns_total; // Total number of individuals & SNPs.
+  size_t ni_test, ns_test;   // Number of individuals & SNPs used
+                             // for analysis
+  size_t n_cvt;              // Number of covariates.
 
-        // Indicator for individuals (phenotypes): 0 missing, 1
-        // available for analysis.
-	vector<int> indicator_idv;
+  // Indicator for individuals (phenotypes): 0 missing, 1
+  // available for analysis.
+  vector<int> indicator_idv;
 
-       	// Sequence indicator for SNPs: 0 ignored because of (a) maf,
-       	// (b) miss, (c) non-poly; 1 available for analysis.
-	vector<int> indicator_snp;
+  // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+  // (b) miss, (c) non-poly; 1 available for analysis.
+  vector<int> indicator_snp;
 
-	vector<SNPINFO> snpInfo; // Record SNP information.
+  vector<SNPINFO> snpInfo; // Record SNP information.
 
-	// Not included in PARAM.
-	gsl_rng *gsl_r;
+  // Not included in PARAM.
+  gsl_rng *gsl_r;
 
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
 
-	void VB(const vector<vector<unsigned char> > &Xt,
-		const gsl_matrix *W_gsl, const gsl_vector *y_gsl);
+  void VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
+          const gsl_vector *y_gsl);
 };
 
 #endif
-
-
diff --git a/src/lm.cpp b/src/lm.cpp
index 94729db..f8fc43d 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -16,28 +16,28 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <assert.h>
+#include <bitset>
 #include <cmath>
+#include <cstring>
+#include <iomanip>
 #include <iostream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <assert.h>
-#include <bitset>
-#include <cstring>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 
 #include "gsl/gsl_cdf.h"
-#include "gsl/gsl_roots.h"
-#include "gsl/gsl_min.h"
 #include "gsl/gsl_integration.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_roots.h"
 
 #include "eigenlib.h"
 #include "gzstream.h"
@@ -46,783 +46,835 @@
 
 using namespace std;
 
-void LM::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
+void LM::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
+  d_pace = cPar.d_pace;
 
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
-	file_gene=cPar.file_gene;
-	// WJA added
-	file_oxford=cPar.file_oxford;
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
+  file_gene = cPar.file_gene;
+  // WJA added
+  file_oxford = cPar.file_oxford;
 
-	time_opt=0.0;
+  time_opt = 0.0;
 
-	ni_total=cPar.ni_total;
-	ns_total=cPar.ns_total;
-	ni_test=cPar.ni_test;
-	ns_test=cPar.ns_test;
-	n_cvt=cPar.n_cvt;
+  ni_total = cPar.ni_total;
+  ns_total = cPar.ns_total;
+  ni_test = cPar.ni_test;
+  ns_test = cPar.ns_test;
+  n_cvt = cPar.n_cvt;
 
-	ng_total=cPar.ng_total;
-	ng_test=0;
+  ng_total = cPar.ng_total;
+  ng_test = 0;
 
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
+  indicator_idv = cPar.indicator_idv;
+  indicator_snp = cPar.indicator_snp;
+  snpInfo = cPar.snpInfo;
 
-	return;
+  return;
 }
 
-void LM::CopyToParam (PARAM &cPar) {
-	cPar.time_opt=time_opt;
-	cPar.ng_test=ng_test;
-	return;
+void LM::CopyToParam(PARAM &cPar) {
+  cPar.time_opt = time_opt;
+  cPar.ng_test = ng_test;
+  return;
 }
 
-void LM::WriteFiles () {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".assoc.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout << "error writing file: " << file_str.c_str() << endl;
-	  return;
-	}
-
-	if (!file_gene.empty()) {
-		outfile<<"geneID"<<"\t";
-
-		if (a_mode==51) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
-		} else if (a_mode==52) {
-			outfile<<"p_lrt"<<endl;
-		} else if (a_mode==53) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
-		} else if (a_mode==54) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<
-			  "\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
-		} else {}
-
-		for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
-			outfile<<snpInfo[t].rs_number<<"\t";
-
-			if (a_mode==51) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_wald <<endl;
-			} else if (a_mode==52) {
-				outfile<<scientific<<setprecision(6)<<
-				  "\t"<<sumStat[t].p_lrt<<endl;
-			} else if (a_mode==53) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_score<<endl;
-			} else if (a_mode==54) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_wald <<"\t"<<
-				  sumStat[t].p_lrt<<"\t"<<
-				  sumStat[t].p_score<<endl;
-			} else {}
-		}
-	}  else {
-		outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"<<
-		  "\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<
-		  "af"<<"\t";
-
-		if (a_mode==51) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
-		} else if (a_mode==52) {
-			outfile<<"p_lrt"<<endl;
-		} else if (a_mode==53) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
-		} else if (a_mode==54) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"
-			       <<"p_lrt"<<"\t"<<"p_score"<<endl;
-		} else {}
-
-		size_t t=0;
-		for (size_t i=0; i<snpInfo.size(); ++i) {
-			if (indicator_snp[i]==0) {continue;}
-
-			outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<
-			  "\t"<<snpInfo[i].base_position<<"\t"<<
-			  snpInfo[i].n_miss<<"\t"<<ni_test-snpInfo[i].n_miss<<
-			  "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<
-			  "\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
-			if (a_mode==51) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_wald <<endl;
-			} else if (a_mode==52) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].p_lrt<<endl;
-			} else if (a_mode==53) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_score<<endl;
-			} else if (a_mode==54) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_wald <<"\t"<<
-				  sumStat[t].p_lrt<<"\t"<<
-				  sumStat[t].p_score<<endl;
-			} else {}
-			t++;
-		}
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void LM::WriteFiles() {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".assoc.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  if (!file_gene.empty()) {
+    outfile << "geneID"
+            << "\t";
+
+    if (a_mode == 51) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_wald" << endl;
+    } else if (a_mode == 52) {
+      outfile << "p_lrt" << endl;
+    } else if (a_mode == 53) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_score" << endl;
+    } else if (a_mode == 54) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_wald"
+              << "\t"
+              << "p_lrt"
+              << "\t"
+              << "p_score" << endl;
+    } else {
+    }
+
+    for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) {
+      outfile << snpInfo[t].rs_number << "\t";
+
+      if (a_mode == 51) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_wald << endl;
+      } else if (a_mode == 52) {
+        outfile << scientific << setprecision(6) << "\t" << sumStat[t].p_lrt
+                << endl;
+      } else if (a_mode == 53) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+      } else if (a_mode == 54) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t"
+                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+      } else {
+      }
+    }
+  } else {
+    outfile << "chr"
+            << "\t"
+            << "rs"
+            << "\t"
+            << "ps"
+            << "\t"
+            << "n_mis"
+            << "\t"
+            << "n_obs"
+            << "\t"
+            << "allele1"
+            << "\t"
+            << "allele0"
+            << "\t"
+            << "af"
+            << "\t";
+
+    if (a_mode == 51) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_wald" << endl;
+    } else if (a_mode == 52) {
+      outfile << "p_lrt" << endl;
+    } else if (a_mode == 53) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_score" << endl;
+    } else if (a_mode == 54) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_wald"
+              << "\t"
+              << "p_lrt"
+              << "\t"
+              << "p_score" << endl;
+    } else {
+    }
+
+    size_t t = 0;
+    for (size_t i = 0; i < snpInfo.size(); ++i) {
+      if (indicator_snp[i] == 0) {
+        continue;
+      }
+
+      outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+              << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+              << ni_test - snpInfo[i].n_miss << "\t" << snpInfo[i].a_minor
+              << "\t" << snpInfo[i].a_major << "\t" << fixed << setprecision(3)
+              << snpInfo[i].maf << "\t";
+
+      if (a_mode == 51) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_wald << endl;
+      } else if (a_mode == 52) {
+        outfile << scientific << setprecision(6) << sumStat[t].p_lrt << endl;
+      } else if (a_mode == 53) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+      } else if (a_mode == 54) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t"
+                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+      } else {
+      }
+      t++;
+    }
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
 void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty,
-	     const gsl_vector *Wtx, const gsl_vector *y,
-	     const gsl_vector *x,  double &xPwy, double &xPwx) {
-	size_t c_size=Wty->size;
-	double d;
+             const gsl_vector *Wtx, const gsl_vector *y, const gsl_vector *x,
+             double &xPwy, double &xPwx) {
+  size_t c_size = Wty->size;
+  double d;
 
-	gsl_vector *WtWiWtx=gsl_vector_alloc (c_size);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(c_size);
 
-	gsl_blas_ddot (x, x, &xPwx);
-	gsl_blas_ddot (x, y, &xPwy);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+  gsl_blas_ddot(x, x, &xPwx);
+  gsl_blas_ddot(x, y, &xPwy);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
 
-	gsl_blas_ddot (WtWiWtx, Wtx, &d);
-	xPwx-=d;
+  gsl_blas_ddot(WtWiWtx, Wtx, &d);
+  xPwx -= d;
 
-	gsl_blas_ddot (WtWiWtx, Wty, &d);
-	xPwy-=d;
+  gsl_blas_ddot(WtWiWtx, Wty, &d);
+  xPwy -= d;
 
-	gsl_vector_free (WtWiWtx);
+  gsl_vector_free(WtWiWtx);
 
-	return;
+  return;
 }
 
-void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty,
-	     const gsl_vector *y, double &yPwy) {
-	size_t c_size=Wty->size;
-	double d;
+void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y,
+             double &yPwy) {
+  size_t c_size = Wty->size;
+  double d;
 
-	gsl_vector *WtWiWty=gsl_vector_alloc (c_size);
+  gsl_vector *WtWiWty = gsl_vector_alloc(c_size);
 
-	gsl_blas_ddot (y, y, &yPwy);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);
+  gsl_blas_ddot(y, y, &yPwy);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);
 
-	gsl_blas_ddot (WtWiWty, Wty, &d);
-	yPwy-=d;
+  gsl_blas_ddot(WtWiWty, Wty, &d);
+  yPwy -= d;
 
-	gsl_vector_free (WtWiWty);
+  gsl_vector_free(WtWiWty);
 
-	return;
+  return;
 }
 
 // Calculate p-values and beta/se in a linear model.
-void LmCalcP (const size_t test_mode, const double yPwy,
-	      const double xPwy, const double xPwx, const double df,
-	      const size_t n_size, double &beta, double &se,
-	      double &p_wald, double &p_lrt, double &p_score) {
-	double yPxy=yPwy-xPwy*xPwy/xPwx;
-	double se_wald, se_score;
-
-	beta=xPwy/xPwx;
-	se_wald=sqrt(yPxy/(df*xPwx) );
-	se_score=sqrt(yPwy/((double)n_size*xPwx) );
-
-	p_wald=gsl_cdf_fdist_Q (beta*beta/(se_wald*se_wald), 1.0, df);
-	p_score=gsl_cdf_fdist_Q (beta*beta/(se_score*se_score), 1.0, df);
-	p_lrt=gsl_cdf_chisq_Q ((double)n_size*(log(yPwy)-log(yPxy)), 1);
-
-	if (test_mode==3) {se=se_score;} else {se=se_wald;}
-
-	return;
+void LmCalcP(const size_t test_mode, const double yPwy, const double xPwy,
+             const double xPwx, const double df, const size_t n_size,
+             double &beta, double &se, double &p_wald, double &p_lrt,
+             double &p_score) {
+  double yPxy = yPwy - xPwy * xPwy / xPwx;
+  double se_wald, se_score;
+
+  beta = xPwy / xPwx;
+  se_wald = sqrt(yPxy / (df * xPwx));
+  se_score = sqrt(yPwy / ((double)n_size * xPwx));
+
+  p_wald = gsl_cdf_fdist_Q(beta * beta / (se_wald * se_wald), 1.0, df);
+  p_score = gsl_cdf_fdist_Q(beta * beta / (se_score * se_score), 1.0, df);
+  p_lrt = gsl_cdf_chisq_Q((double)n_size * (log(yPwy) - log(yPxy)), 1);
+
+  if (test_mode == 3) {
+    se = se_score;
+  } else {
+    se = se_wald;
+  }
+
+  return;
 }
 
-void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x) {
-	ifstream infile (file_gene.c_str(), ifstream::in);
-	if (!infile) {
-	  cout<<"error reading gene expression file:"<<file_gene<<endl;
-	  return;
-	}
+void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
+  ifstream infile(file_gene.c_str(), ifstream::in);
+  if (!infile) {
+    cout << "error reading gene expression file:" << file_gene << endl;
+    return;
+  }
 
-	clock_t time_start=clock();
+  clock_t time_start = clock();
 
-	string line;
-	char *ch_ptr;
+  string line;
+  char *ch_ptr;
 
-	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
-	int c_phen;
-	string rs; // Gene id.
-	double d;
+  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+  int c_phen;
+  string rs; // Gene id.
+  double d;
 
-	// Calculate some basic quantities.
-	double yPwy, xPwy, xPwx;
-	double df=(double)W->size1-(double)W->size2-1.0;
+  // Calculate some basic quantities.
+  double yPwy, xPwy, xPwx;
+  double df = (double)W->size1 - (double)W->size2 - 1.0;
 
-	gsl_vector *y=gsl_vector_alloc (W->size1);
+  gsl_vector *y = gsl_vector_alloc(W->size1);
 
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wty=gsl_vector_alloc (W->size2);
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wty = gsl_vector_alloc(W->size2);
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
 
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
 
-	gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
-	CalcvPv(WtWi, Wtx, x, xPwx);
+  gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+  CalcvPv(WtWi, Wtx, x, xPwx);
 
-	// Header.
-	getline(infile, line);
+  // Header.
+  getline(infile, line);
 
-	for (size_t t=0; t<ng_total; t++) {
-		getline(infile, line);
-		if (t%d_pace==0 || t==ng_total-1) {
-		  ProgressBar ("Performing Analysis ", t, ng_total-1);
-		}
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		rs=ch_ptr;
+  for (size_t t = 0; t < ng_total; t++) {
+    getline(infile, line);
+    if (t % d_pace == 0 || t == ng_total - 1) {
+      ProgressBar("Performing Analysis ", t, ng_total - 1);
+    }
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    rs = ch_ptr;
 
-		c_phen=0;
-		for (size_t i=0; i<indicator_idv.size(); ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==0) {continue;}
+    c_phen = 0;
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
 
-			d=atof(ch_ptr);
-			gsl_vector_set(y, c_phen, d);
+      d = atof(ch_ptr);
+      gsl_vector_set(y, c_phen, d);
 
-			c_phen++;
-		}
+      c_phen++;
+    }
 
-		// Calculate statistics.
-		time_start=clock();
+    // Calculate statistics.
+    time_start = clock();
 
-		gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
-		CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
-		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
-			 beta, se, p_wald, p_lrt, p_score);
+    gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
+    CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
+    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+            p_lrt, p_score);
 
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
-		// Store summary data.
-		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
-	}
-	cout<<endl;
+    // Store summary data.
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
 
-	gsl_vector_free(y);
+  gsl_vector_free(y);
 
-	gsl_matrix_free(WtW);
-	gsl_matrix_free(WtWi);
-	gsl_vector_free(Wty);
-	gsl_vector_free(Wtx);
-	gsl_permutation_free(pmt);
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(Wty);
+  gsl_vector_free(Wtx);
+  gsl_permutation_free(pmt);
 
-	infile.close();
-	infile.clear();
+  infile.close();
+  infile.clear();
 
-	return;
+  return;
 }
 
 // WJA added
-void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) {
-	string file_bgen=file_oxford+".bgen";
-	ifstream infile (file_bgen.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bgen file:"<<file_bgen<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-
-	string line;
-	char *ch_ptr;
-
-	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
-	int n_miss, c_phen;
-	double geno, x_mean;
-
-	// Calculate some basic quantities.
-	double yPwy, xPwy, xPwx;
-	double df=(double)W->size1-(double)W->size2-1.0;
-
-	gsl_vector *x=gsl_vector_alloc (W->size1);
-	gsl_vector *x_miss=gsl_vector_alloc (W->size1);
-
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wty=gsl_vector_alloc (W->size2);
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
-	CalcvPv(WtWi, Wty, y, yPwy);
-
-	// Read in header.
-	uint32_t bgen_snp_block_offset;
-	uint32_t bgen_header_length;
-	uint32_t bgen_nsamples;
-	uint32_t bgen_nsnps;
-	uint32_t bgen_flags;
-	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
-	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
-	bgen_snp_block_offset-=4;
-	infile.ignore(4+bgen_header_length-20);
-	bgen_snp_block_offset-=4+bgen_header_length-20;
-	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
-	bgen_snp_block_offset-=4;
-	bool CompressedSNPBlocks=bgen_flags&0x1;
-
-	infile.ignore(bgen_snp_block_offset);
-
-	double bgen_geno_prob_AA, bgen_geno_prob_AB;
-	double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
-	uint32_t bgen_N;
-	uint16_t bgen_LS;
-	uint16_t bgen_LR;
-	uint16_t bgen_LC;
-	uint32_t bgen_SNP_pos;
-	uint32_t bgen_LA;
-	std::string bgen_A_allele;
-	uint32_t bgen_LB;
-	std::string bgen_B_allele;
-	uint32_t bgen_P;
-	size_t unzipped_data_size;
-	string id;
-	string rs;
-	string chr;
-	std::cout << "Warning: WJA hard coded SNP missingness " <<
-	  "threshold of 10%" << std::endl;
-
-	// Start reading genotypes and analyze.
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		if (t%d_pace==0 || t==(ns_total-1)) {
-		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
-		}
-
-		// Read SNP header.
-		id.clear();
-		rs.clear();
-		chr.clear();
-		bgen_A_allele.clear();
-		bgen_B_allele.clear();
-
-		infile.read(reinterpret_cast<char*>(&bgen_N),4);
-		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
-		id.resize(bgen_LS);
-		infile.read(&id[0], bgen_LS);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
-		rs.resize(bgen_LR);
-		infile.read(&rs[0], bgen_LR);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
-		chr.resize(bgen_LC);
-		infile.read(&chr[0], bgen_LC);
-
-		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
-		bgen_A_allele.resize(bgen_LA);
-		infile.read(&bgen_A_allele[0], bgen_LA);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
-		bgen_B_allele.resize(bgen_LB);
-		infile.read(&bgen_B_allele[0], bgen_LB);
-
-		uint16_t unzipped_data[3*bgen_N];
-
-		if (indicator_snp[t]==0) {
-			if(CompressedSNPBlocks)
-			  infile.read(reinterpret_cast<char*>(&bgen_P),4);
-			else
-			  bgen_P=6*bgen_N;
-
-			infile.ignore(static_cast<size_t>(bgen_P));
-
-			continue;
-		}
-
-		if(CompressedSNPBlocks) {
-			infile.read(reinterpret_cast<char*>(&bgen_P),4);
-			uint8_t zipped_data[bgen_P];
-
-			unzipped_data_size=6*bgen_N;
-
-			infile.read(reinterpret_cast<char*>(zipped_data),
-				    bgen_P);
-
-			int result=
-			  uncompress(reinterpret_cast<Bytef*>(unzipped_data),
-			    reinterpret_cast<uLongf*>(&unzipped_data_size),
-			    reinterpret_cast<Bytef*>(zipped_data),
-			    static_cast<uLong> (bgen_P));
-			assert(result == Z_OK);
-
-		}
-		else
-		{
-
-			bgen_P=6*bgen_N;
-			infile.read(reinterpret_cast<char*>(unzipped_data),
-				    bgen_P);
-		}
-
-		x_mean=0.0; c_phen=0; n_miss=0;
-		gsl_vector_set_zero(x_miss);
-		for (size_t i=0; i<bgen_N; ++i) {
-			if (indicator_idv[i]==0) {continue;}
-
-
-			  bgen_geno_prob_AA=
-			    static_cast<double>(unzipped_data[i*3])/32768.0;
-			  bgen_geno_prob_AB=
-			    static_cast<double>(unzipped_data[i*3+1])/32768.0;
-			  bgen_geno_prob_BB=
-			    static_cast<double>(unzipped_data[i*3+2])/32768.0;
-
-				// WJA
-			  bgen_geno_prob_non_miss=
-			    bgen_geno_prob_AA +
-			    bgen_geno_prob_AB +
-			    bgen_geno_prob_BB;
-			  if (bgen_geno_prob_non_miss<0.9) {
-			    gsl_vector_set(x_miss, c_phen, 0.0);
-			    n_miss++;
-			  }
-			  else {
-				bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
-				bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
-				bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
-				geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
-				gsl_vector_set(x, c_phen, geno);
-				gsl_vector_set(x_miss, c_phen, 1.0);
-				x_mean+=geno;
-			}
-			c_phen++;
-		}
-
-		x_mean/=static_cast<double>(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			if (gsl_vector_get (x_miss, i)==0) {
-			  gsl_vector_set(x, i, x_mean);
-			}
-			geno=gsl_vector_get(x, i);
-		}
-
-		// Calculate statistics.
-		time_start=clock();
-
-		gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
-		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
-		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
-			 beta, se, p_wald, p_lrt, p_score);
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Store summary data.
-		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
-	}
-	cout<<endl;
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(WtW);
-	gsl_matrix_free(WtWi);
-	gsl_vector_free(Wty);
-	gsl_vector_free(Wtx);
-	gsl_permutation_free(pmt);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) {
+  string file_bgen = file_oxford + ".bgen";
+  ifstream infile(file_bgen.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bgen file:" << file_bgen << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  string line;
+  char *ch_ptr;
+
+  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+  int n_miss, c_phen;
+  double geno, x_mean;
+
+  // Calculate some basic quantities.
+  double yPwy, xPwy, xPwx;
+  double df = (double)W->size1 - (double)W->size2 - 1.0;
+
+  gsl_vector *x = gsl_vector_alloc(W->size1);
+  gsl_vector *x_miss = gsl_vector_alloc(W->size1);
+
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wty = gsl_vector_alloc(W->size2);
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
+  CalcvPv(WtWi, Wty, y, yPwy);
+
+  // Read in header.
+  uint32_t bgen_snp_block_offset;
+  uint32_t bgen_header_length;
+  uint32_t bgen_nsamples;
+  uint32_t bgen_nsnps;
+  uint32_t bgen_flags;
+  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+  bgen_snp_block_offset -= 4;
+  infile.ignore(4 + bgen_header_length - 20);
+  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+  bgen_snp_block_offset -= 4;
+  bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+  infile.ignore(bgen_snp_block_offset);
+
+  double bgen_geno_prob_AA, bgen_geno_prob_AB;
+  double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+  uint32_t bgen_N;
+  uint16_t bgen_LS;
+  uint16_t bgen_LR;
+  uint16_t bgen_LC;
+  uint32_t bgen_SNP_pos;
+  uint32_t bgen_LA;
+  std::string bgen_A_allele;
+  uint32_t bgen_LB;
+  std::string bgen_B_allele;
+  uint32_t bgen_P;
+  size_t unzipped_data_size;
+  string id;
+  string rs;
+  string chr;
+  std::cout << "Warning: WJA hard coded SNP missingness "
+            << "threshold of 10%" << std::endl;
+
+  // Start reading genotypes and analyze.
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+
+    // Read SNP header.
+    id.clear();
+    rs.clear();
+    chr.clear();
+    bgen_A_allele.clear();
+    bgen_B_allele.clear();
+
+    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+    id.resize(bgen_LS);
+    infile.read(&id[0], bgen_LS);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+    rs.resize(bgen_LR);
+    infile.read(&rs[0], bgen_LR);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+    chr.resize(bgen_LC);
+    infile.read(&chr[0], bgen_LC);
+
+    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+    bgen_A_allele.resize(bgen_LA);
+    infile.read(&bgen_A_allele[0], bgen_LA);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+    bgen_B_allele.resize(bgen_LB);
+    infile.read(&bgen_B_allele[0], bgen_LB);
+
+    uint16_t unzipped_data[3 * bgen_N];
+
+    if (indicator_snp[t] == 0) {
+      if (CompressedSNPBlocks)
+        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      else
+        bgen_P = 6 * bgen_N;
+
+      infile.ignore(static_cast<size_t>(bgen_P));
+
+      continue;
+    }
+
+    if (CompressedSNPBlocks) {
+      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      uint8_t zipped_data[bgen_P];
+
+      unzipped_data_size = 6 * bgen_N;
+
+      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+
+      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+                              reinterpret_cast<uLongf *>(&unzipped_data_size),
+                              reinterpret_cast<Bytef *>(zipped_data),
+                              static_cast<uLong>(bgen_P));
+      assert(result == Z_OK);
+
+    } else {
+
+      bgen_P = 6 * bgen_N;
+      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+    }
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < bgen_N; ++i) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+      bgen_geno_prob_AB =
+          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+      bgen_geno_prob_BB =
+          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+
+      // WJA
+      bgen_geno_prob_non_miss =
+          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+      if (bgen_geno_prob_non_miss < 0.9) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= static_cast<double>(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i);
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+
+    gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+    CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+            p_lrt, p_score);
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(Wty);
+  gsl_vector_free(Wtx);
+  gsl_permutation_free(pmt);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout << "error reading genotype file:" << file_geno << endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-
-	string line;
-	char *ch_ptr;
-
-	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
-	int n_miss, c_phen;
-	double geno, x_mean;
-
-	// Calculate some basic quantities.
-	double yPwy, xPwy, xPwx;
-	double df=(double)W->size1-(double)W->size2-1.0;
-
-	gsl_vector *x=gsl_vector_alloc (W->size1);
-	gsl_vector *x_miss=gsl_vector_alloc (W->size1);
-
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wty=gsl_vector_alloc (W->size2);
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
-	CalcvPv(WtWi, Wty, y, yPwy);
-
-	// Start reading genotypes and analyze.
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		getline(infile, line);
-		if (t%d_pace==0 || t==(ns_total-1)) {
-		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		x_mean=0.0; c_phen=0; n_miss=0;
-		gsl_vector_set_zero(x_miss);
-		for (size_t i=0; i<ni_total; ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==0) {continue;}
-
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set(x_miss, c_phen, 0.0);
-			  n_miss++;
-			}
-			else {
-				geno=atof(ch_ptr);
-
-				gsl_vector_set(x, c_phen, geno);
-				gsl_vector_set(x_miss, c_phen, 1.0);
-				x_mean+=geno;
-			}
-			c_phen++;
-		}
-
-		x_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			if (gsl_vector_get (x_miss, i)==0) {
-			  gsl_vector_set(x, i, x_mean);
-			}
-			geno=gsl_vector_get(x, i);
-		}
-
-		// Calculate statistics.
-		time_start=clock();
-
-		gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
-		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
-		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
-			 beta, se, p_wald, p_lrt, p_score);
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Store summary data.
-		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
-	}
-	cout<<endl;
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(WtW);
-	gsl_matrix_free(WtWi);
-	gsl_vector_free(Wty);
-	gsl_vector_free(Wtx);
-	gsl_permutation_free(pmt);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  string line;
+  char *ch_ptr;
+
+  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+  int n_miss, c_phen;
+  double geno, x_mean;
+
+  // Calculate some basic quantities.
+  double yPwy, xPwy, xPwx;
+  double df = (double)W->size1 - (double)W->size2 - 1.0;
+
+  gsl_vector *x = gsl_vector_alloc(W->size1);
+  gsl_vector *x_miss = gsl_vector_alloc(W->size1);
+
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wty = gsl_vector_alloc(W->size2);
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
+  CalcvPv(WtWi, Wty, y, yPwy);
+
+  // Start reading genotypes and analyze.
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    getline(infile, line);
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i);
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+
+    gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+    CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+            p_lrt, p_score);
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(Wty);
+  gsl_vector_free(Wtx);
+  gsl_permutation_free(pmt);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-
-	char ch[1];
-	bitset<8> b;
-
-	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
-	int n_bit, n_miss, ci_total, ci_test;
-	double geno, x_mean;
-
-	// Calculate some basic quantities.
-	double yPwy, xPwy, xPwx;
-	double df=(double)W->size1-(double)W->size2-1.0;
-
-	gsl_vector *x=gsl_vector_alloc (W->size1);
-
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wty=gsl_vector_alloc (W->size2);
-	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
-	CalcvPv(WtWi, Wty, y, yPwy);
-
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1;}
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-		if (t%d_pace==0 || t==snpInfo.size()-1) {
-		  ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-	                // Minor allele homozygous: 2.0; major: 0.0;
-			for (size_t j=0; j<4; ++j) {
-			  if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
-			    break;
-			  }
-			  if (indicator_idv[ci_total]==0) {
-			    ci_total++;
-			    continue;
-			  }
-
-			  if (b[2*j]==0) {
-			    if (b[2*j+1]==0) {
-			      gsl_vector_set(x, ci_test, 2);
-			      x_mean+=2.0;
-			    }
-			    else {
-			      gsl_vector_set(x, ci_test, 1);
-			      x_mean+=1.0; }
-			    }
-			  else {
-			    if (b[2*j+1]==1) {
-			      gsl_vector_set(x, ci_test, 0);
-			    }
-			    else {
-			      gsl_vector_set(x, ci_test, -9);
-			      n_miss++;
-			    }
-			  }
-
-			ci_total++;
-			ci_test++;
-			}
-		}
-
-		x_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			geno=gsl_vector_get(x,i);
-			if (geno==-9) {
-			  gsl_vector_set(x, i, x_mean);
-			  geno=x_mean;
-			}
-		}
-
-		// Calculate statistics.
-		time_start=clock();
-
-		gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
-		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
-		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
-			 beta, se, p_wald, p_lrt, p_score);
-
-		//store summary data
-		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	}
-	cout<<endl;
-
-	gsl_vector_free(x);
-
-	gsl_matrix_free(WtW);
-	gsl_matrix_free(WtWi);
-	gsl_vector_free(Wty);
-	gsl_vector_free(Wtx);
-	gsl_permutation_free(pmt);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  char ch[1];
+  bitset<8> b;
+
+  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+  int n_bit, n_miss, ci_total, ci_test;
+  double geno, x_mean;
+
+  // Calculate some basic quantities.
+  double yPwy, xPwy, xPwx;
+  double df = (double)W->size1 - (double)W->size2 - 1.0;
+
+  gsl_vector *x = gsl_vector_alloc(W->size1);
+
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wty = gsl_vector_alloc(W->size2);
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
+  CalcvPv(WtWi, Wty, y, yPwy);
+
+  // Calculate n_bit and c, the number of bit for each SNP.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Print the first three magic numbers.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0;
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(x, ci_test, 2);
+            x_mean += 2.0;
+          } else {
+            gsl_vector_set(x, ci_test, 1);
+            x_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(x, ci_test, 0);
+          } else {
+            gsl_vector_set(x, ci_test, -9);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+        ci_test++;
+      }
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+
+    gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+    CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+            p_lrt, p_score);
+
+    // store summary data
+    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+    sumStat.push_back(SNPs);
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(Wty);
+  gsl_vector_free(Wtx);
+  gsl_permutation_free(pmt);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
 // Make sure that both y and X are centered already.
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y,
-		     vector<pair<size_t, double> > &pos_loglr) {
-	double yty, xty, xtx, log_lr;
-	gsl_blas_ddot(y, y, &yty);
+void MatrixCalcLmLR(const gsl_matrix *X, const gsl_vector *y,
+                    vector<pair<size_t, double>> &pos_loglr) {
+  double yty, xty, xtx, log_lr;
+  gsl_blas_ddot(y, y, &yty);
 
-	for (size_t i=0; i<X->size2; ++i) {
-	  gsl_vector_const_view X_col=gsl_matrix_const_column (X, i);
-	  gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx);
-	  gsl_blas_ddot(&X_col.vector, y, &xty);
+  for (size_t i = 0; i < X->size2; ++i) {
+    gsl_vector_const_view X_col = gsl_matrix_const_column(X, i);
+    gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx);
+    gsl_blas_ddot(&X_col.vector, y, &xty);
 
-	  log_lr=0.5*(double)y->size*(log(yty)-log(yty-xty*xty/xtx));
-	  pos_loglr.push_back(make_pair(i,log_lr) );
-	}
+    log_lr = 0.5 * (double)y->size * (log(yty) - log(yty - xty * xty / xtx));
+    pos_loglr.push_back(make_pair(i, log_lr));
+  }
 
-	return;
+  return;
 }
diff --git a/src/lm.h b/src/lm.h
index cf428f0..cb22d3b 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -19,61 +19,61 @@
 #ifndef __LM_H__
 #define __LM_H__
 
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
 #include "io.h"
+#include "param.h"
 
 using namespace std;
 
 class LM {
 
 public:
-	// IO-related parameters.
-	int a_mode;	// Analysis mode: 50+1/2/3/4 for Frequentist tests.
-	size_t d_pace;	// Display pace.
-
-	string file_bfile;
-	string file_geno;
-	string file_oxford;
-	string file_out;
-	string path_out;
-
-	string file_gene;
-
-	// Summary statistics.
-	size_t ni_total, ni_test;  // Number of individuals.
-	size_t ns_total, ns_test;  // Number of SNPs.
-	size_t ng_total, ng_test;  // Number of genes.
-	size_t n_cvt;
-	double time_opt;	   // Time spent.
-
-        // Indicator for individuals (phenotypes): 0 missing, 1
-        // available for analysis.
-	vector<int> indicator_idv;
-
-        // Sequence indicator for SNPs: 0 ignored because of (a) maf,
-        // (b) miss, (c) non-poly; 1 available for analysis.
-	vector<int> indicator_snp;
-
-	vector<SNPINFO> snpInfo;  // Record SNP information.
-
-	// Not included in PARAM.
-	vector<SUMSTAT> sumStat;  // Output SNPSummary Data.
-
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-	void AnalyzeGene (const gsl_matrix *W, const gsl_vector *x);
-	void AnalyzePlink (const gsl_matrix *W, const gsl_vector *y);
-	void AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y);
-	// WJA added.
-	void Analyzebgen (const gsl_matrix *W, const gsl_vector *y);
-
-	void WriteFiles ();
+  // IO-related parameters.
+  int a_mode;    // Analysis mode: 50+1/2/3/4 for Frequentist tests.
+  size_t d_pace; // Display pace.
+
+  string file_bfile;
+  string file_geno;
+  string file_oxford;
+  string file_out;
+  string path_out;
+
+  string file_gene;
+
+  // Summary statistics.
+  size_t ni_total, ni_test; // Number of individuals.
+  size_t ns_total, ns_test; // Number of SNPs.
+  size_t ng_total, ng_test; // Number of genes.
+  size_t n_cvt;
+  double time_opt; // Time spent.
+
+  // Indicator for individuals (phenotypes): 0 missing, 1
+  // available for analysis.
+  vector<int> indicator_idv;
+
+  // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+  // (b) miss, (c) non-poly; 1 available for analysis.
+  vector<int> indicator_snp;
+
+  vector<SNPINFO> snpInfo; // Record SNP information.
+
+  // Not included in PARAM.
+  vector<SUMSTAT> sumStat; // Output SNPSummary Data.
+
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+  void AnalyzeGene(const gsl_matrix *W, const gsl_vector *x);
+  void AnalyzePlink(const gsl_matrix *W, const gsl_vector *y);
+  void AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y);
+  // WJA added.
+  void Analyzebgen(const gsl_matrix *W, const gsl_vector *y);
+
+  void WriteFiles();
 };
 
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y,
-		     vector<pair<size_t, double> > &pos_loglr);
+void MatrixCalcLmLR(const gsl_matrix *X, const gsl_vector *y,
+                    vector<pair<size_t, double>> &pos_loglr);
 
 #endif
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 2b5ca84..3f51073 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -16,2488 +16,2585 @@
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <assert.h>
+#include <bitset>
 #include <cmath>
+#include <cstring>
+#include <iomanip>
 #include <iostream>
-#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <bitset>
-#include <cstring>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
-#include "gsl/gsl_roots.h"
-#include "gsl/gsl_min.h"
 #include "gsl/gsl_integration.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
 
-#include "io.h"
 #include "eigenlib.h"
-#include "lapack.h"
 #include "gzstream.h"
+#include "io.h"
+#include "lapack.h"
 #include "lmm.h"
 
 using namespace std;
 
-void LMM::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
+void LMM::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
+  d_pace = cPar.d_pace;
 
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
-	file_gene=cPar.file_gene;
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
+  file_gene = cPar.file_gene;
 
-	// WJA added.
-	file_oxford=cPar.file_oxford;
+  // WJA added.
+  file_oxford = cPar.file_oxford;
 
-	l_min=cPar.l_min;
-	l_max=cPar.l_max;
-	n_region=cPar.n_region;
-	l_mle_null=cPar.l_mle_null;
-	logl_mle_H0=cPar.logl_mle_H0;
+  l_min = cPar.l_min;
+  l_max = cPar.l_max;
+  n_region = cPar.n_region;
+  l_mle_null = cPar.l_mle_null;
+  logl_mle_H0 = cPar.logl_mle_H0;
 
-	time_UtX=0.0;
-	time_opt=0.0;
+  time_UtX = 0.0;
+  time_opt = 0.0;
 
-	ni_total=cPar.ni_total;
-	ns_total=cPar.ns_total;
-	ni_test=cPar.ni_test;
-	ns_test=cPar.ns_test;
-	n_cvt=cPar.n_cvt;
+  ni_total = cPar.ni_total;
+  ns_total = cPar.ns_total;
+  ni_test = cPar.ni_test;
+  ns_test = cPar.ns_test;
+  n_cvt = cPar.n_cvt;
 
-	ng_total=cPar.ng_total;
-	ng_test=0;
+  ng_total = cPar.ng_total;
+  ng_test = 0;
 
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
+  indicator_idv = cPar.indicator_idv;
+  indicator_snp = cPar.indicator_snp;
+  snpInfo = cPar.snpInfo;
 
-	return;
+  return;
 }
 
-void LMM::CopyToParam (PARAM &cPar) {
-	cPar.time_UtX=time_UtX;
-	cPar.time_opt=time_opt;
+void LMM::CopyToParam(PARAM &cPar) {
+  cPar.time_UtX = time_UtX;
+  cPar.time_opt = time_opt;
 
-	cPar.ng_test=ng_test;
+  cPar.ng_test = ng_test;
 
-	return;
+  return;
 }
 
-void LMM::WriteFiles () {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".assoc.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	if (!file_gene.empty()) {
-		outfile<<"geneID"<<"\t";
-
-		if (a_mode==1) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<
-			  "\t"<<"p_wald"<<endl;
-		} else if (a_mode==2) {
-			outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl;
-		} else if (a_mode==3) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
-		} else if (a_mode==4) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<
-			  "\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<
-			  "\t"<<"p_score"<<endl;
-		} else {}
-
-		for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
-			outfile<<snpInfo[t].rs_number<<"\t";
-
-			if (a_mode==1) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<
-				  sumStat[t].lambda_remle<<"\t"<<
-				  sumStat[t].p_wald <<endl;
-			} else if (a_mode==2) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].lambda_mle<<"\t"<<
-				  sumStat[t].p_lrt<<endl;
-			} else if (a_mode==3) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_score<<endl;
-			} else if (a_mode==4) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<
-				  sumStat[t].lambda_remle<<"\t"<<
-				  sumStat[t].lambda_mle<<"\t"<<
-				  sumStat[t].p_wald <<"\t"<<
-				  sumStat[t].p_lrt<<"\t"<<
-				  sumStat[t].p_score<<endl;
-			} else {}
-		}
-	}  else {
-		outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"
-		       <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
-		if (a_mode==1) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"
-			       <<"p_wald"<<endl;
-		} else if (a_mode==2) {
-			outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl;
-		} else if (a_mode==3) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
-		} else if (a_mode==4) {
-			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"
-			       <<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<
-			  "\t"<<"p_score"<<endl;
-		} else {}
-
-		size_t t=0;
-		for (size_t i=0; i<snpInfo.size(); ++i) {
-			if (indicator_snp[i]==0) {continue;}
-
-			outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<
-			  "\t"<<snpInfo[i].base_position<<"\t"<<
-			  snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<
-			  snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<
-			  snpInfo[i].maf<<"\t";
-
-			if (a_mode==1) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].lambda_remle<<"\t"<<
-				  sumStat[t].p_wald <<endl;
-			} else if (a_mode==2) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].lambda_mle<<"\t"<<
-				  sumStat[t].p_lrt<<endl;
-			} else if (a_mode==3) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].p_score<<endl;
-			} else if (a_mode==4) {
-				outfile<<scientific<<setprecision(6)<<
-				  sumStat[t].beta<<"\t"<<sumStat[t].se<<
-				  "\t"<<sumStat[t].lambda_remle<<"\t"<<
-				  sumStat[t].lambda_mle<<"\t"<<
-				  sumStat[t].p_wald <<"\t"<<
-				  sumStat[t].p_lrt<<"\t"<<
-				  sumStat[t].p_score<<endl;
-			} else {}
-			t++;
-		}
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void LMM::WriteFiles() { // Writes the entries of sumStat as a tab-separated table to <path_out>/<file_out>.assoc.txt.
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".assoc.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) { // Output file could not be opened: report on stdout and return without writing anything.
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  if (!file_gene.empty()) { // file_gene is set: each row starts with a single ID column ("geneID").
+    outfile << "geneID"
+            << "\t";
+
+    if (a_mode == 1) { // Header: the statistic columns written depend on a_mode (1, 2, 3 or 4).
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "l_remle"
+              << "\t"
+              << "p_wald" << endl;
+    } else if (a_mode == 2) {
+      outfile << "l_mle"
+              << "\t"
+              << "p_lrt" << endl;
+    } else if (a_mode == 3) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_score" << endl;
+    } else if (a_mode == 4) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "l_remle"
+              << "\t"
+              << "l_mle"
+              << "\t"
+              << "p_wald"
+              << "\t"
+              << "p_lrt"
+              << "\t"
+              << "p_score" << endl;
+    } else { // Any other a_mode: no statistic columns and no line terminator are written.
+    }
+
+    for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) { // One output row per sumStat entry.
+      outfile << snpInfo[t].rs_number << "\t"; // ID is read from snpInfo at the same index t. NOTE(review): assumes snpInfo.size() >= sumStat.size() -- confirm.
+
+      if (a_mode == 1) { // Values follow the same column order as the header above, in scientific notation with precision 6.
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+                << sumStat[t].p_wald << endl;
+      } else if (a_mode == 2) {
+        outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
+                << "\t" << sumStat[t].p_lrt << endl;
+      } else if (a_mode == 3) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+      } else if (a_mode == 4) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+                << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
+                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+      } else {
+      }
+    }
+  } else { // file_gene is empty: each row starts with seven annotation columns taken from snpInfo.
+    outfile << "chr"
+            << "\t"
+            << "rs"
+            << "\t"
+            << "ps"
+            << "\t"
+            << "n_miss"
+            << "\t"
+            << "allele1"
+            << "\t"
+            << "allele0"
+            << "\t"
+            << "af"
+            << "\t";
+
+    if (a_mode == 1) { // Header: same a_mode-dependent statistic columns as in the branch above.
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "l_remle"
+              << "\t"
+              << "p_wald" << endl;
+    } else if (a_mode == 2) {
+      outfile << "l_mle"
+              << "\t"
+              << "p_lrt" << endl;
+    } else if (a_mode == 3) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "p_score" << endl;
+    } else if (a_mode == 4) {
+      outfile << "beta"
+              << "\t"
+              << "se"
+              << "\t"
+              << "l_remle"
+              << "\t"
+              << "l_mle"
+              << "\t"
+              << "p_wald"
+              << "\t"
+              << "p_lrt"
+              << "\t"
+              << "p_score" << endl;
+    } else { // Any other a_mode: no statistic columns and no line terminator are written.
+    }
+
+    size_t t = 0; // Index into sumStat; advanced only for entries of snpInfo that are not skipped.
+    for (size_t i = 0; i < snpInfo.size(); ++i) {
+      if (indicator_snp[i] == 0) { // Entries with indicator_snp[i] == 0 produce no row and do not advance t.
+        continue;
+      }
+
+      outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+              << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+              << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" // a_minor goes under "allele1", a_major under "allele0".
+              << fixed << setprecision(3) << snpInfo[i].maf << "\t"; // maf (column "af") is written in fixed notation with precision 3.
+
+      if (a_mode == 1) { // Statistics switch the stream to scientific notation with precision 6.
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+                << sumStat[t].p_wald << endl;
+      } else if (a_mode == 2) {
+        outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
+                << "\t" << sumStat[t].p_lrt << endl;
+      } else if (a_mode == 3) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+      } else if (a_mode == 4) {
+        outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+                << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+                << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
+                << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+      } else {
+      }
+      t++;
+    }
+  }
+
+  outfile.close();
+  outfile.clear(); // Resets the stream's error-state flags after closing.
+  return;
 }
 
-void CalcPab (const size_t n_cvt, const size_t e_mode,
-	      const gsl_vector *Hi_eval, const gsl_matrix *Uab,
-	      const gsl_vector *ab, gsl_matrix *Pab) {
-	size_t index_ab, index_aw, index_bw, index_ww;
-	double p_ab;
-	double ps_ab, ps_aw, ps_bw, ps_ww;
-
-	for (size_t p=0; p<=n_cvt+1; ++p) {
-		for (size_t a=p+1; a<=n_cvt+2; ++a) {
-			for (size_t b=a; b<=n_cvt+2; ++b) {
-				index_ab=GetabIndex (a, b, n_cvt);
-				if (p==0) {
-				  gsl_vector_const_view Uab_col=
-				    gsl_matrix_const_column (Uab, index_ab);
-				  gsl_blas_ddot(Hi_eval,&Uab_col.vector,&p_ab);
-				  if (e_mode!=0) {
-				    p_ab=gsl_vector_get (ab, index_ab)-p_ab;
-				  }
-				  gsl_matrix_set (Pab, 0, index_ab, p_ab);
-				}
-				else {
-				  index_aw=GetabIndex (a, p, n_cvt);
-				  index_bw=GetabIndex (b, p, n_cvt);
-				  index_ww=GetabIndex (p, p, n_cvt);
-
-				  ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
-				  ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
-				  ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
-				  ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-
-				  p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
-				  gsl_matrix_set (Pab, p, index_ab, p_ab);
-				}
-			}
-		}
-	}
-	return;
+void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, // Fills rows 0..n_cvt+1 of Pab: row 0 from Hi_eval, Uab and ab; every later row from the row above it.
+             const gsl_matrix *Uab, const gsl_vector *ab, gsl_matrix *Pab) {
+  size_t index_ab, index_aw, index_bw, index_ww;
+  double p_ab;
+  double ps_ab, ps_aw, ps_bw, ps_ww;
+
+  for (size_t p = 0; p <= n_cvt + 1; ++p) { // p is the row of Pab being filled.
+    for (size_t a = p + 1; a <= n_cvt + 2; ++a) { // Row p only receives pairs with p < a <= b <= n_cvt + 2.
+      for (size_t b = a; b <= n_cvt + 2; ++b) {
+        index_ab = GetabIndex(a, b, n_cvt); // Column of Uab and Pab (and element of ab) used for the pair (a, b).
+        if (p == 0) {
+          gsl_vector_const_view Uab_col =
+              gsl_matrix_const_column(Uab, index_ab);
+          gsl_blas_ddot(Hi_eval, &Uab_col.vector, &p_ab); // p_ab = dot product of Hi_eval with that column of Uab.
+          if (e_mode != 0) { // For non-zero e_mode the stored value is ab[index_ab] minus the dot product.
+            p_ab = gsl_vector_get(ab, index_ab) - p_ab;
+          }
+          gsl_matrix_set(Pab, 0, index_ab, p_ab);
+        } else {
+          index_aw = GetabIndex(a, p, n_cvt);
+          index_bw = GetabIndex(b, p, n_cvt);
+          index_ww = GetabIndex(p, p, n_cvt);
+
+          ps_ab = gsl_matrix_get(Pab, p - 1, index_ab); // All four inputs are read from the previous row, p - 1.
+          ps_aw = gsl_matrix_get(Pab, p - 1, index_aw);
+          ps_bw = gsl_matrix_get(Pab, p - 1, index_bw);
+          ps_ww = gsl_matrix_get(Pab, p - 1, index_ww);
+
+          p_ab = ps_ab - ps_aw * ps_bw / ps_ww; // NOTE(review): no guard against ps_ww == 0.
+          gsl_matrix_set(Pab, p, index_ab, p_ab);
+        }
+      }
+    }
+  }
+  return;
 }
 
-void CalcPPab (const size_t n_cvt, const size_t e_mode,
-	       const gsl_vector *HiHi_eval, const gsl_matrix *Uab,
-	       const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) {
-	size_t index_ab, index_aw, index_bw, index_ww;
-	double p2_ab;
-	double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
-
-	for (size_t p=0; p<=n_cvt+1; ++p) {
-		for (size_t a=p+1; a<=n_cvt+2; ++a) {
-			for (size_t b=a; b<=n_cvt+2; ++b) {
-				index_ab=GetabIndex (a, b, n_cvt);
-				if (p==0) {
-				  gsl_vector_const_view Uab_col=
-				    gsl_matrix_const_column (Uab, index_ab);
-				  gsl_blas_ddot (HiHi_eval, &Uab_col.vector,
-						 &p2_ab);
-				  if (e_mode!=0) {
-				    p2_ab=p2_ab-gsl_vector_get(ab,index_ab) +
-				      2.0*gsl_matrix_get (Pab, 0, index_ab);
-				  }
-				  gsl_matrix_set (PPab, 0, index_ab, p2_ab);
-				}
-				else {
-				  index_aw=GetabIndex (a, p, n_cvt);
-				  index_bw=GetabIndex (b, p, n_cvt);
-				  index_ww=GetabIndex (p, p, n_cvt);
-
-				  ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
-				  ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
-				  ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
-				  ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-				  ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
-				  ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
-				  ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-
-				  p2_ab=ps2_ab+ps_aw*ps_bw*
-				    ps2_ww/(ps_ww*ps_ww);
-				  p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
-				  gsl_matrix_set (PPab, p, index_ab, p2_ab);
-				}
-			}
-		}
-	}
-	return;
+void CalcPPab(const size_t n_cvt, const size_t e_mode, // Fills rows 0..n_cvt+1 of PPab; reads the already-filled Pab, so CalcPab must have been run on Pab first.
+              const gsl_vector *HiHi_eval, const gsl_matrix *Uab,
+              const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) {
+  size_t index_ab, index_aw, index_bw, index_ww;
+  double p2_ab;
+  double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
+
+  for (size_t p = 0; p <= n_cvt + 1; ++p) { // p is the row of PPab being filled.
+    for (size_t a = p + 1; a <= n_cvt + 2; ++a) { // Row p only receives pairs with p < a <= b <= n_cvt + 2.
+      for (size_t b = a; b <= n_cvt + 2; ++b) {
+        index_ab = GetabIndex(a, b, n_cvt); // Column used for the pair (a, b) in Uab, Pab and PPab.
+        if (p == 0) {
+          gsl_vector_const_view Uab_col =
+              gsl_matrix_const_column(Uab, index_ab);
+          gsl_blas_ddot(HiHi_eval, &Uab_col.vector, &p2_ab); // p2_ab = dot product of HiHi_eval with that column of Uab.
+          if (e_mode != 0) { // For non-zero e_mode: dot product - ab[index_ab] + 2 * Pab(0, index_ab).
+            p2_ab = p2_ab - gsl_vector_get(ab, index_ab) +
+                    2.0 * gsl_matrix_get(Pab, 0, index_ab);
+          }
+          gsl_matrix_set(PPab, 0, index_ab, p2_ab);
+        } else {
+          index_aw = GetabIndex(a, p, n_cvt);
+          index_bw = GetabIndex(b, p, n_cvt);
+          index_ww = GetabIndex(p, p, n_cvt);
+
+          ps2_ab = gsl_matrix_get(PPab, p - 1, index_ab); // ps_* values come from Pab, ps2_* values from PPab, all in row p - 1.
+          ps_aw = gsl_matrix_get(Pab, p - 1, index_aw);
+          ps_bw = gsl_matrix_get(Pab, p - 1, index_bw);
+          ps_ww = gsl_matrix_get(Pab, p - 1, index_ww);
+          ps2_aw = gsl_matrix_get(PPab, p - 1, index_aw);
+          ps2_bw = gsl_matrix_get(PPab, p - 1, index_bw);
+          ps2_ww = gsl_matrix_get(PPab, p - 1, index_ww);
+
+          p2_ab = ps2_ab + ps_aw * ps_bw * ps2_ww / (ps_ww * ps_ww); // NOTE(review): no guard against ps_ww == 0.
+          p2_ab -= (ps_aw * ps2_bw + ps_bw * ps2_aw) / ps_ww;
+          gsl_matrix_set(PPab, p, index_ab, p2_ab);
+        }
+      }
+    }
+  }
+  return;
 }
 
-void CalcPPPab (const size_t n_cvt, const size_t e_mode,
-		const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab,
-		const gsl_vector *ab, const gsl_matrix *Pab,
-		const gsl_matrix *PPab, gsl_matrix *PPPab) {
-	size_t index_ab, index_aw, index_bw, index_ww;
-	double p3_ab;
-	double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww,
-	  ps3_aw, ps3_bw, ps3_ww;
-
-	for (size_t p=0; p<=n_cvt+1; ++p) {
-		for (size_t a=p+1; a<=n_cvt+2; ++a) {
-			for (size_t b=a; b<=n_cvt+2; ++b) {
-				index_ab=GetabIndex (a, b, n_cvt);
-				if (p==0) {
-				  gsl_vector_const_view Uab_col=
-				    gsl_matrix_const_column (Uab, index_ab);
-				  gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector,
-						 &p3_ab);
-				  if (e_mode!=0) {
-				    p3_ab=gsl_vector_get (ab, index_ab)-
-				      p3_ab+3.0*gsl_matrix_get(PPab,0,index_ab)
-				      -3.0*gsl_matrix_get (Pab, 0, index_ab);
-				  }
-				  gsl_matrix_set (PPPab, 0, index_ab, p3_ab);
-				}
-				else {
-				  index_aw=GetabIndex (a, p, n_cvt);
-				  index_bw=GetabIndex (b, p, n_cvt);
-				  index_ww=GetabIndex (p, p, n_cvt);
-
-				  ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
-				  ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
-				  ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
-				  ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-				  ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
-				  ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
-				  ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-				  ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw);
-				  ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw);
-				  ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww);
-
-				  p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww
-				    /(ps_ww*ps_ww*ps_ww);
-				  p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw +
-					  ps2_aw*ps2_bw)/ps_ww;
-				  p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*
-					  ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/
-				    (ps_ww*ps_ww);
-
-				  gsl_matrix_set (PPPab, p, index_ab, p3_ab);
-				}
-			}
-		}
-	}
-	return;
+void CalcPPPab(const size_t n_cvt, const size_t e_mode, // Fills rows 0..n_cvt+1 of PPPab; reads the already-filled Pab and PPab.
+               const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab,
+               const gsl_vector *ab, const gsl_matrix *Pab,
+               const gsl_matrix *PPab, gsl_matrix *PPPab) {
+  size_t index_ab, index_aw, index_bw, index_ww;
+  double p3_ab;
+  double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw,
+      ps3_ww;
+
+  for (size_t p = 0; p <= n_cvt + 1; ++p) { // p is the row of PPPab being filled.
+    for (size_t a = p + 1; a <= n_cvt + 2; ++a) { // Row p only receives pairs with p < a <= b <= n_cvt + 2.
+      for (size_t b = a; b <= n_cvt + 2; ++b) {
+        index_ab = GetabIndex(a, b, n_cvt); // Column used for the pair (a, b) in Uab, Pab, PPab and PPPab.
+        if (p == 0) {
+          gsl_vector_const_view Uab_col =
+              gsl_matrix_const_column(Uab, index_ab);
+          gsl_blas_ddot(HiHiHi_eval, &Uab_col.vector, &p3_ab); // p3_ab = dot product of HiHiHi_eval with that column of Uab.
+          if (e_mode != 0) { // For non-zero e_mode: ab[index_ab] - dot product + 3 * PPab(0, index_ab) - 3 * Pab(0, index_ab).
+            p3_ab = gsl_vector_get(ab, index_ab) - p3_ab +
+                    3.0 * gsl_matrix_get(PPab, 0, index_ab) -
+                    3.0 * gsl_matrix_get(Pab, 0, index_ab);
+          }
+          gsl_matrix_set(PPPab, 0, index_ab, p3_ab);
+        } else {
+          index_aw = GetabIndex(a, p, n_cvt);
+          index_bw = GetabIndex(b, p, n_cvt);
+          index_ww = GetabIndex(p, p, n_cvt);
+
+          ps3_ab = gsl_matrix_get(PPPab, p - 1, index_ab); // ps_* come from Pab, ps2_* from PPab, ps3_* from PPPab, all in row p - 1.
+          ps_aw = gsl_matrix_get(Pab, p - 1, index_aw);
+          ps_bw = gsl_matrix_get(Pab, p - 1, index_bw);
+          ps_ww = gsl_matrix_get(Pab, p - 1, index_ww);
+          ps2_aw = gsl_matrix_get(PPab, p - 1, index_aw);
+          ps2_bw = gsl_matrix_get(PPab, p - 1, index_bw);
+          ps2_ww = gsl_matrix_get(PPab, p - 1, index_ww);
+          ps3_aw = gsl_matrix_get(PPPab, p - 1, index_aw);
+          ps3_bw = gsl_matrix_get(PPPab, p - 1, index_bw);
+          ps3_ww = gsl_matrix_get(PPPab, p - 1, index_ww);
+
+          p3_ab = ps3_ab - // The new entry is accumulated over the next three statements. NOTE(review): no guard against ps_ww == 0.
+                  ps_aw * ps_bw * ps2_ww * ps2_ww / (ps_ww * ps_ww * ps_ww);
+          p3_ab -= (ps_aw * ps3_bw + ps_bw * ps3_aw + ps2_aw * ps2_bw) / ps_ww;
+          p3_ab += (ps_aw * ps2_bw * ps2_ww + ps_bw * ps2_aw * ps2_ww +
+                    ps_aw * ps_bw * ps3_ww) /
+                   (ps_ww * ps_ww);
+
+          gsl_matrix_set(PPPab, p, index_ab, p3_ab);
+        }
+      }
+    }
+  }
+  return;
 }
 
-double LogL_f (double l, void *params) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	size_t nc_total;
-	if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
-	double f=0.0, logdet_h=0.0, d;
-	size_t index_yy;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	for (size_t i=0; i<(p->eval)->size; ++i) {
-		d=gsl_vector_get (v_temp, i);
-		logdet_h+=log(fabs(d));
-	}
-
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-
-	double c=0.5*(double)ni_test*(log((double)ni_test)-log(2*M_PI)-1.0);
-
-	index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
-	f=c-0.5*logdet_h-0.5*(double)ni_test*log(P_yy);
-
-	gsl_matrix_free (Pab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (v_temp);
-	return f;
+double LogL_f(double l, void *params) { // Returns c - 0.5*logdet_h - 0.5*ni_test*log(P_yy) evaluated at l; presumably the log-likelihood -- confirm. params must point to a FUNC_PARAM.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab: number of pairs (a, b) with 1 <= a <= b <= n_cvt + 2.
+
+  size_t nc_total;
+  if (p->calc_null == true) { // nc_total selects the row of Pab from which P_yy is read below.
+    nc_total = n_cvt;
+  } else {
+    nc_total = n_cvt + 1;
+  }
+
+  double f = 0.0, logdet_h = 0.0, d;
+  size_t index_yy;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);
+  }
+  gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+  gsl_vector_div(Hi_eval, v_temp); // Element-wise: Hi_eval = 1 / (l*eval + 1) when e_mode == 0, otherwise l*eval / (l*eval + 1).
+
+  for (size_t i = 0; i < (p->eval)->size; ++i) {
+    d = gsl_vector_get(v_temp, i);
+    logdet_h += log(fabs(d)); // Accumulates the sum over i of log|l * eval_i + 1|.
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+
+  double c = // Term that depends only on ni_test, not on l.
+      0.5 * (double)ni_test * (log((double)ni_test) - log(2 * M_PI) - 1.0);
+
+  index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column for the pair (n_cvt + 2, n_cvt + 2).
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+  f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * log(P_yy);
+
+  gsl_matrix_free(Pab); // Release every GSL object allocated in this call.
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(v_temp);
+  return f;
 }
 
-double LogL_dev1 (double l, void *params) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+double LogL_dev1(double l, void *params) { // Returns -0.5*trace_HiK + 0.5*ni_test*yPKPy/P_yy at l; presumably the first derivative of LogL_f in l -- confirm. params must point to a FUNC_PARAM.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab: number of pairs (a, b) with 1 <= a <= b <= n_cvt + 2.
+
+  size_t nc_total;
+  if (p->calc_null == true) { // nc_total selects the row from which P_yy and PP_yy are read below.
+    nc_total = n_cvt;
+  } else {
+    nc_total = n_cvt + 1;
+  }
+
+  double dev1 = 0.0, trace_Hi = 0.0;
+  size_t index_yy;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);
+  }
+  gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+  gsl_vector_div(Hi_eval, v_temp); // Element-wise: Hi_eval = 1 / (l*eval + 1) when e_mode == 0, otherwise l*eval / (l*eval + 1).
+
+  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = element-wise square of Hi_eval.
+
+  gsl_vector_set_all(v_temp, 1.0); // v_temp is reused as a vector of ones, so the dot product below is the sum of Hi_eval.
+  gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
+
+  if (p->e_mode != 0) {
+    trace_Hi = (double)ni_test - trace_Hi;
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+  CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+
+  double trace_HiK = ((double)ni_test - trace_Hi) / l; // NOTE(review): divides by l; l == 0 is not guarded here.
+
+  index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column for the pair (n_cvt + 2, n_cvt + 2).
+
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+  double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy);
+  double yPKPy = (P_yy - PP_yy) / l;
+  dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy;
+
+  gsl_matrix_free(Pab); // Release every GSL object allocated in this call.
+  gsl_matrix_free(PPab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(HiHi_eval);
+  gsl_vector_free(v_temp);
+
+  return dev1;
+}
 
-	size_t nc_total;
-	if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
+double LogL_dev2(double l, void *params) { // Returns the dev2 expression computed below at l; presumably the second derivative of LogL_f in l -- confirm. params must point to a FUNC_PARAM.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab/PPPab: number of pairs (a, b) with 1 <= a <= b <= n_cvt + 2.
+
+  size_t nc_total;
+  if (p->calc_null == true) { // nc_total selects the row from which P_yy, PP_yy and PPP_yy are read below.
+    nc_total = n_cvt;
+  } else {
+    nc_total = n_cvt + 1;
+  }
+
+  double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
+  size_t index_yy;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);
+  }
+  gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+  gsl_vector_div(Hi_eval, v_temp); // Element-wise: Hi_eval = 1 / (l*eval + 1) when e_mode == 0, otherwise l*eval / (l*eval + 1).
+
+  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = element-wise square of Hi_eval.
+  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_mul(HiHiHi_eval, Hi_eval); // HiHiHi_eval = element-wise cube of Hi_eval.
+
+  gsl_vector_set_all(v_temp, 1.0); // v_temp is reused as a vector of ones, so the dot products below are plain sums.
+  gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
+  gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
+
+  if (p->e_mode != 0) {
+    trace_Hi = (double)ni_test - trace_Hi;
+    trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; // Uses the trace_Hi value already adjusted on the line above.
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+  CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+  CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+  double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l); // NOTE(review): divides by l * l; l == 0 is not guarded here.
+
+  index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column for the pair (n_cvt + 2, n_cvt + 2).
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+  double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy);
+  double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_yy);
+
+  double yPKPy = (P_yy - PP_yy) / l;
+  double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+  dev2 = 0.5 * trace_HiKHiK -
+         0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
+             (P_yy * P_yy);
+
+  gsl_matrix_free(Pab); // Release every GSL object allocated in this call.
+  gsl_matrix_free(PPab);
+  gsl_matrix_free(PPPab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(HiHi_eval);
+  gsl_vector_free(HiHiHi_eval);
+  gsl_vector_free(v_temp);
+
+  return dev2;
+}
 
-	double dev1=0.0, trace_Hi=0.0;
-	size_t index_yy;
+void LogL_dev12(double l, void *params, double *dev1, double *dev2) { // Writes to *dev1 and *dev2 the same expressions that LogL_dev1 and LogL_dev2 return, from one shared set of intermediates. params must point to a FUNC_PARAM.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab/PPPab: number of pairs (a, b) with 1 <= a <= b <= n_cvt + 2.
+
+  size_t nc_total;
+  if (p->calc_null == true) { // nc_total selects the row from which P_yy, PP_yy and PPP_yy are read below.
+    nc_total = n_cvt;
+  } else {
+    nc_total = n_cvt + 1;
+  }
+
+  double trace_Hi = 0.0, trace_HiHi = 0.0;
+  size_t index_yy;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);
+  }
+  gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+  gsl_vector_div(Hi_eval, v_temp); // Element-wise: Hi_eval = 1 / (l*eval + 1) when e_mode == 0, otherwise l*eval / (l*eval + 1).
+
+  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = element-wise square of Hi_eval.
+  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_mul(HiHiHi_eval, Hi_eval); // HiHiHi_eval = element-wise cube of Hi_eval.
+
+  gsl_vector_set_all(v_temp, 1.0); // v_temp is reused as a vector of ones, so the dot products below are plain sums.
+  gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
+  gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
+
+  if (p->e_mode != 0) {
+    trace_Hi = (double)ni_test - trace_Hi;
+    trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; // Uses the trace_Hi value already adjusted on the line above.
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+  CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+  CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+  double trace_HiK = ((double)ni_test - trace_Hi) / l; // NOTE(review): divides by l (and by l * l below); l == 0 is not guarded here.
+  double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l);
+
+  index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column for the pair (n_cvt + 2, n_cvt + 2).
+
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+  double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy);
+  double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_yy);
+
+  double yPKPy = (P_yy - PP_yy) / l;
+  double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+  *dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy; // Both output pointers are dereferenced unconditionally.
+  *dev2 = 0.5 * trace_HiKHiK -
+          0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
+              (P_yy * P_yy);
+
+  gsl_matrix_free(Pab); // Release every GSL object allocated in this call.
+  gsl_matrix_free(PPab);
+  gsl_matrix_free(PPPab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(HiHi_eval);
+  gsl_vector_free(HiHiHi_eval);
+  gsl_vector_free(v_temp);
+
+  return;
+}
 
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
+double LogRL_f(double l, void *params) { // Returns c - 0.5*logdet_h - 0.5*logdet_hiw - 0.5*df*log(P_yy) at l; presumably the restricted log-likelihood -- confirm. params must point to a FUNC_PARAM.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/Iab: number of pairs (a, b) with 1 <= a <= b <= n_cvt + 2.
+
+  double df;
+  size_t nc_total;
+  if (p->calc_null == true) { // nc_total is the loop bound and row used below; df = ni_test - nc_total in both branches.
+    nc_total = n_cvt;
+    df = (double)ni_test - (double)n_cvt;
+  } else {
+    nc_total = n_cvt + 1;
+    df = (double)ni_test - (double)n_cvt - 1.0;
+  }
+
+  double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d;
+  size_t index_ww;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *Iab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);
+  }
+  gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+  gsl_vector_div(Hi_eval, v_temp); // Element-wise: Hi_eval = 1 / (l*eval + 1) when e_mode == 0, otherwise l*eval / (l*eval + 1).
+
+  for (size_t i = 0; i < (p->eval)->size; ++i) {
+    d = gsl_vector_get(v_temp, i);
+    logdet_h += log(fabs(d)); // Accumulates the sum over i of log|l * eval_i + 1|.
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+  gsl_vector_set_all(v_temp, 1.0); // v_temp becomes a vector of ones.
+  CalcPab(n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab); // Iab: same computation as Pab but with the all-ones vector in place of Hi_eval.
+
+  // Calculate |WHiW|-|WW|.
+  logdet_hiw = 0.0;
+  for (size_t i = 0; i < nc_total; ++i) { // Sums log(Pab(i, idx)) - log(Iab(i, idx)) with idx the column for the pair (i + 1, i + 1).
+    index_ww = GetabIndex(i + 1, i + 1, n_cvt);
+    d = gsl_matrix_get(Pab, i, index_ww);
+    logdet_hiw += log(d);
+    d = gsl_matrix_get(Iab, i, index_ww);
+    logdet_hiw -= log(d);
+  }
+  index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // index_ww is reused here for the column of the pair (n_cvt + 2, n_cvt + 2).
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);
+
+  double c = 0.5 * df * (log(df) - log(2 * M_PI) - 1.0); // Term that depends only on df, not on l.
+  f = c - 0.5 * logdet_h - 0.5 * logdet_hiw - 0.5 * df * log(P_yy);
+
+  gsl_matrix_free(Pab); // Release every GSL object allocated in this call.
+  gsl_matrix_free(Iab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(v_temp);
+  return f;
+}
 
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
+double LogRL_dev1(double l, void *params) { // Returns -0.5*trace_PK + 0.5*df*yPKPy/P_yy at l; presumably the first derivative of LogRL_f in l -- confirm. params must point to a FUNC_PARAM.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab: number of pairs (a, b) with 1 <= a <= b <= n_cvt + 2.
+
+  double df;
+  size_t nc_total;
+  if (p->calc_null == true) { // nc_total is the loop bound and row used below; df = ni_test - nc_total in both branches.
+    nc_total = n_cvt;
+    df = (double)ni_test - (double)n_cvt;
+  } else {
+    nc_total = n_cvt + 1;
+    df = (double)ni_test - (double)n_cvt - 1.0;
+  }
+
+  double dev1 = 0.0, trace_Hi = 0.0;
+  size_t index_ww;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);
+  }
+  gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+  gsl_vector_div(Hi_eval, v_temp); // Element-wise: Hi_eval = 1 / (l*eval + 1) when e_mode == 0, otherwise l*eval / (l*eval + 1).
+
+  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = element-wise square of Hi_eval.
+
+  gsl_vector_set_all(v_temp, 1.0); // v_temp is reused as a vector of ones, so the dot product below is the sum of Hi_eval.
+  gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
+
+  if (p->e_mode != 0) {
+    trace_Hi = (double)ni_test - trace_Hi;
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+  CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+
+  // Calculate trace_PK only; this function does not compute trace_PKPK.
+  double trace_P = trace_Hi;
+  double ps_ww, ps2_ww;
+  for (size_t i = 0; i < nc_total; ++i) { // Subtracts PPab(i, idx) / Pab(i, idx) with idx the column for the pair (i + 1, i + 1).
+    index_ww = GetabIndex(i + 1, i + 1, n_cvt);
+    ps_ww = gsl_matrix_get(Pab, i, index_ww);
+    ps2_ww = gsl_matrix_get(PPab, i, index_ww);
+    trace_P -= ps2_ww / ps_ww;
+  }
+  double trace_PK = (df - trace_P) / l; // NOTE(review): divides by l; l == 0 is not guarded here.
+
+  // Calculate yPKPy.
+  index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // index_ww is reused here for the column of the pair (n_cvt + 2, n_cvt + 2).
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);
+  double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww);
+  double yPKPy = (P_yy - PP_yy) / l;
+
+  dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy;
+
+  gsl_matrix_free(Pab); // Release every GSL object allocated in this call.
+  gsl_matrix_free(PPab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(HiHi_eval);
+  gsl_vector_free(v_temp);
+
+  return dev1;
+}
 
-	gsl_vector_memcpy (HiHi_eval, Hi_eval);
-	gsl_vector_mul (HiHi_eval, Hi_eval);
+double LogRL_dev2(double l, void *params) { // Returns the dev2 expression computed below at l; presumably the second derivative of LogRL_f in l -- confirm. params must point to a FUNC_PARAM.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab/PPPab: number of pairs (a, b) with 1 <= a <= b <= n_cvt + 2.
+
+  double df;
+  size_t nc_total;
+  if (p->calc_null == true) { // nc_total is the loop bound and row used below; df = ni_test - nc_total in both branches.
+    nc_total = n_cvt;
+    df = (double)ni_test - (double)n_cvt;
+  } else {
+    nc_total = n_cvt + 1;
+    df = (double)ni_test - (double)n_cvt - 1.0;
+  }
+
+  double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
+  size_t index_ww;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);
+  }
+  gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+  gsl_vector_div(Hi_eval, v_temp); // Element-wise: Hi_eval = 1 / (l*eval + 1) when e_mode == 0, otherwise l*eval / (l*eval + 1).
+
+  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = element-wise square of Hi_eval.
+  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_mul(HiHiHi_eval, Hi_eval); // HiHiHi_eval = element-wise cube of Hi_eval.
+
+  gsl_vector_set_all(v_temp, 1.0); // v_temp is reused as a vector of ones, so the dot products below are plain sums.
+  gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
+  gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
+
+  if (p->e_mode != 0) {
+    trace_Hi = (double)ni_test - trace_Hi;
+    trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; // Uses the trace_Hi value already adjusted on the line above.
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+  CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+  CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+  // Calculate trace_P and trace_PP, then trace_PKPK.
+  double trace_P = trace_Hi, trace_PP = trace_HiHi;
+  double ps_ww, ps2_ww, ps3_ww;
+  for (size_t i = 0; i < nc_total; ++i) { // Each step reads row i of Pab, PPab and PPPab at the column for the pair (i + 1, i + 1).
+    index_ww = GetabIndex(i + 1, i + 1, n_cvt);
+    ps_ww = gsl_matrix_get(Pab, i, index_ww);
+    ps2_ww = gsl_matrix_get(PPab, i, index_ww);
+    ps3_ww = gsl_matrix_get(PPPab, i, index_ww);
+    trace_P -= ps2_ww / ps_ww;
+    trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww;
+  }
+  double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l); // NOTE(review): divides by l * l; l == 0 is not guarded here.
+
+  // Calculate yPKPy, yPKPKPy.
+  index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // index_ww is reused here for the column of the pair (n_cvt + 2, n_cvt + 2).
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);
+  double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww);
+  double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_ww);
+  double yPKPy = (P_yy - PP_yy) / l;
+  double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+  dev2 = 0.5 * trace_PKPK -
+         0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);
+
+  gsl_matrix_free(Pab); // Release every GSL object allocated in this call.
+  gsl_matrix_free(PPab);
+  gsl_matrix_free(PPPab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(HiHi_eval);
+  gsl_vector_free(HiHiHi_eval);
+  gsl_vector_free(v_temp);
+
+  return dev2;
+}
 
-	gsl_vector_set_all (v_temp, 1.0);
-	gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
+void LogRL_dev12(double l, void *params, double *dev1, double *dev2) {  // Writes *dev1 and *dev2 for the given l; returns nothing.
+  FUNC_PARAM *p = (FUNC_PARAM *)params;  // params is an untyped pointer to a FUNC_PARAM.
+  size_t n_cvt = p->n_cvt;
+  size_t ni_test = p->ni_test;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;  // Column count used for Pab, PPab and PPPab below.
+
+  double df;
+  size_t nc_total;
+  if (p->calc_null == true) {  // calc_null set: n_cvt terms and df = ni_test - n_cvt.
+    nc_total = n_cvt;
+    df = (double)ni_test - (double)n_cvt;
+  } else {  // Otherwise one extra term and one fewer degree of freedom.
+    nc_total = n_cvt + 1;
+    df = (double)ni_test - (double)n_cvt - 1.0;
+  }
+
+  double trace_Hi = 0.0, trace_HiHi = 0.0;
+  size_t index_ww;
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_scale(v_temp, l);  // v_temp = l * eval.
+  if (p->e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);  // Numerator 1: Hi_eval becomes 1 / (1 + l * eval).
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);  // Numerator l * eval: Hi_eval becomes l * eval / (1 + l * eval).
+  }
+  gsl_vector_add_constant(v_temp, 1.0);  // v_temp = 1 + l * eval.
+  gsl_vector_div(Hi_eval, v_temp);  // Element-wise division by (1 + l * eval).
+
+  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_mul(HiHi_eval, Hi_eval);  // HiHi_eval = Hi_eval squared, element-wise.
+  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_mul(HiHiHi_eval, Hi_eval);  // HiHiHi_eval = Hi_eval cubed, element-wise.
+
+  gsl_vector_set_all(v_temp, 1.0);  // Reuse v_temp as a ones vector so the dot products below are plain sums.
+  gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi);
+  gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi);
+
+  if (p->e_mode != 0) {  // Non-zero e_mode: re-express both sums relative to ni_test.
+    trace_Hi = (double)ni_test - trace_Hi;
+    trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test;  // Uses the trace_Hi value updated on the previous line.
+  }
+
+  CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+  CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+  CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+  // Calculate tracePK and trace PKPK.
+  double trace_P = trace_Hi, trace_PP = trace_HiHi;
+  double ps_ww, ps2_ww, ps3_ww;
+  for (size_t i = 0; i < nc_total; ++i) {  // One correction per term, i = 0 .. nc_total - 1.
+    index_ww = GetabIndex(i + 1, i + 1, n_cvt);  // Index for the (i + 1, i + 1) pair.
+    ps_ww = gsl_matrix_get(Pab, i, index_ww);
+    ps2_ww = gsl_matrix_get(PPab, i, index_ww);
+    ps3_ww = gsl_matrix_get(PPPab, i, index_ww);
+    trace_P -= ps2_ww / ps_ww;
+    trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww;
+  }
+  double trace_PK = (df - trace_P) / l;  // NOTE(review): divides by l; l == 0 is not guarded in this function.
+  double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l);
+
+  // Calculate yPKPy, yPKPKPy.
+  index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);  // Index for the (n_cvt + 2, n_cvt + 2) pair, read below as the "yy" entries.
+  double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);  // The yy entries are read from row nc_total of each matrix.
+  double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww);
+  double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_ww);
+  double yPKPy = (P_yy - PP_yy) / l;
+  double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+  *dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy;  // Presumably the first derivative w.r.t. l (per the name) -- confirm.
+  *dev2 = 0.5 * trace_PKPK -
+          0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);  // Presumably the second derivative -- confirm.
+
+  gsl_matrix_free(Pab);  // Release every temporary allocated above.
+  gsl_matrix_free(PPab);
+  gsl_matrix_free(PPPab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(HiHi_eval);
+  gsl_vector_free(HiHiHi_eval);
+  gsl_vector_free(v_temp);
+
+  return;
+}
 
-	if (p->e_mode!=0) {trace_Hi=(double)ni_test-trace_Hi;}
+void LMM::CalcRLWald(const double &l, const FUNC_PARAM &params, double &beta,
+                     double &se, double &p_wald) {  // Sets beta, se and p_wald for the given l; inputs are not modified.
+  size_t n_cvt = params.n_cvt;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;  // Column count used for Pab below.
+
+  int df = (int)ni_test - (int)n_cvt - 1;  // ni_test is not a parameter or local here (presumably an LMM member -- confirm).
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
+  gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+
+  gsl_vector_memcpy(v_temp, params.eval);
+  gsl_vector_scale(v_temp, l);  // v_temp = l * eval.
+  if (params.e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);  // Numerator 1: Hi_eval becomes 1 / (1 + l * eval).
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);  // Numerator l * eval: Hi_eval becomes l * eval / (1 + l * eval).
+  }
+  gsl_vector_add_constant(v_temp, 1.0);  // v_temp = 1 + l * eval.
+  gsl_vector_div(Hi_eval, v_temp);  // Element-wise division by (1 + l * eval).
+
+  CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);  // Pab is read immediately below, so CalcPab presumably fills it.
+
+  size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
+  size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt);
+  size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt);
+  double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy);  // Row n_cvt supplies the yy, xx and xy entries.
+  double P_xx = gsl_matrix_get(Pab, n_cvt, index_xx);
+  double P_xy = gsl_matrix_get(Pab, n_cvt, index_xy);
+  double Px_yy = gsl_matrix_get(Pab, n_cvt + 1, index_yy);  // Same yy column, one row further down.
+
+  beta = P_xy / P_xx;
+  double tau = (double)df / Px_yy;
+  se = sqrt(1.0 / (tau * P_xx));
+  p_wald = gsl_cdf_fdist_Q((P_yy - Px_yy) * tau, 1.0, df);  // Upper-tail F(1, df) probability of (P_yy - Px_yy) * tau.
+
+  gsl_matrix_free(Pab);  // Release the temporaries allocated above.
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(v_temp);
+  return;
+}
 
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-	CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+void LMM::CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
+                      double &se, double &p_score) {  // Sets beta, se and p_score for the given l; inputs are not modified.
+  size_t n_cvt = params.n_cvt;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;  // Column count used for Pab below.
+
+  int df = (int)ni_test - (int)n_cvt - 1;  // ni_test is not a parameter or local here (presumably an LMM member -- confirm).
+
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
+  gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+
+  gsl_vector_memcpy(v_temp, params.eval);
+  gsl_vector_scale(v_temp, l);  // v_temp = l * eval.
+  if (params.e_mode == 0) {
+    gsl_vector_set_all(Hi_eval, 1.0);  // Numerator 1: Hi_eval becomes 1 / (1 + l * eval).
+  } else {
+    gsl_vector_memcpy(Hi_eval, v_temp);  // Numerator l * eval: Hi_eval becomes l * eval / (1 + l * eval).
+  }
+  gsl_vector_add_constant(v_temp, 1.0);  // v_temp = 1 + l * eval.
+  gsl_vector_div(Hi_eval, v_temp);  // Element-wise division by (1 + l * eval).
+
+  CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);  // Pab is read immediately below, so CalcPab presumably fills it.
+
+  size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
+  size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt);
+  size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt);
+  double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy);  // Row n_cvt supplies the yy, xx and xy entries.
+  double P_xx = gsl_matrix_get(Pab, n_cvt, index_xx);
+  double P_xy = gsl_matrix_get(Pab, n_cvt, index_xy);
+  double Px_yy = gsl_matrix_get(Pab, n_cvt + 1, index_yy);  // Same yy column, one row further down; only used for tau.
+
+  beta = P_xy / P_xx;
+  double tau = (double)df / Px_yy;
+  se = sqrt(1.0 / (tau * P_xx));  // beta and se use the same formulas as CalcRLWald.
+
+  p_score =
+      gsl_cdf_fdist_Q((double)ni_test * P_xy * P_xy / (P_yy * P_xx), 1.0, df);  // Upper-tail F(1, df) probability of ni_test * P_xy^2 / (P_yy * P_xx).
+
+  gsl_matrix_free(Pab);  // Release the temporaries allocated above.
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(v_temp);
+  return;
+}
 
-	double trace_HiK=((double)ni_test-trace_Hi)/l;
+void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {  // Fills columns of Uab with element-wise products of UtW columns and Uty.
+  size_t index_ab;
+  size_t n_cvt = UtW->size2;  // n_cvt is the number of columns of UtW.
 
-	index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+  gsl_vector *u_a = gsl_vector_alloc(Uty->size);  // Scratch copy of the "a" vector for the current outer iteration.
 
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
-	double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
-	double yPKPy=(P_yy-PP_yy)/l;
-	dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
+  for (size_t a = 1; a <= n_cvt + 2; ++a) {
+    if (a == n_cvt + 1) {  // Index n_cvt + 1 is skipped by this overload.
+      continue;
+    }
 
-	gsl_matrix_free (Pab);
-	gsl_matrix_free (PPab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (HiHi_eval);
-	gsl_vector_free (v_temp);
+    if (a == n_cvt + 2) {  // Index n_cvt + 2 selects Uty; indices 1..n_cvt select column a - 1 of UtW.
+      gsl_vector_memcpy(u_a, Uty);
+    } else {
+      gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, a - 1);
+      gsl_vector_memcpy(u_a, &UtW_col.vector);
+    }
 
-	return dev1;
-}
+    for (size_t b = a; b >= 1; --b) {  // b runs from a down to 1; the loop ends once the unsigned b reaches 0.
+      if (b == n_cvt + 1) {
+        continue;
+      }
 
-double LogL_dev2 (double l, void *params) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	size_t nc_total;
-	if (p->calc_null==true) {
-	  nc_total=n_cvt;
-	} else {
-	  nc_total=n_cvt+1;
-	}
-
-	double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
-	size_t index_yy;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	gsl_vector_memcpy (HiHi_eval, Hi_eval);
-	gsl_vector_mul (HiHi_eval, Hi_eval);
-	gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
-	gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
-	gsl_vector_set_all (v_temp, 1.0);
-	gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-	gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
-	if (p->e_mode!=0) {
-		trace_Hi=(double)ni_test-trace_Hi;
-		trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
-	}
-
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-	CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-	CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab,
-		   PPPab);
-
-	double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
-	index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
-	double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
-	double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
-	double yPKPy=(P_yy-PP_yy)/l;
-	double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
-	dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*
-	  (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
-	gsl_matrix_free (Pab);
-	gsl_matrix_free (PPab);
-	gsl_matrix_free (PPPab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (HiHi_eval);
-	gsl_vector_free (HiHiHi_eval);
-	gsl_vector_free (v_temp);
-
-	return dev2;
-}
+      index_ab = GetabIndex(a, b, n_cvt);  // Column of Uab assigned to the pair (a, b).
+      gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
 
-void LogL_dev12 (double l, void *params, double *dev1, double *dev2) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	size_t nc_total;
-	if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
-	double trace_Hi=0.0, trace_HiHi=0.0;
-	size_t index_yy;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	gsl_vector_memcpy (HiHi_eval, Hi_eval);
-	gsl_vector_mul (HiHi_eval, Hi_eval);
-	gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
-	gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
-	gsl_vector_set_all (v_temp, 1.0);
-	gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-	gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
-	if (p->e_mode!=0) {
-		trace_Hi=(double)ni_test-trace_Hi;
-		trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
-	}
-
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-	CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-	CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab,
-		   PPPab);
-
-	double trace_HiK=((double)ni_test-trace_Hi)/l;
-	double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
-	index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
-	double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
-	double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
-	double yPKPy=(P_yy-PP_yy)/l;
-	double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
-	*dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
-	*dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*
-	  (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
-	gsl_matrix_free (Pab);
-	gsl_matrix_free (PPab);
-	gsl_matrix_free (PPPab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (HiHi_eval);
-	gsl_vector_free (HiHiHi_eval);
-	gsl_vector_free (v_temp);
-
-	return;
-}
+      if (b == n_cvt + 2) {  // Load the "b" vector into the destination column first.
+        gsl_vector_memcpy(&Uab_col.vector, Uty);
+      } else {
+        gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1);
+        gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector);
+      }
 
-double LogRL_f (double l, void *params) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	double df;
-	size_t nc_total;
-	if (p->calc_null==true) {
-	  nc_total=n_cvt; df=(double)ni_test-(double)n_cvt;
-	}
-	else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
-	double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d;
-	size_t index_ww;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *Iab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	for (size_t i=0; i<(p->eval)->size; ++i) {
-		d=gsl_vector_get (v_temp, i);
-		logdet_h+=log(fabs(d));
-	}
-
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-	gsl_vector_set_all (v_temp, 1.0);
-	CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
-
-	// Calculate |WHiW|-|WW|.
-	logdet_hiw=0.0;
-	for (size_t i=0; i<nc_total; ++i) {
-		index_ww=GetabIndex (i+1, i+1, n_cvt);
-		d=gsl_matrix_get (Pab, i, index_ww);
-		logdet_hiw+=log(d);
-		d=gsl_matrix_get (Iab, i, index_ww);
-		logdet_hiw-=log(d);
-	}
-	index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
-
-	double c=0.5*df*(log(df)-log(2*M_PI)-1.0);
-	f=c-0.5*logdet_h-0.5*logdet_hiw-0.5*df*log(P_yy);
-
-	gsl_matrix_free (Pab);
-	gsl_matrix_free (Iab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (v_temp);
-	return f;
-}
+      gsl_vector_mul(&Uab_col.vector, u_a);  // Column becomes the element-wise product of the "b" and "a" vectors.
+    }
+  }
 
-double LogRL_dev1 (double l, void *params) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	double df;
-	size_t nc_total;
-	if (p->calc_null==true) {
-	  nc_total=n_cvt;
-	  df=(double)ni_test-(double)n_cvt;
-	}
-	else {
-	  nc_total=n_cvt+1;
-	  df=(double)ni_test-(double)n_cvt-1.0;
-	}
-
-	double dev1=0.0, trace_Hi=0.0;
-	size_t index_ww;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	gsl_vector_memcpy (HiHi_eval, Hi_eval);
-	gsl_vector_mul (HiHi_eval, Hi_eval);
-
-	gsl_vector_set_all (v_temp, 1.0);
-	gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-
-	if (p->e_mode!=0) {
-		trace_Hi=(double)ni_test-trace_Hi;
-	}
-
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-	CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-
-	// Calculate tracePK and trace PKPK.
-	double trace_P=trace_Hi;
-	double ps_ww, ps2_ww;
-	for (size_t i=0; i<nc_total; ++i) {
-		index_ww=GetabIndex (i+1, i+1, n_cvt);
-		ps_ww=gsl_matrix_get (Pab, i, index_ww);
-		ps2_ww=gsl_matrix_get (PPab, i, index_ww);
-		trace_P-=ps2_ww/ps_ww;
-	}
-	double trace_PK=(df-trace_P)/l;
-
-	// Calculate yPKPy, yPKPKPy.
-	index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
-	double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
-	double yPKPy=(P_yy-PP_yy)/l;
-
-	dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
-
-	gsl_matrix_free (Pab);
-	gsl_matrix_free (PPab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (HiHi_eval);
-	gsl_vector_free (v_temp);
-
-	return dev1;
+  gsl_vector_free(u_a);  // u_a is the only allocation made by this function.
+  return;
 }
 
-double LogRL_dev2 (double l, void *params) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	double df;
-	size_t nc_total;
-	if (p->calc_null==true) {
-	  nc_total=n_cvt;
-	  df=(double)ni_test-(double)n_cvt;
-	}
-	else {
-	  nc_total=n_cvt+1;
-	  df=(double)ni_test-(double)n_cvt-1.0;
-	}
-
-	double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
-	size_t index_ww;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	gsl_vector_memcpy (HiHi_eval, Hi_eval);
-	gsl_vector_mul (HiHi_eval, Hi_eval);
-	gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
-	gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
-	gsl_vector_set_all (v_temp, 1.0);
-	gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-	gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
-	if (p->e_mode!=0) {
-		trace_Hi=(double)ni_test-trace_Hi;
-		trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
-	}
-
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-	CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-	CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab,
-		   PPab, PPPab);
-
-	// Calculate tracePK and trace PKPK.
-	double trace_P=trace_Hi, trace_PP=trace_HiHi;
-	double ps_ww, ps2_ww, ps3_ww;
-	for (size_t i=0; i<nc_total; ++i) {
-		index_ww=GetabIndex (i+1, i+1, n_cvt);
-		ps_ww=gsl_matrix_get (Pab, i, index_ww);
-		ps2_ww=gsl_matrix_get (PPab, i, index_ww);
-		ps3_ww=gsl_matrix_get (PPPab, i, index_ww);
-		trace_P-=ps2_ww/ps_ww;
-		trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww;
-	}
-	double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
-	// Calculate yPKPy, yPKPKPy.
-	index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
-	double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
-	double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
-	double yPKPy=(P_yy-PP_yy)/l;
-	double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
-	dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
-	gsl_matrix_free (Pab);
-	gsl_matrix_free (PPab);
-	gsl_matrix_free (PPPab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (HiHi_eval);
-	gsl_vector_free (HiHiHi_eval);
-	gsl_vector_free (v_temp);
-
-	return dev2;
-}
+void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty,
+             const gsl_vector *Utx, gsl_matrix *Uab) {  // Fills only the Uab columns for pairs (n_cvt + 1, b), i.e. those involving Utx.
+  size_t index_ab;
+  size_t n_cvt = UtW->size2;  // n_cvt is the number of columns of UtW.
+
+  for (size_t b = 1; b <= n_cvt + 2; ++b) {
+    index_ab = GetabIndex(n_cvt + 1, b, n_cvt);  // Column of Uab assigned to the pair (n_cvt + 1, b).
+    gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
+
+    if (b == n_cvt + 2) {  // b selects Uty (n_cvt + 2), Utx (n_cvt + 1) or column b - 1 of UtW.
+      gsl_vector_memcpy(&Uab_col.vector, Uty);
+    } else if (b == n_cvt + 1) {
+      gsl_vector_memcpy(&Uab_col.vector, Utx);
+    } else {
+      gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1);
+      gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector);
+    }
 
-void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) {
-	FUNC_PARAM *p=(FUNC_PARAM *) params;
-	size_t n_cvt=p->n_cvt;
-	size_t ni_test=p->ni_test;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	double df;
-	size_t nc_total;
-	if (p->calc_null==true) {
-	  nc_total=n_cvt;
-	  df=(double)ni_test-(double)n_cvt;
-	}
-	else {
-	  nc_total=n_cvt+1;
-	  df=(double)ni_test-(double)n_cvt-1.0;
-	}
-
-	double trace_Hi=0.0, trace_HiHi=0.0;
-	size_t index_ww;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
-	gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
-	gsl_vector_memcpy (v_temp, p->eval);
-	gsl_vector_scale (v_temp, l);
-	if (p->e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	gsl_vector_memcpy (HiHi_eval, Hi_eval);
-	gsl_vector_mul (HiHi_eval, Hi_eval);
-	gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
-	gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
-	gsl_vector_set_all (v_temp, 1.0);
-	gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-	gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
-	if (p->e_mode!=0) {
-		trace_Hi=(double)ni_test-trace_Hi;
-		trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
-	}
-
-	CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-	CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-	CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab,
-		   PPab, PPPab);
-
-	// Calculate tracePK and trace PKPK.
-	double trace_P=trace_Hi, trace_PP=trace_HiHi;
-	double ps_ww, ps2_ww, ps3_ww;
-	for (size_t i=0; i<nc_total; ++i) {
-		index_ww=GetabIndex (i+1, i+1, n_cvt);
-		ps_ww=gsl_matrix_get (Pab, i, index_ww);
-		ps2_ww=gsl_matrix_get (PPab, i, index_ww);
-		ps3_ww=gsl_matrix_get (PPPab, i, index_ww);
-		trace_P-=ps2_ww/ps_ww;
-		trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww;
-	}
-	double trace_PK=(df-trace_P)/l;
-	double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
-	// Calculate yPKPy, yPKPKPy.
-	index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
-	double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
-	double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
-	double yPKPy=(P_yy-PP_yy)/l;
-	double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
-	*dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
-	*dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/
-	  (P_yy*P_yy);
-
-	gsl_matrix_free (Pab);
-	gsl_matrix_free (PPab);
-	gsl_matrix_free (PPPab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (HiHi_eval);
-	gsl_vector_free (HiHiHi_eval);
-	gsl_vector_free (v_temp);
-
-	return;
-}
+    gsl_vector_mul(&Uab_col.vector, Utx);  // Column becomes the element-wise product of the "b" vector and Utx.
+  }
 
-void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params,
-		      double &beta, double &se, double &p_wald) {
-	size_t n_cvt=params.n_cvt;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	int df=(int)ni_test-(int)n_cvt-1;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
-	gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
-	gsl_vector_memcpy (v_temp, params.eval);
-	gsl_vector_scale (v_temp, l);
-	if (params.e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
-	size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
-	size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
-	double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
-	double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
-	double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
-	beta=P_xy/P_xx;
-	double tau=(double)df/Px_yy;
-	se=sqrt(1.0/(tau*P_xx));
-	p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df);
-
-	gsl_matrix_free (Pab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (v_temp);
-	return;
+  return;  // No temporaries are allocated here, so there is nothing to free.
 }
 
-void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params,
-		       double &beta, double &se, double &p_score) {
-	size_t n_cvt=params.n_cvt;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	int df=(int)ni_test-(int)n_cvt-1;
-
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
-	gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
-	gsl_vector_memcpy (v_temp, params.eval);
-	gsl_vector_scale (v_temp, l);
-	if (params.e_mode==0) {
-	  gsl_vector_set_all (Hi_eval, 1.0);
-	} else {
-	  gsl_vector_memcpy (Hi_eval, v_temp);
-	}
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
-	size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
-	size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
-	double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
-	double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
-	double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
-	beta=P_xy/P_xx;
-	double tau=(double)df/Px_yy;
-	se=sqrt(1.0/(tau*P_xx));
-
-	p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx),
-				 1.0, df);
-
-	gsl_matrix_free (Pab);
-	gsl_vector_free (Hi_eval);
-	gsl_vector_free (v_temp);
-	return;
-}
+void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {  // Stores dot products between columns of W and y into entries of ab.
+  size_t index_ab;
+  size_t n_cvt = W->size2;  // n_cvt is the number of columns of W.
 
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
-	size_t index_ab;
-	size_t n_cvt=UtW->size2;
-
-	gsl_vector *u_a=gsl_vector_alloc (Uty->size);
-
-	for (size_t a=1; a<=n_cvt+2; ++a) {
-		if (a==n_cvt+1) {continue;}
-
-		if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);}
-		else {
-		  gsl_vector_const_view UtW_col=
-		    gsl_matrix_const_column (UtW, a-1);
-		  gsl_vector_memcpy (u_a, &UtW_col.vector);
-		}
-
-		for (size_t b=a; b>=1; --b) {
-			if (b==n_cvt+1) {continue;}
-
-			index_ab=GetabIndex (a, b, n_cvt);
-			gsl_vector_view Uab_col=
-			  gsl_matrix_column (Uab, index_ab);
-
-			if (b==n_cvt+2) {
-			  gsl_vector_memcpy (&Uab_col.vector, Uty);
-			}
-			else {
-				gsl_vector_const_view UtW_col=
-				  gsl_matrix_const_column (UtW, b-1);
-				gsl_vector_memcpy (&Uab_col.vector,
-						   &UtW_col.vector);
-			}
-
-			gsl_vector_mul(&Uab_col.vector, u_a);
-		}
-	}
-
-	gsl_vector_free (u_a);
-	return;
-}
+  double d;
+  gsl_vector *v_a = gsl_vector_alloc(y->size);  // Scratch copies of the "a" and "b" vectors.
+  gsl_vector *v_b = gsl_vector_alloc(y->size);
 
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty,
-	      const gsl_vector *Utx, gsl_matrix *Uab) {
-	size_t index_ab;
-	size_t n_cvt=UtW->size2;
-
-	for (size_t b=1; b<=n_cvt+2; ++b) {
-		index_ab=GetabIndex (n_cvt+1, b, n_cvt);
-		gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
-
-		if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
-		else if (b==n_cvt+1) {
-		  gsl_vector_memcpy (&Uab_col.vector, Utx);
-		}
-		else {
-		  gsl_vector_const_view UtW_col=
-		    gsl_matrix_const_column (UtW, b-1);
-		  gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
-		}
-
-		gsl_vector_mul(&Uab_col.vector, Utx);
-	}
-
-	return;
-}
+  for (size_t a = 1; a <= n_cvt + 2; ++a) {
+    if (a == n_cvt + 1) {  // Index n_cvt + 1 is skipped by this overload.
+      continue;
+    }
 
-void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
-	size_t index_ab;
-	size_t n_cvt=W->size2;
-
-	double d;
-	gsl_vector *v_a=gsl_vector_alloc (y->size);
-	gsl_vector *v_b=gsl_vector_alloc (y->size);
-
-	for (size_t a=1; a<=n_cvt+2; ++a) {
-		if (a==n_cvt+1) {continue;}
-
-		if (a==n_cvt+2) {
-		  gsl_vector_memcpy (v_a, y);
-		}
-		else {
-		  gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1);
-		  gsl_vector_memcpy (v_a, &W_col.vector);
-		}
-
-		for (size_t b=a; b>=1; --b) {
-			if (b==n_cvt+1) {continue;}
-
-			index_ab=GetabIndex (a, b, n_cvt);
-
-			if (b==n_cvt+2) {
-			  gsl_vector_memcpy (v_b, y);
-			}
-			else {
-			  gsl_vector_const_view W_col=
-			    gsl_matrix_const_column (W, b-1);
-			  gsl_vector_memcpy (v_b, &W_col.vector);
-			}
-
-			gsl_blas_ddot (v_a, v_b, &d);
-			gsl_vector_set(ab, index_ab, d);
-		}
-	}
-
-	gsl_vector_free (v_a);
-	gsl_vector_free (v_b);
-	return;
+    if (a == n_cvt + 2) {  // Index n_cvt + 2 selects y; indices 1..n_cvt select column a - 1 of W.
+      gsl_vector_memcpy(v_a, y);
+    } else {
+      gsl_vector_const_view W_col = gsl_matrix_const_column(W, a - 1);
+      gsl_vector_memcpy(v_a, &W_col.vector);
+    }
+
+    for (size_t b = a; b >= 1; --b) {  // b runs from a down to 1; the loop ends once the unsigned b reaches 0.
+      if (b == n_cvt + 1) {
+        continue;
+      }
+
+      index_ab = GetabIndex(a, b, n_cvt);  // Entry of ab assigned to the pair (a, b).
+
+      if (b == n_cvt + 2) {
+        gsl_vector_memcpy(v_b, y);
+      } else {
+        gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1);
+        gsl_vector_memcpy(v_b, &W_col.vector);
+      }
+
+      gsl_blas_ddot(v_a, v_b, &d);  // d = dot product of the "a" and "b" vectors.
+      gsl_vector_set(ab, index_ab, d);
+    }
+  }
+
+  gsl_vector_free(v_a);  // Release both scratch vectors.
+  gsl_vector_free(v_b);
+  return;
 }
 
-void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
-	     gsl_vector *ab) {
-	size_t index_ab;
-	size_t n_cvt=W->size2;
+void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
+            gsl_vector *ab) {  // Stores only the ab entries for pairs (n_cvt + 1, b), i.e. dot products with x.
+  size_t index_ab;
+  size_t n_cvt = W->size2;  // n_cvt is the number of columns of W.
 
-	double d;
-	gsl_vector *v_b=gsl_vector_alloc (y->size);
+  double d;
+  gsl_vector *v_b = gsl_vector_alloc(y->size);  // Scratch copy of the "b" vector.
 
-	for (size_t b=1; b<=n_cvt+2; ++b) {
-		index_ab=GetabIndex (n_cvt+1, b, n_cvt);
+  for (size_t b = 1; b <= n_cvt + 2; ++b) {
+    index_ab = GetabIndex(n_cvt + 1, b, n_cvt);  // Entry of ab assigned to the pair (n_cvt + 1, b).
 
-		if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
-		else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);}
-		else {
-		  gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
-		  gsl_vector_memcpy (v_b, &W_col.vector);
-		}
+    if (b == n_cvt + 2) {  // b selects y (n_cvt + 2), x (n_cvt + 1) or column b - 1 of W.
+      gsl_vector_memcpy(v_b, y);
+    } else if (b == n_cvt + 1) {
+      gsl_vector_memcpy(v_b, x);
+    } else {
+      gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1);
+      gsl_vector_memcpy(v_b, &W_col.vector);
+    }
 
-		gsl_blas_ddot (x, v_b, &d);
-		gsl_vector_set(ab, index_ab, d);
-	}
+    gsl_blas_ddot(x, v_b, &d);  // d = dot product of x and the "b" vector.
+    gsl_vector_set(ab, index_ab, d);
+  }
 
-	gsl_vector_free (v_b);
-	return;
+  gsl_vector_free(v_b);  // v_b is the only allocation made by this function.
+  return;
 }
 
-void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval,
-		       const gsl_matrix *UtW, const gsl_vector *Utx,
-		       const gsl_matrix *W, const gsl_vector *x) {
-	igzstream infile (file_gene.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading gene expression file:"<<file_gene<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-
-	string line;
-	char *ch_ptr;
-
-	double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
-	double p_lrt=0, p_score=0;
-	double logl_H1=0.0, logl_H0=0.0, l_H0;
-	int c_phen;
-	string rs; // Gene id.
-	double d;
-
-	// Calculate basic quantities.
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	gsl_vector *y=gsl_vector_alloc (U->size1);
-	gsl_vector *Uty=gsl_vector_alloc (U->size2);
-	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
-
-	// Header.
-	getline(infile, line);
-
-	for (size_t t=0; t<ng_total; t++) {
-		!safeGetline(infile, line).eof();
-		if (t%d_pace==0 || t==ng_total-1) {
-		  ProgressBar ("Performing Analysis ", t, ng_total-1);
-		}
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		rs=ch_ptr;
-
-		c_phen=0;
-		for (size_t i=0; i<indicator_idv.size(); ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==0) {continue;}
-
-			d=atof(ch_ptr);
-			gsl_vector_set(y, c_phen, d);
-
-			c_phen++;
-		}
-
-		time_start=clock();
-		gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty);
-		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Calculate null.
-		time_start=clock();
-
-		gsl_matrix_set_zero (Uab);
-
-		CalcUab (UtW, Uty, Uab);
-		FUNC_PARAM param0={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-		if (a_mode==2 || a_mode==3 || a_mode==4) {
-		  CalcLambda('L', param0, l_min, l_max, n_region,
-			     l_H0, logl_H0);
-		}
-
-		// Calculate alternative.
-		CalcUab(UtW, Uty, Utx, Uab);
-		FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-		//3 is before 1.
-		if (a_mode==3 || a_mode==4) {
-			CalcRLScore (l_H0, param1, beta, se, p_score);
-		}
-
-		if (a_mode==1 || a_mode==4) {
-		  CalcLambda ('R', param1, l_min, l_max, n_region,
-			      lambda_remle, logl_H1);
-		  CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		}
-
-		if (a_mode==2 || a_mode==4) {
-		  CalcLambda ('L', param1, l_min, l_max, n_region,
-			      lambda_mle, logl_H1);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
-		}
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Store summary data.
-		SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
-			      p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
-    }
-	cout<<endl;
-
-	gsl_vector_free (y);
-	gsl_vector_free (Uty);
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
-
-	infile.close();
-	infile.clear();
+void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
+                      const gsl_matrix *UtW, const gsl_vector *Utx,
+                      const gsl_matrix *W, const gsl_vector *x) {
+  igzstream infile(file_gene.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading gene expression file:" << file_gene << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  string line;
+  char *ch_ptr;
+
+  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+  double p_lrt = 0, p_score = 0;
+  double logl_H1 = 0.0, logl_H0 = 0.0, l_H0;
+  int c_phen;
+  string rs; // Gene id.
+  double d;
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+  gsl_vector *y = gsl_vector_alloc(U->size1);
+  gsl_vector *Uty = gsl_vector_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+
+  // Header.
+  getline(infile, line);
+
+  for (size_t t = 0; t < ng_total; t++) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == ng_total - 1) {
+      ProgressBar("Performing Analysis ", t, ng_total - 1);
+    }
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    rs = ch_ptr;
+
+    c_phen = 0;
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      d = atof(ch_ptr);
+      gsl_vector_set(y, c_phen, d);
+
+      c_phen++;
+    }
+
+    time_start = clock();
+    gsl_blas_dgemv(CblasTrans, 1.0, U, y, 0.0, Uty);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Calculate null.
+    time_start = clock();
+
+    gsl_matrix_set_zero(Uab);
+
+    CalcUab(UtW, Uty, Uab);
+    FUNC_PARAM param0 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+    if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
+      CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0);
+    }
+
+    // Calculate alternative.
+    CalcUab(UtW, Uty, Utx, Uab);
+    FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+    // a_mode 3 (score) precedes 1 (Wald) so mode 4 keeps Wald's beta/se.
+    if (a_mode == 3 || a_mode == 4) {
+      CalcRLScore(l_H0, param1, beta, se, p_score);
+    }
+
+    if (a_mode == 1 || a_mode == 4) {
+      CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+      CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+    }
+
+    if (a_mode == 2 || a_mode == 4) {
+      CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+      p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
+    }
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  gsl_vector_free(y);
+  gsl_vector_free(Uty);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+
+  infile.close();
+  infile.clear();
 
-	return;
+  return;
 }
 
-void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
-			 const gsl_matrix *UtW, const gsl_vector *Uty,
-			 const gsl_matrix *W, const gsl_vector *y) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-
-	string line;
-	char *ch_ptr;
-
-	double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
-	double p_lrt=0, p_score=0;
-	double logl_H1=0.0;
-	int n_miss, c_phen;
-	double geno, x_mean;
-
-	// Calculate basic quantities.
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	gsl_vector *x=gsl_vector_alloc (U->size1);
-	gsl_vector *x_miss=gsl_vector_alloc (U->size1);
-	gsl_vector *Utx=gsl_vector_alloc (U->size2);
-	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	gsl_matrix_set_zero (Uab);
-	CalcUab (UtW, Uty, Uab);
-
-	//start reading genotypes and analyze
-	size_t c=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%d_pace==0 || t==(ns_total-1)) {
-		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		x_mean=0.0; c_phen=0; n_miss=0;
-		gsl_vector_set_zero(x_miss);
-		for (size_t i=0; i<ni_total; ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==0) {continue;}
-
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;
-			}
-			else {
-				geno=atof(ch_ptr);
-
-				gsl_vector_set(x, c_phen, geno);
-				gsl_vector_set(x_miss, c_phen, 1.0);
-				x_mean+=geno;
-			}
-			c_phen++;
-		}
-
-		x_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			if (gsl_vector_get (x_miss, i)==0) {
-			  gsl_vector_set(x, i, x_mean);
-			}
-		}
-
-		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
-		gsl_vector_memcpy (&Xlarge_col.vector, x);
-		c++;
-
-		if (c%msize==0 || c==t_last) {
-		  size_t l=0;
-		  if (c%msize==0) {l=msize;} else {l=c%msize;}
-
-		  gsl_matrix_view Xlarge_sub=
-		    gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-		  gsl_matrix_view UtXlarge_sub=
-		    gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-		  time_start=clock();
-		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
-				  0.0, &UtXlarge_sub.matrix);
-		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		  gsl_matrix_set_zero (Xlarge);
-
-		  for (size_t i=0; i<l; i++) {
-		    gsl_vector_view UtXlarge_col=
-		      gsl_matrix_column (UtXlarge, i);
-		    gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
-
-		    CalcUab(UtW, Uty, Utx, Uab);
-
-		    time_start=clock();
-		    FUNC_PARAM param1=
-		      {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-		    // 3 is before 1.
-		    if (a_mode==3 || a_mode==4) {
-		      CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		    }
-
-		    if (a_mode==1 || a_mode==4) {
-		      CalcLambda ('R', param1, l_min, l_max, n_region,
-				  lambda_remle, logl_H1);
-		      CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		    }
-
-		    if (a_mode==2 || a_mode==4) {
-		      CalcLambda ('L', param1, l_min, l_max, n_region,
-				  lambda_mle, logl_H1);
-		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
-		    }
-
-		    time_opt+=(clock()-time_start)/
-		      (double(CLOCKS_PER_SEC)*60.0);
-
-		    // Store summary data.
-		    SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
-				  p_wald, p_lrt, p_score};
-
-		    sumStat.push_back(SNPs);
-		  }
-		}
-	}
-	cout<<endl;
-
-	gsl_vector_free (x);
-	gsl_vector_free (x_miss);
-	gsl_vector_free (Utx);
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
-
-	gsl_matrix_free (Xlarge);
-	gsl_matrix_free (UtXlarge);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+                        const gsl_matrix *UtW, const gsl_vector *Uty,
+                        const gsl_matrix *W, const gsl_vector *y) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  string line;
+  char *ch_ptr;
+
+  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+  double p_lrt = 0, p_score = 0;
+  double logl_H1 = 0.0;
+  int n_miss, c_phen;
+  double geno, x_mean;
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+  gsl_vector *x = gsl_vector_alloc(U->size1);
+  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
+
+  // Start reading genotypes and analyze.
+  size_t c = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    c++;
+
+    if (c % msize == 0 || c == t_last) {
+      size_t l = 0;
+      if (c % msize == 0) {
+        l = msize;
+      } else {
+        l = c % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+
+        CalcUab(UtW, Uty, Utx, Uab);
+
+        time_start = clock();
+        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+        // a_mode 3 (score) precedes 1 (Wald) so mode 4 keeps Wald's beta/se.
+        if (a_mode == 3 || a_mode == 4) {
+          CalcRLScore(l_mle_null, param1, beta, se, p_score);
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
+                     logl_H1);
+          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
+                        p_wald, p_lrt, p_score};
+
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+  gsl_vector_free(Utx);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
-			const gsl_matrix *UtW, const gsl_vector *Uty,
-			const gsl_matrix *W, const gsl_vector *y) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
-	clock_t time_start=clock();
-
-	char ch[1];
-	bitset<8> b;
-
-	double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
-	double p_lrt=0, p_score=0;
-	double logl_H1=0.0;
-	int n_bit, n_miss, ci_total, ci_test;
-	double geno, x_mean;
-
-	// Calculate basic quantities.
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	gsl_vector *x=gsl_vector_alloc (U->size1);
-	gsl_vector *Utx=gsl_vector_alloc (U->size2);
-	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	gsl_matrix_set_zero (Uab);
-	CalcUab (UtW, Uty, Uab);
-
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	size_t c=0, t_last=0;
-	for (size_t t=0; t<snpInfo.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-		if (t%d_pace==0 || t==snpInfo.size()-1) {
-		  ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-			  if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
-			    break;
-			  }
-			  if (indicator_idv[ci_total]==0) {
-			    ci_total++;
-			    continue;
-			  }
-
-			  if (b[2*j]==0) {
-			    if (b[2*j+1]==0) {
-			      gsl_vector_set(x, ci_test, 2);
-			      x_mean+=2.0;
-			    }
-			    else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
-			  }
-			  else {
-			    if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
-			    else {gsl_vector_set(x, ci_test, -9); n_miss++; }
-			  }
-
-			  ci_total++;
-			  ci_test++;
-			}
-		}
-
-		x_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			geno=gsl_vector_get(x,i);
-			if (geno==-9) {
-			  gsl_vector_set(x, i, x_mean);
-			  geno=x_mean;
-			}
-		}
-
-		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
-		gsl_vector_memcpy (&Xlarge_col.vector, x);
-		c++;
-
-		if (c%msize==0 || c==t_last) {
-		  size_t l=0;
-		  if (c%msize==0) {l=msize;} else {l=c%msize;}
-
-		  gsl_matrix_view Xlarge_sub=
-		    gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-		  gsl_matrix_view UtXlarge_sub=
-		    gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-		  time_start=clock();
-		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
-				  0.0, &UtXlarge_sub.matrix);
-		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		  gsl_matrix_set_zero (Xlarge);
-
-		  for (size_t i=0; i<l; i++) {
-		    gsl_vector_view UtXlarge_col=
-		      gsl_matrix_column (UtXlarge, i);
-		    gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
-
-		    CalcUab(UtW, Uty, Utx, Uab);
-
-		    time_start=clock();
-		    FUNC_PARAM param1={false, ni_test, n_cvt, eval,
-				       Uab, ab, 0};
-
-		    // 3 is before 1, for beta.
-		    if (a_mode==3 || a_mode==4) {
-		      CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		    }
-
-		    if (a_mode==1 || a_mode==4) {
-		      CalcLambda ('R', param1, l_min, l_max, n_region,
-				  lambda_remle, logl_H1);
-		      CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		    }
-
-		    if (a_mode==2 || a_mode==4) {
-		      CalcLambda ('L', param1, l_min, l_max, n_region,
-				  lambda_mle, logl_H1);
-		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
-		    }
-
-		    time_opt+=(clock()-time_start)/
-		      (double(CLOCKS_PER_SEC)*60.0);
-
-		    // Store summary data.
-		    SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
-				  p_wald, p_lrt, p_score};
-		    sumStat.push_back(SNPs);
-		  }
-		}
-	}
-	cout<<endl;
-
-	gsl_vector_free (x);
-	gsl_vector_free (Utx);
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                       const gsl_matrix *UtW, const gsl_vector *Uty,
+                       const gsl_matrix *W, const gsl_vector *y) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  char ch[1];
+  bitset<8> b;
+
+  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+  double p_lrt = 0, p_score = 0;
+  double logl_H1 = 0.0;
+  int n_bit, n_miss, ci_total, ci_test;
+  double geno, x_mean;
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+  gsl_vector *x = gsl_vector_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
+
+  // Calculate n_bit, the number of bytes per SNP (four genotypes per byte).
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Read (and discard) the first three magic-number bytes.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  size_t c = 0, t_last = 0;
+  for (size_t t = 0; t < snpInfo.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // Seek to SNP t: n_bit bytes per SNP, after the 3 magic-number bytes.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(x, ci_test, 2);
+            x_mean += 2.0;
+          } else {
+            gsl_vector_set(x, ci_test, 1);
+            x_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(x, ci_test, 0);
+          } else {
+            gsl_vector_set(x, ci_test, -9);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+        ci_test++;
+      }
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    c++;
+
+    if (c % msize == 0 || c == t_last) {
+      size_t l = 0;
+      if (c % msize == 0) {
+        l = msize;
+      } else {
+        l = c % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+
+        CalcUab(UtW, Uty, Utx, Uab);
+
+        time_start = clock();
+        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+        // a_mode 3 (score) precedes 1 (Wald) so mode 4 keeps Wald's beta/se.
+        if (a_mode == 3 || a_mode == 4) {
+          CalcRLScore(l_mle_null, param1, beta, se, p_score);
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
+                     logl_H1);
+          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
+                        p_wald, p_lrt, p_score};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(Utx);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
 // WJA added.
-void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
-		       const gsl_matrix *UtW, const gsl_vector *Uty,
-		       const gsl_matrix *W, const gsl_vector *y) {
-	string file_bgen=file_oxford+".bgen";
-	ifstream infile (file_bgen.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bgen file:"<<file_bgen<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-	double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
-	double p_lrt=0, p_score=0;
-	double logl_H1=0.0;
-	int n_miss, c_phen;
-	double geno, x_mean;
-
-	// Calculate basic quantities.
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	gsl_vector *x=gsl_vector_alloc (U->size1);
-	gsl_vector *x_miss=gsl_vector_alloc (U->size1);
-	gsl_vector *Utx=gsl_vector_alloc (U->size2);
-	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	gsl_matrix_set_zero (Uab);
-	CalcUab (UtW, Uty, Uab);
-
-	// Read in header.
-	uint32_t bgen_snp_block_offset;
-	uint32_t bgen_header_length;
-	uint32_t bgen_nsamples;
-	uint32_t bgen_nsnps;
-	uint32_t bgen_flags;
-	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
-	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
-	bgen_snp_block_offset-=4;
-	infile.ignore(4+bgen_header_length-20);
-	bgen_snp_block_offset-=4+bgen_header_length-20;
-	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
-	bgen_snp_block_offset-=4;
-	bool CompressedSNPBlocks=bgen_flags&0x1;
-
-	infile.ignore(bgen_snp_block_offset);
-
-	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
-	double bgen_geno_prob_non_miss;
-
-	uint32_t bgen_N;
-	uint16_t bgen_LS;
-	uint16_t bgen_LR;
-	uint16_t bgen_LC;
-	uint32_t bgen_SNP_pos;
-	uint32_t bgen_LA;
-	std::string bgen_A_allele;
-	uint32_t bgen_LB;
-	std::string bgen_B_allele;
-	uint32_t bgen_P;
-	size_t unzipped_data_size;
-	string id;
-	string rs;
-	string chr;
-	std::cout << "Warning: WJA hard coded SNP missingness " <<
-	  "threshold of 10%"<<std::endl;
-
-	// Start reading genotypes and analyze.
-	size_t c=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (size_t t=0; t<indicator_snp.size(); ++t)
-	{
-		if (t%d_pace==0 || t==(ns_total-1)) {
-		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		// Read SNP header.
-		id.clear();
-		rs.clear();
-		chr.clear();
-		bgen_A_allele.clear();
-		bgen_B_allele.clear();
-
-		infile.read(reinterpret_cast<char*>(&bgen_N),4);
-		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
-		id.resize(bgen_LS);
-		infile.read(&id[0], bgen_LS);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
-		rs.resize(bgen_LR);
-		infile.read(&rs[0], bgen_LR);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
-		chr.resize(bgen_LC);
-		infile.read(&chr[0], bgen_LC);
-
-		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
-		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
-		bgen_A_allele.resize(bgen_LA);
-		infile.read(&bgen_A_allele[0], bgen_LA);
-
-
-		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
-		bgen_B_allele.resize(bgen_LB);
-		infile.read(&bgen_B_allele[0], bgen_LB);
-
-		uint16_t unzipped_data[3*bgen_N];
-
-		if (indicator_snp[t]==0) {
-			if(CompressedSNPBlocks)
-			  infile.read(reinterpret_cast<char*>(&bgen_P),4);
-			else
-			  bgen_P=6*bgen_N;
-
-			infile.ignore(static_cast<size_t>(bgen_P));
-
-			continue;
-		}
-
-		if(CompressedSNPBlocks) {
-			infile.read(reinterpret_cast<char*>(&bgen_P),4);
-			uint8_t zipped_data[bgen_P];
-
-			unzipped_data_size=6*bgen_N;
-
-			infile.read(reinterpret_cast<char*>(zipped_data),
-				    bgen_P);
-
-			int result=
-			  uncompress(reinterpret_cast<Bytef*>(unzipped_data),
-			    reinterpret_cast<uLongf*>(&unzipped_data_size),
-			    reinterpret_cast<Bytef*>(zipped_data),
-			    static_cast<uLong> (bgen_P));
-			assert(result == Z_OK);
-
-		}
-		else
-		{
-
-		  bgen_P=6*bgen_N;
-		  infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
-		}
-
-		x_mean=0.0; c_phen=0; n_miss=0;
-		gsl_vector_set_zero(x_miss);
-		for (size_t i=0; i<bgen_N; ++i) {
-		  if (indicator_idv[i]==0) {continue;}
-
-		  bgen_geno_prob_AA=
-		    static_cast<double>(unzipped_data[i*3])/32768.0;
-		  bgen_geno_prob_AB=
-		    static_cast<double>(unzipped_data[i*3+1])/32768.0;
-		  bgen_geno_prob_BB=
-		    static_cast<double>(unzipped_data[i*3+2])/32768.0;
-
-		  // WJA.
-		  bgen_geno_prob_non_miss = bgen_geno_prob_AA +
-		    bgen_geno_prob_AB+bgen_geno_prob_BB;
-		  if (bgen_geno_prob_non_miss<0.9) {
-		    gsl_vector_set(x_miss, c_phen, 0.0);
-		    n_miss++;
-		  }
-		  else {
-
-		    bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
-		    bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
-		    bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
-		    geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
-		    gsl_vector_set(x, c_phen, geno);
-		    gsl_vector_set(x_miss, c_phen, 1.0);
-		    x_mean+=geno;
-		  }
-		  c_phen++;
-		}
-
-		x_mean/=static_cast<double>(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			if (gsl_vector_get (x_miss, i)==0) {
-			  gsl_vector_set(x, i, x_mean);
-			}
-			geno=gsl_vector_get(x, i);
-		}
-
-		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
-		gsl_vector_memcpy (&Xlarge_col.vector, x);
-		c++;
-
-		if (c%msize==0 || c==t_last ) {
-		  size_t l=0;
-		  if (c%msize==0) {l=msize;} else {l=c%msize;}
-
-		  gsl_matrix_view Xlarge_sub=
-		    gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-		  gsl_matrix_view UtXlarge_sub=
-		    gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-		  time_start=clock();
-		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
-				  0.0, &UtXlarge_sub.matrix);
-		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		  gsl_matrix_set_zero (Xlarge);
-
-		  for (size_t i=0; i<l; i++) {
-		    gsl_vector_view UtXlarge_col=
-		      gsl_matrix_column (UtXlarge, i);
-		    gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
-
-		    CalcUab(UtW, Uty, Utx, Uab);
-
-		    time_start=clock();
-		    FUNC_PARAM param1={false,ni_test,n_cvt,eval,Uab,ab,0};
-
-		    // 3 is before 1.
-		    if (a_mode==3 || a_mode==4) {
-		      CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		    }
-
-		    if (a_mode==1 || a_mode==4) {
-		      CalcLambda ('R', param1, l_min, l_max, n_region,
-				  lambda_remle, logl_H1);
-		      CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		    }
-
-		    if (a_mode==2 || a_mode==4) {
-		      CalcLambda ('L', param1, l_min, l_max, n_region,
-				  lambda_mle, logl_H1);
-		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
-		    }
-
-		    time_opt+=(clock()-time_start)/
-		      (double(CLOCKS_PER_SEC)*60.0);
-
-		    // Store summary data.
-		    SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
-				  p_wald, p_lrt, p_score};
-		    sumStat.push_back(SNPs);
-		  }
-		}
-    }
-	cout<<endl;
-
-	gsl_vector_free (x);
-	gsl_vector_free (x_miss);
-	gsl_vector_free (Utx);
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+                      const gsl_matrix *UtW, const gsl_vector *Uty,
+                      const gsl_matrix *W, const gsl_vector *y) {
+  // Run the per-SNP association tests selected by a_mode on the genotype
+  // probabilities stored in file_oxford + ".bgen", appending one SUMSTAT entry
+  // to sumStat per analyzed SNP. W and y are not used in this function.
+  string file_bgen = file_oxford + ".bgen";
+  ifstream infile(file_bgen.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bgen file:" << file_bgen << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+  double p_lrt = 0, p_score = 0;
+  double logl_H1 = 0.0;
+  int n_miss, c_phen;
+  double geno, x_mean;
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+  gsl_vector *x = gsl_vector_alloc(U->size1);
+  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
+
+  // Read in header.
+  uint32_t bgen_snp_block_offset;
+  uint32_t bgen_header_length;
+  uint32_t bgen_nsamples;
+  uint32_t bgen_nsnps;
+  uint32_t bgen_flags;
+  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+  bgen_snp_block_offset -= 4;
+  infile.ignore(4 + bgen_header_length - 20);
+  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+  bgen_snp_block_offset -= 4;
+  bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+  infile.ignore(bgen_snp_block_offset);
+
+  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
+  double bgen_geno_prob_non_miss;
+
+  uint32_t bgen_N;
+  uint16_t bgen_LS;
+  uint16_t bgen_LR;
+  uint16_t bgen_LC;
+  uint32_t bgen_SNP_pos;
+  uint32_t bgen_LA;
+  std::string bgen_A_allele;
+  uint32_t bgen_LB;
+  std::string bgen_B_allele;
+  uint32_t bgen_P;
+  size_t unzipped_data_size;
+  string id;
+  string rs;
+  string chr;
+  std::cout << "Warning: WJA hard coded SNP missingness "
+            << "threshold of 10%" << std::endl;
+
+  // Start reading genotypes and analyze.
+  size_t c = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    // Do not skip filtered SNPs here: their header and probability block
+    // must still be consumed (see the indicator_snp check below) so that
+    // the stream stays aligned with the next SNP block.
+
+    // Read SNP header.
+    id.clear();
+    rs.clear();
+    chr.clear();
+    bgen_A_allele.clear();
+    bgen_B_allele.clear();
+
+    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+    id.resize(bgen_LS);
+    infile.read(&id[0], bgen_LS);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+    rs.resize(bgen_LR);
+    infile.read(&rs[0], bgen_LR);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+    chr.resize(bgen_LC);
+    infile.read(&chr[0], bgen_LC);
+
+    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+    bgen_A_allele.resize(bgen_LA);
+    infile.read(&bgen_A_allele[0], bgen_LA);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+    bgen_B_allele.resize(bgen_LB);
+    infile.read(&bgen_B_allele[0], bgen_LB);
+
+    uint16_t unzipped_data[3 * bgen_N];
+
+    if (indicator_snp[t] == 0) {
+      if (CompressedSNPBlocks)
+        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      else
+        bgen_P = 6 * bgen_N;
+
+      infile.ignore(static_cast<size_t>(bgen_P));
+
+      continue;
+    }
+
+    if (CompressedSNPBlocks) {
+      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      uint8_t zipped_data[bgen_P];
+
+      unzipped_data_size = 6 * bgen_N;
+
+      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+
+      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+                              reinterpret_cast<uLongf *>(&unzipped_data_size),
+                              reinterpret_cast<Bytef *>(zipped_data),
+                              static_cast<uLong>(bgen_P));
+      assert(result == Z_OK);
+    } else {
+      bgen_P = 6 * bgen_N;
+      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+    }
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < bgen_N; ++i) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+      bgen_geno_prob_AB =
+          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+      bgen_geno_prob_BB =
+          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+
+      // WJA: probabilities summing to less than 0.9 mean a missing call.
+      bgen_geno_prob_non_miss =
+          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+      if (bgen_geno_prob_non_miss < 0.9) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= static_cast<double>(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i);
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    c++;
+
+    if (c % msize == 0 || c == t_last) {
+      size_t l = 0;
+      if (c % msize == 0) {
+        l = msize;
+      } else {
+        l = c % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+
+        CalcUab(UtW, Uty, Utx, Uab);
+
+        time_start = clock();
+        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+        // Mode 3 (score test) runs before mode 1 (Wald test).
+        if (a_mode == 3 || a_mode == 4) {
+          CalcRLScore(l_mle_null, param1, beta, se, p_score);
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
+                     logl_H1);
+          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
+                        p_wald, p_lrt, p_score};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+  gsl_vector_free(Utx);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX,
-		   const gsl_vector *Uty, const gsl_vector *K_eval,
-		   const double l_min, const double l_max,
-		   const size_t n_region,
-		   vector<pair<size_t, double> > &pos_loglr) {
-	double logl_H0, logl_H1, log_lr, lambda0, lambda1;
+void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
+                  const gsl_vector *Uty, const gsl_vector *K_eval,
+                  const double l_min, const double l_max, const size_t n_region,
+                  vector<pair<size_t, double>> &pos_loglr) {
+  double logl_H0, logl_H1, log_lr, lambda0, lambda1;
 
-	gsl_vector *w=gsl_vector_alloc (Uty->size);
-	gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1);
-	gsl_matrix *Uab=gsl_matrix_alloc (Uty->size, 6);
-	gsl_vector *ab=gsl_vector_alloc (6);
+  gsl_vector *w = gsl_vector_alloc(Uty->size);
+  gsl_matrix *Utw = gsl_matrix_alloc(Uty->size, 1);
+  gsl_matrix *Uab = gsl_matrix_alloc(Uty->size, 6);
+  gsl_vector *ab = gsl_vector_alloc(6);
 
-	gsl_vector_set_zero(ab);
-	gsl_vector_set_all (w, 1.0);
-	gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0);
-	gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
+  gsl_vector_set_zero(ab);
+  gsl_vector_set_all(w, 1.0);
+  gsl_vector_view Utw_col = gsl_matrix_column(Utw, 0);
+  gsl_blas_dgemv(CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
 
-	CalcUab (Utw, Uty, Uab);
-	FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0};
+  CalcUab(Utw, Uty, Uab);
+  FUNC_PARAM param0 = {true, Uty->size, 1, K_eval, Uab, ab, 0};
 
-	CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
+  CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
 
-	for (size_t i=0; i<UtX->size2; ++i) {
-		gsl_vector_const_view UtX_col=gsl_matrix_const_column (UtX, i);
-		CalcUab(Utw, Uty, &UtX_col.vector, Uab);
-		FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0};
+  for (size_t i = 0; i < UtX->size2; ++i) {
+    gsl_vector_const_view UtX_col = gsl_matrix_const_column(UtX, i);
+    CalcUab(Utw, Uty, &UtX_col.vector, Uab);
+    FUNC_PARAM param1 = {false, UtX->size1, 1, K_eval, Uab, ab, 0};
 
-		CalcLambda ('L', param1, l_min, l_max, n_region, lambda1,
-			    logl_H1);
-		log_lr=logl_H1-logl_H0;
+    CalcLambda('L', param1, l_min, l_max, n_region, lambda1, logl_H1);
+    log_lr = logl_H1 - logl_H0;
 
-		pos_loglr.push_back(make_pair(i,log_lr) );
-	}
+    pos_loglr.push_back(make_pair(i, log_lr));
+  }
 
-	gsl_vector_free (w);
-	gsl_matrix_free (Utw);
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
+  gsl_vector_free(w);
+  gsl_matrix_free(Utw);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
 
-	return;
+  return;
 }
 
-void CalcLambda (const char func_name, FUNC_PARAM &params,
-		 const double l_min, const double l_max,
-		 const size_t n_region, double &lambda, double &logf) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' &&
-	    func_name!='l') {
-	  cout << "func_name only takes 'R' or 'L': 'R' for " <<
-	    "log-restricted likelihood, 'L' for log-likelihood." << endl;
-	  return;
-	}
-
-	vector<pair<double, double> > lambda_lh;
-
-	// Evaluate first-order derivates in different intervals.
-	double lambda_l, lambda_h, lambda_interval=
-	  log(l_max/l_min)/(double)n_region;
-	double dev1_l, dev1_h, logf_l, logf_h;
-
-	for (size_t i=0; i<n_region; ++i) {
-		lambda_l=l_min*exp(lambda_interval*i);
-		lambda_h=l_min*exp(lambda_interval*(i+1.0));
-
-		if (func_name=='R' || func_name=='r') {
-			dev1_l=LogRL_dev1 (lambda_l, &params);
-			dev1_h=LogRL_dev1 (lambda_h, &params);
-		}
-		else {
-			dev1_l=LogL_dev1 (lambda_l, &params);
-			dev1_h=LogL_dev1 (lambda_h, &params);
-		}
-
-		if (dev1_l*dev1_h<=0) {
-			lambda_lh.push_back(make_pair(lambda_l, lambda_h));
-		}
-	}
-
-	// If derivates do not change signs in any interval.
-	if (lambda_lh.empty()) {
-		if (func_name=='R' || func_name=='r') {
-			logf_l=LogRL_f (l_min, &params);
-			logf_h=LogRL_f (l_max, &params);
-		}
-		else {
-			logf_l=LogL_f (l_min, &params);
-			logf_h=LogL_f (l_max, &params);
-		}
-
-		if (logf_l>=logf_h) {
-		  lambda=l_min;
-		  logf=logf_l;
-		} else {
-		  lambda=l_max;
-		  logf=logf_h;
-		}
-	}
-	else {
-
-		// If derivates change signs.
-		int status;
-		int iter=0, max_iter=100;
-		double l, l_temp;
-
-		gsl_function F;
-		gsl_function_fdf FDF;
-
-		F.params=&params;
-		FDF.params=&params;
-
-		if (func_name=='R' || func_name=='r') {
-			F.function=&LogRL_dev1;
-			FDF.f=&LogRL_dev1;
-			FDF.df=&LogRL_dev2;
-			FDF.fdf=&LogRL_dev12;
-		}
-		else {
-			F.function=&LogL_dev1;
-			FDF.f=&LogL_dev1;
-			FDF.df=&LogL_dev2;
-			FDF.fdf=&LogL_dev12;
-		}
-
-		const gsl_root_fsolver_type *T_f;
-		gsl_root_fsolver *s_f;
-		T_f=gsl_root_fsolver_brent;
-		s_f=gsl_root_fsolver_alloc (T_f);
-
-		const gsl_root_fdfsolver_type *T_fdf;
-		gsl_root_fdfsolver *s_fdf;
-		T_fdf=gsl_root_fdfsolver_newton;
-		s_fdf=gsl_root_fdfsolver_alloc(T_fdf);
-
-		for (vector<double>::size_type i=0; i<lambda_lh.size(); ++i) {
-		  lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second;
-		  gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h);
-
-		  do {
-		    iter++;
-		    status=gsl_root_fsolver_iterate (s_f);
-		    l=gsl_root_fsolver_root (s_f);
-		    lambda_l=gsl_root_fsolver_x_lower (s_f);
-		    lambda_h=gsl_root_fsolver_x_upper (s_f);
-		    status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1);
-		  }
-		  while (status==GSL_CONTINUE && iter<max_iter);
-
-		  iter=0;
-
-		  gsl_root_fdfsolver_set (s_fdf, &FDF, l);
-
-		  do {
-		    iter++;
-		    status=gsl_root_fdfsolver_iterate (s_fdf);
-		    l_temp=l;
-		    l=gsl_root_fdfsolver_root (s_fdf);
-		    status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
-		  }
-		  while (status==GSL_CONTINUE &&
-			 iter<max_iter &&
-			 l>l_min && l<l_max);
-
-		  l=l_temp;
-		  if (l<l_min) {l=l_min;}
-		  if (l>l_max) {l=l_max;}
-		  if (func_name=='R' || func_name=='r') {
-		    logf_l=LogRL_f (l, &params);
-		  } else {
-		    logf_l=LogL_f (l, &params);
-		  }
-
-		  if (i==0) {logf=logf_l; lambda=l;}
-		  else if (logf<logf_l) {logf=logf_l; lambda=l;}
-		  else {}
-		}
-		gsl_root_fsolver_free (s_f);
-		gsl_root_fdfsolver_free (s_fdf);
-
-		if (func_name=='R' || func_name=='r') {
-			logf_l=LogRL_f (l_min, &params);
-			logf_h=LogRL_f (l_max, &params);
-		}
-		else {
-			logf_l=LogL_f (l_min, &params);
-			logf_h=LogL_f (l_max, &params);
-		}
-
-		if (logf_l>logf) {lambda=l_min; logf=logf_l;}
-		if (logf_h>logf) {lambda=l_max; logf=logf_h;}
-	}
-
-	return;
+void CalcLambda(const char func_name, FUNC_PARAM &params, const double l_min,
+                const double l_max, const size_t n_region, double &lambda,
+                double &logf) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for "
+         << "log-restricted likelihood, 'L' for log-likelihood." << endl;
+    return;
+  }
+
+  vector<pair<double, double>> lambda_lh;
+
+  // Evaluate first-order derivatives at the ends of n_region log-spaced intervals.
+  double lambda_l, lambda_h,
+      lambda_interval = log(l_max / l_min) / (double)n_region;
+  double dev1_l, dev1_h, logf_l, logf_h;
+
+  for (size_t i = 0; i < n_region; ++i) {
+    lambda_l = l_min * exp(lambda_interval * i);
+    lambda_h = l_min * exp(lambda_interval * (i + 1.0));
+
+    if (func_name == 'R' || func_name == 'r') {
+      dev1_l = LogRL_dev1(lambda_l, &params);
+      dev1_h = LogRL_dev1(lambda_h, &params);
+    } else {
+      dev1_l = LogL_dev1(lambda_l, &params);
+      dev1_h = LogL_dev1(lambda_h, &params);
+    }
+
+    if (dev1_l * dev1_h <= 0) {
+      lambda_lh.push_back(make_pair(lambda_l, lambda_h));
+    }
+  }
+
+  // If derivatives do not change sign in any interval, pick the better boundary.
+  if (lambda_lh.empty()) {
+    if (func_name == 'R' || func_name == 'r') {
+      logf_l = LogRL_f(l_min, &params);
+      logf_h = LogRL_f(l_max, &params);
+    } else {
+      logf_l = LogL_f(l_min, &params);
+      logf_h = LogL_f(l_max, &params);
+    }
+
+    if (logf_l >= logf_h) {
+      lambda = l_min;
+      logf = logf_l;
+    } else {
+      lambda = l_max;
+      logf = logf_h;
+    }
+  } else {
+
+    // If derivatives change sign: refine each bracket (Brent, then Newton).
+    int status;
+    int iter = 0, max_iter = 100;
+    double l, l_temp;
+
+    gsl_function F;
+    gsl_function_fdf FDF;
+
+    F.params = &params;
+    FDF.params = &params;
+
+    if (func_name == 'R' || func_name == 'r') {
+      F.function = &LogRL_dev1;
+      FDF.f = &LogRL_dev1;
+      FDF.df = &LogRL_dev2;
+      FDF.fdf = &LogRL_dev12;
+    } else {
+      F.function = &LogL_dev1;
+      FDF.f = &LogL_dev1;
+      FDF.df = &LogL_dev2;
+      FDF.fdf = &LogL_dev12;
+    }
+
+    const gsl_root_fsolver_type *T_f;
+    gsl_root_fsolver *s_f;
+    T_f = gsl_root_fsolver_brent;
+    s_f = gsl_root_fsolver_alloc(T_f);
+
+    const gsl_root_fdfsolver_type *T_fdf;
+    gsl_root_fdfsolver *s_fdf;
+    T_fdf = gsl_root_fdfsolver_newton;
+    s_fdf = gsl_root_fdfsolver_alloc(T_fdf);
+
+    for (vector<double>::size_type i = 0; i < lambda_lh.size(); ++i) {
+      lambda_l = lambda_lh[i].first;
+      lambda_h = lambda_lh[i].second;
+      gsl_root_fsolver_set(s_f, &F, lambda_l, lambda_h);
+
+      do {
+        iter++;
+        status = gsl_root_fsolver_iterate(s_f);
+        l = gsl_root_fsolver_root(s_f);
+        lambda_l = gsl_root_fsolver_x_lower(s_f);
+        lambda_h = gsl_root_fsolver_x_upper(s_f);
+        status = gsl_root_test_interval(lambda_l, lambda_h, 0, 1e-1);
+      } while (status == GSL_CONTINUE && iter < max_iter);
+
+      iter = 0;
+
+      gsl_root_fdfsolver_set(s_fdf, &FDF, l);
+
+      do {
+        iter++;
+        status = gsl_root_fdfsolver_iterate(s_fdf);
+        l_temp = l;
+        l = gsl_root_fdfsolver_root(s_fdf);
+        status = gsl_root_test_delta(l, l_temp, 0, 1e-5);
+      } while (status == GSL_CONTINUE && iter < max_iter && l > l_min &&
+               l < l_max);
+
+      l = l_temp;
+      if (l < l_min) {
+        l = l_min;
+      }
+      if (l > l_max) {
+        l = l_max;
+      }
+      if (func_name == 'R' || func_name == 'r') {
+        logf_l = LogRL_f(l, &params);
+      } else {
+        logf_l = LogL_f(l, &params);
+      }
+
+      if (i == 0) {
+        logf = logf_l;
+        lambda = l;
+      } else if (logf < logf_l) {
+        logf = logf_l;
+        lambda = l;
+      } else {
+      }
+    }
+    gsl_root_fsolver_free(s_f);
+    gsl_root_fdfsolver_free(s_fdf);
+
+    if (func_name == 'R' || func_name == 'r') {
+      logf_l = LogRL_f(l_min, &params);
+      logf_h = LogRL_f(l_max, &params);
+    } else {
+      logf_l = LogL_f(l_min, &params);
+      logf_h = LogL_f(l_max, &params);
+    }
+
+    if (logf_l > logf) {
+      lambda = l_min;
+      logf = logf_l;
+    }
+    if (logf_h > logf) {
+      lambda = l_max;
+      logf = logf_h;
+    }
+  }
+
+  return;
 }
 
 // Calculate lambda in the null model.
-void CalcLambda (const char func_name, const gsl_vector *eval,
-		 const gsl_matrix *UtW, const gsl_vector *Uty,
-		 const double l_min, const double l_max,
-		 const size_t n_region, double &lambda, double &logl_H0) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' &&
-	    func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for " <<
-	    "log-restricted likelihood, 'L' for log-likelihood." << endl;
-	  return;
-	}
+void CalcLambda(const char func_name, const gsl_vector *eval,
+                const gsl_matrix *UtW, const gsl_vector *Uty,
+                const double l_min, const double l_max, const size_t n_region,
+                double &lambda, double &logl_H0) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for "
+         << "log-restricted likelihood, 'L' for log-likelihood." << endl;
+    return;
+  }
 
-	size_t n_cvt=UtW->size2, ni_test=UtW->size1;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+  size_t n_cvt = UtW->size2, ni_test = UtW->size1;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-	gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
+  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
 
-	gsl_matrix_set_zero (Uab);
-	CalcUab (UtW, Uty, Uab);
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
 
-	FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
+  FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0};
 
-	CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
+  CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
 
-	gsl_matrix_free(Uab);
-	gsl_vector_free(ab);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
 
-	return;
+  return;
 }
 
 // Obtain REMLE estimate for PVE using lambda_remle.
-void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW,
-	      const gsl_vector *Uty, const double lambda,
-	      const double trace_G, double &pve, double &pve_se) {
-	size_t n_cvt=UtW->size2, ni_test=UtW->size1;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
+             const gsl_vector *Uty, const double lambda, const double trace_G,
+             double &pve, double &pve_se) {
+  size_t n_cvt = UtW->size2, ni_test = UtW->size1;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-	gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
+  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
 
-	gsl_matrix_set_zero (Uab);
-	CalcUab (UtW, Uty, Uab);
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
 
-	FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
+  FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0};
 
-	double se=sqrt(-1.0/LogRL_dev2 (lambda, &param0));
+  double se = sqrt(-1.0 / LogRL_dev2(lambda, &param0));
 
-	pve=trace_G*lambda/(trace_G*lambda+1.0);
-	pve_se=trace_G/((trace_G*lambda+1.0)*(trace_G*lambda+1.0))*se;
+  pve = trace_G * lambda / (trace_G * lambda + 1.0);
+  pve_se = trace_G / ((trace_G * lambda + 1.0) * (trace_G * lambda + 1.0)) * se;
 
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
-	return;
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+  return;
 }
 
 // Obtain REML estimate for Vg and Ve using lambda_remle.
 // Obtain beta and se(beta) for coefficients.
 // ab is not used when e_mode==0.
-void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
-		      const gsl_vector *Uty, const double lambda,
-		      double &vg, double &ve, gsl_vector *beta,
-		      gsl_vector *se_beta) {
-	size_t n_cvt=UtW->size2, ni_test=UtW->size1;
-	size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
-	gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
-	gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
-	gsl_vector *Hi_eval=gsl_vector_alloc(eval->size);
-	gsl_vector *v_temp=gsl_vector_alloc(eval->size);
-	gsl_matrix *HiW=gsl_matrix_alloc(eval->size, UtW->size2);
-	gsl_matrix *WHiW=gsl_matrix_alloc(UtW->size2, UtW->size2);
-	gsl_vector *WHiy=gsl_vector_alloc(UtW->size2);
-	gsl_matrix *Vbeta=gsl_matrix_alloc(UtW->size2, UtW->size2);
-
-	gsl_matrix_set_zero (Uab);
-	CalcUab (UtW, Uty, Uab);
-
-	gsl_vector_memcpy (v_temp, eval);
-	gsl_vector_scale (v_temp, lambda);
-	gsl_vector_set_all (Hi_eval, 1.0);
-	gsl_vector_add_constant (v_temp, 1.0);
-	gsl_vector_div (Hi_eval, v_temp);
-
-	// Calculate beta.
-	gsl_matrix_memcpy (HiW, UtW);
-	for (size_t i=0; i<UtW->size2; i++) {
-		gsl_vector_view HiW_col=gsl_matrix_column(HiW, i);
-		gsl_vector_mul(&HiW_col.vector, Hi_eval);
-	}
-	gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
-	gsl_blas_dgemv (CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
-
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (UtW->size2);
-	LUDecomp (WHiW, pmt, &sig);
-	LUSolve (WHiW, pmt, WHiy, beta);
-	LUInvert (WHiW, pmt, Vbeta);
-
-	// Calculate vg and ve.
-	CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab);
-
-	size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-	double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
-
-	ve=P_yy/(double)(ni_test-n_cvt);
-	vg=ve*lambda;
-
-	// With ve, calculate se(beta).
-	gsl_matrix_scale(Vbeta, ve);
-
-	// Obtain se_beta.
-	for (size_t i=0; i<Vbeta->size1; i++) {
-		gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta,i,i)));
-	}
-
-	gsl_matrix_free(Uab);
-	gsl_matrix_free(Pab);
-	gsl_vector_free(ab);
-	gsl_vector_free(Hi_eval);
-	gsl_vector_free(v_temp);
-	gsl_matrix_free(HiW);
-	gsl_matrix_free(WHiW);
-	gsl_vector_free(WHiy);
-	gsl_matrix_free(Vbeta);
-
-	gsl_permutation_free(pmt);
-	return;
+void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+                     const gsl_vector *Uty, const double lambda, double &vg,
+                     double &ve, gsl_vector *beta, gsl_vector *se_beta) {
+  size_t n_cvt = UtW->size2, ni_test = UtW->size1;
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_alloc(eval->size);
+  gsl_vector *v_temp = gsl_vector_alloc(eval->size);
+  gsl_matrix *HiW = gsl_matrix_alloc(eval->size, UtW->size2);
+  gsl_matrix *WHiW = gsl_matrix_alloc(UtW->size2, UtW->size2);
+  gsl_vector *WHiy = gsl_vector_alloc(UtW->size2);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(UtW->size2, UtW->size2);
+
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
+
+  gsl_vector_memcpy(v_temp, eval);
+  gsl_vector_scale(v_temp, lambda);
+  gsl_vector_set_all(Hi_eval, 1.0);
+  gsl_vector_add_constant(v_temp, 1.0);
+  gsl_vector_div(Hi_eval, v_temp);
+
+  // Calculate beta: solve (W' Hi W) beta = W' Hi y by LU decomposition.
+  gsl_matrix_memcpy(HiW, UtW);
+  for (size_t i = 0; i < UtW->size2; i++) {
+    gsl_vector_view HiW_col = gsl_matrix_column(HiW, i);
+    gsl_vector_mul(&HiW_col.vector, Hi_eval);
+  }
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
+  gsl_blas_dgemv(CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
+
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(UtW->size2);
+  LUDecomp(WHiW, pmt, &sig);
+  LUSolve(WHiW, pmt, WHiy, beta);
+  LUInvert(WHiW, pmt, Vbeta);
+
+  // Calculate ve as P_yy / (ni_test - n_cvt), then vg = ve * lambda.
+  CalcPab(n_cvt, 0, Hi_eval, Uab, ab, Pab);
+
+  size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
+  double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy);
+
+  ve = P_yy / (double)(ni_test - n_cvt);
+  vg = ve * lambda;
+
+  // With ve, scale the inverse of W' Hi W (Vbeta) to get the variance of beta.
+  gsl_matrix_scale(Vbeta, ve);
+
+  // Obtain se_beta as the square roots of the diagonal of Vbeta.
+  for (size_t i = 0; i < Vbeta->size1; i++) {
+    gsl_vector_set(se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i)));
+  }
+
+  gsl_matrix_free(Uab);
+  gsl_matrix_free(Pab);
+  gsl_vector_free(ab);
+  gsl_vector_free(Hi_eval);
+  gsl_vector_free(v_temp);
+  gsl_matrix_free(HiW);
+  gsl_matrix_free(WHiW);
+  gsl_vector_free(WHiy);
+  gsl_matrix_free(Vbeta);
+
+  gsl_permutation_free(pmt);
+  return;
 }
 
-void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
-			    const gsl_matrix *UtW, const gsl_vector *Uty,
-			    const gsl_matrix *W, const gsl_vector *y,
-			    const gsl_vector *env) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-
-	string line;
-	char *ch_ptr;
-
-	double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
-	double p_lrt=0, p_score=0;
-	double logl_H1=0.0, logl_H0=0.0;
-	int n_miss, c_phen;
-	double geno, x_mean;
-
-	// Calculate basic quantities.
-	size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
-
-	gsl_vector *x=gsl_vector_alloc (U->size1);
-	gsl_vector *x_miss=gsl_vector_alloc (U->size1);
-	gsl_vector *Utx=gsl_vector_alloc (U->size2);
-	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
-
-	gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
-	gsl_matrix_view UtW_expand_mat=
-	  gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
-	gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
-	gsl_vector_view UtW_expand_env=
-	  gsl_matrix_column(UtW_expand, UtW->size2);
-	gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
-	gsl_vector_view UtW_expand_x=
-	  gsl_matrix_column(UtW_expand, UtW->size2+1);
-
-	// Start reading genotypes and analyze.
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%d_pace==0 || t==(ns_total-1)) {
-		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		x_mean=0.0; c_phen=0; n_miss=0;
-		gsl_vector_set_zero(x_miss);
-		for (size_t i=0; i<ni_total; ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==0) {continue;}
-
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set(x_miss, c_phen, 0.0);
-			  n_miss++;
-			}
-			else {
-				geno=atof(ch_ptr);
-
-				gsl_vector_set(x, c_phen, geno);
-				gsl_vector_set(x_miss, c_phen, 1.0);
-				x_mean+=geno;
-			}
-			c_phen++;
-		}
-
-		x_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			if (gsl_vector_get (x_miss, i)==0) {
-			  gsl_vector_set(x, i, x_mean);
-			}
-			geno=gsl_vector_get(x, i);
-			if (x_mean>1) {
-				gsl_vector_set(x, i, 2-geno);
-			}
-		}
-
-		// Calculate statistics.
-		time_start=clock();
-		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0,
-				&UtW_expand_x.vector);
-		gsl_vector_mul (x, env);
-		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
-		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		gsl_matrix_set_zero (Uab);
-		CalcUab (UtW_expand, Uty, Uab);
-
-		if (a_mode==2 || a_mode==4) {
-		  FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
-		  CalcLambda ('L', param0, l_min, l_max, n_region,
-			      lambda_mle, logl_H0);
-		}
-
-		CalcUab(UtW_expand, Uty, Utx, Uab);
-
-		time_start=clock();
-		FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
-
-		// 3 is before 1.
-		if (a_mode==3 || a_mode==4) {
-			CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		}
-
-		if (a_mode==1 || a_mode==4) {
-			CalcLambda ('R', param1, l_min, l_max, n_region,
-				    lambda_remle, logl_H1);
-			CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		}
-
-		if (a_mode==2 || a_mode==4) {
-			CalcLambda ('L', param1, l_min, l_max, n_region,
-				    lambda_mle, logl_H1);
-			p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
-		}
-
-		if (x_mean>1) {beta*=-1;}
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Store summary data.
-		SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
-			      p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
-	}
-	cout<<endl;
-
-	gsl_vector_free (x);
-	gsl_vector_free (x_miss);
-	gsl_vector_free (Utx);
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
-
-	gsl_matrix_free (UtW_expand);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+                           const gsl_matrix *UtW, const gsl_vector *Uty,
+                           const gsl_matrix *W, const gsl_vector *y,
+                           const gsl_vector *env) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  string line;
+  char *ch_ptr;
+
+  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+  double p_lrt = 0, p_score = 0;
+  double logl_H1 = 0.0, logl_H0 = 0.0;
+  int n_miss, c_phen;
+  double geno, x_mean;
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
+
+  gsl_vector *x = gsl_vector_alloc(U->size1);
+  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+
+  gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+  gsl_matrix_view UtW_expand_mat =
+      gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+  gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
+  gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
+  gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+  gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
+
+  // Start reading genotypes and analyze.
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i);
+      if (x_mean > 1) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+    gsl_vector_mul(x, env);
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    gsl_matrix_set_zero(Uab);
+    CalcUab(UtW_expand, Uty, Uab);
+
+    if (a_mode == 2 || a_mode == 4) {
+      FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+      CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+    }
+
+    CalcUab(UtW_expand, Uty, Utx, Uab);
+
+    time_start = clock();
+    FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+
+    // Score test (mode 3) runs before Wald test (mode 1), for beta.
+    if (a_mode == 3 || a_mode == 4) {
+      CalcRLScore(l_mle_null, param1, beta, se, p_score);
+    }
+
+    if (a_mode == 1 || a_mode == 4) {
+      CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+      CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+    }
+
+    if (a_mode == 2 || a_mode == 4) {
+      CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+      p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
+    }
+
+    if (x_mean > 1) {
+      beta *= -1;
+    }
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+  gsl_vector_free(Utx);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+
+  gsl_matrix_free(UtW_expand);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
-			   const gsl_matrix *UtW, const gsl_vector *Uty,
-			   const gsl_matrix *W, const gsl_vector *y,
-			   const gsl_vector *env) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
-	clock_t time_start=clock();
-
-	char ch[1];
-	bitset<8> b;
-
-	double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
-	double p_lrt=0, p_score=0;
-	double logl_H1=0.0, logl_H0=0.0;
-	int n_bit, n_miss, ci_total, ci_test;
-	double geno, x_mean;
-
-	// Calculate basic quantities.
-	size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
-
-	gsl_vector *x=gsl_vector_alloc (U->size1);
-	gsl_vector *Utx=gsl_vector_alloc (U->size2);
-	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
-	gsl_vector *ab=gsl_vector_alloc (n_index);
-
-	gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
-	gsl_matrix_view UtW_expand_mat=
-	  gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
-	gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
-	gsl_vector_view UtW_expand_env=
-	  gsl_matrix_column(UtW_expand, UtW->size2);
-	gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
-	gsl_vector_view UtW_expand_x=
-	  gsl_matrix_column(UtW_expand, UtW->size2+1);
-
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-	  if (t%d_pace==0 || t==snpInfo.size()-1) {
-	    ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-
-	        // n_bit, and 3 is the number of magic numbers
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-			  if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
-			    break;
-			  }
-			  if (indicator_idv[ci_total]==0) {
-			    ci_total++;
-			    continue;
-			  }
-
-			  if (b[2*j]==0) {
-			    if (b[2*j+1]==0) {
-			      gsl_vector_set(x, ci_test, 2);
-			      x_mean+=2.0;
-			    }
-			    else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
-			  }
-			  else {
-			    if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
-			    else {gsl_vector_set(x, ci_test, -9); n_miss++; }
-			  }
-
-			  ci_total++;
-			  ci_test++;
-			}
-		}
-
-		x_mean/=(double)(ni_test-n_miss);
-
-		for (size_t i=0; i<ni_test; ++i) {
-			geno=gsl_vector_get(x,i);
-			if (geno==-9) {
-			  gsl_vector_set(x, i, x_mean);
-			  geno=x_mean;
-			}
-			if (x_mean>1) {
-			  gsl_vector_set(x, i, 2-geno);
-			}
-		}
-
-		// Calculate statistics.
-		time_start=clock();
-		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0,
-				&UtW_expand_x.vector);
-		gsl_vector_mul (x, env);
-		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
-		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		gsl_matrix_set_zero (Uab);
-		CalcUab (UtW_expand, Uty, Uab);
-
-		if (a_mode==2 || a_mode==4) {
-		  FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
-		  CalcLambda ('L', param0, l_min, l_max, n_region,
-			      lambda_mle, logl_H0);
-		}
-
-		CalcUab(UtW_expand, Uty, Utx, Uab);
-
-		time_start=clock();
-		FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
-
-		// 3 is before 1, for beta.
-		if (a_mode==3 || a_mode==4) {
-			CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		}
-
-		if (a_mode==1 || a_mode==4) {
-		  CalcLambda ('R', param1, l_min, l_max, n_region,
-			      lambda_remle, logl_H1);
-		  CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		}
-
-		if (a_mode==2 || a_mode==4) {
-		  CalcLambda ('L', param1, l_min, l_max, n_region,
-			      lambda_mle, logl_H1);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
-		}
-
-		if (x_mean>1) {beta*=-1;}
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Store summary data.
-		SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald,
-			      p_lrt, p_score};
-		sumStat.push_back(SNPs);
-    }
-	cout<<endl;
-
-	gsl_vector_free (x);
-	gsl_vector_free (Utx);
-	gsl_matrix_free (Uab);
-	gsl_vector_free (ab);
-
-	gsl_matrix_free (UtW_expand);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+                          const gsl_matrix *UtW, const gsl_vector *Uty,
+                          const gsl_matrix *W, const gsl_vector *y,
+                          const gsl_vector *env) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+
+  char ch[1];
+  bitset<8> b;
+
+  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+  double p_lrt = 0, p_score = 0;
+  double logl_H1 = 0.0, logl_H0 = 0.0;
+  int n_bit, n_miss, ci_total, ci_test;
+  double geno, x_mean;
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
+
+  gsl_vector *x = gsl_vector_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+
+  gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+  gsl_matrix_view UtW_expand_mat =
+      gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+  gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
+  gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
+  gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+  gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
+
+  // Calculate n_bit, the number of bytes for each SNP (4 genotypes per byte).
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Read past the first three magic numbers.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // Seek to SNP t: n_bit bytes per SNP, after the 3 magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(x, ci_test, 2);
+            x_mean += 2.0;
+          } else {
+            gsl_vector_set(x, ci_test, 1);
+            x_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(x, ci_test, 0);
+          } else {
+            gsl_vector_set(x, ci_test, -9);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+        ci_test++;
+      }
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+      if (x_mean > 1) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+    gsl_vector_mul(x, env);
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    gsl_matrix_set_zero(Uab);
+    CalcUab(UtW_expand, Uty, Uab);
+
+    if (a_mode == 2 || a_mode == 4) {
+      FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+      CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+    }
+
+    CalcUab(UtW_expand, Uty, Utx, Uab);
+
+    time_start = clock();
+    FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+
+    // Score test (mode 3) runs before Wald test (mode 1), for beta.
+    if (a_mode == 3 || a_mode == 4) {
+      CalcRLScore(l_mle_null, param1, beta, se, p_score);
+    }
+
+    if (a_mode == 1 || a_mode == 4) {
+      CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+      CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+    }
+
+    if (a_mode == 2 || a_mode == 4) {
+      CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+      p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
+    }
+
+    if (x_mean > 1) {
+      beta *= -1;
+    }
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(Utx);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+
+  gsl_matrix_free(UtW_expand);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
diff --git a/src/lmm.h b/src/lmm.h
index 9c3de9d..c393daf 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -19,120 +19,117 @@
 #ifndef __LMM_H__
 #define __LMM_H__
 
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
 #include "io.h"
+#include "param.h"
 
 using namespace std;
 
 class FUNC_PARAM {
 
 public:
-	bool calc_null;
-	size_t ni_test;
-	size_t n_cvt;
-	const gsl_vector *eval;
-	const gsl_matrix *Uab;
-	const gsl_vector *ab;
-	size_t e_mode;
+  bool calc_null;
+  size_t ni_test;
+  size_t n_cvt;
+  const gsl_vector *eval;
+  const gsl_matrix *Uab;
+  const gsl_vector *ab;
+  size_t e_mode;
 };
 
 class LMM {
 
 public:
-	// IO-related parameters
-	int a_mode;	// Analysis mode: 1/2/3/4 for Frequentist tests.
-	size_t d_pace;	// Display pace.
-
-	string file_bfile;
-	string file_geno;
-	string file_out;
-	string path_out;
-
-	string file_gene;
-	// WJA added
-	string file_oxford;
-
-	// LMM related parameters
-	double l_min;
-	double l_max;
-	size_t n_region;
-	double l_mle_null;
-	double logl_mle_H0;
-
-	// Summary statistics
-	size_t ni_total, ni_test; // Number of individuals.
-	size_t ns_total, ns_test; // Number of SNPs.
-	size_t ng_total, ng_test; // Number of genes.
-	size_t n_cvt;
-	double time_UtX;	  // Time spent on optimization iterations.
-	double time_opt;	  // Time spent on optimization iterations.
-
-        // Indicator for individuals (phenotypes): 0 missing, 1
-        // available for analysis.
-	vector<int> indicator_idv;
-
-        // Sequence indicator for SNPs: 0 ignored because of (a) maf,
-        // (b) miss, (c) non-poly; 1 available for analysis.
-	vector<int> indicator_snp;
-
-	vector<SNPINFO> snpInfo;  // Record SNP information.
-
-	// Not included in PARAM.
-	vector<SUMSTAT> sumStat;  // Output SNPSummary Data.
-
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-	void AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval,
-			  const gsl_matrix *UtW, const gsl_vector *Utx,
-			  const gsl_matrix *W, const gsl_vector *x);
-	void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
-			   const gsl_matrix *UtW, const gsl_vector *Uty,
-			   const gsl_matrix *W, const gsl_vector *y);
-	// WJA added.
-	void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
-			  const gsl_matrix *UtW, const gsl_vector *Uty,
-			  const gsl_matrix *W, const gsl_vector *y);
-	void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
-			    const gsl_matrix *UtW, const gsl_vector *Uty,
-			    const gsl_matrix *W, const gsl_vector *y);
-	void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
-			      const gsl_matrix *UtW, const gsl_vector *Uty,
-			      const gsl_matrix *W, const gsl_vector *y,
-			      const gsl_vector *env);
-	void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
-			       const gsl_matrix *UtW, const gsl_vector *Uty,
-			       const gsl_matrix *W, const gsl_vector *y,
-			       const gsl_vector *env);
-	void WriteFiles ();
-
-	void CalcRLWald (const double &lambda, const FUNC_PARAM &params,
-			 double &beta, double &se, double &p_wald);
-	void CalcRLScore (const double &l, const FUNC_PARAM &params,
-			  double &beta, double &se, double &p_score);
+  // IO-related parameters
+  int a_mode;    // Analysis mode: 1/2/3/4 for Frequentist tests.
+  size_t d_pace; // Display pace.
+
+  string file_bfile;
+  string file_geno;
+  string file_out;
+  string path_out;
+
+  string file_gene;
+  // WJA added
+  string file_oxford;
+
+  // LMM related parameters
+  double l_min;
+  double l_max;
+  size_t n_region;
+  double l_mle_null;
+  double logl_mle_H0;
+
+  // Summary statistics
+  size_t ni_total, ni_test; // Number of individuals.
+  size_t ns_total, ns_test; // Number of SNPs.
+  size_t ng_total, ng_test; // Number of genes.
+  size_t n_cvt;
+  double time_UtX; // Time spent computing U^T x products (minutes).
+  double time_opt; // Time spent on optimization iterations.
+
+  // Indicator for individuals (phenotypes): 0 missing, 1
+  // available for analysis.
+  vector<int> indicator_idv;
+
+  // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+  // (b) miss, (c) non-poly; 1 available for analysis.
+  vector<int> indicator_snp;
+
+  vector<SNPINFO> snpInfo; // Record SNP information.
+
+  // Not included in PARAM.
+  vector<SUMSTAT> sumStat; // Output SNPSummary Data.
+
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+  void AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
+                   const gsl_matrix *UtW, const gsl_vector *Utx,
+                   const gsl_matrix *W, const gsl_vector *x);
+  void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                    const gsl_matrix *UtW, const gsl_vector *Uty,
+                    const gsl_matrix *W, const gsl_vector *y);
+  // WJA added.
+  void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+                   const gsl_matrix *UtW, const gsl_vector *Uty,
+                   const gsl_matrix *W, const gsl_vector *y);
+  void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+                     const gsl_matrix *UtW, const gsl_vector *Uty,
+                     const gsl_matrix *W, const gsl_vector *y);
+  void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+                       const gsl_matrix *UtW, const gsl_vector *Uty,
+                       const gsl_matrix *W, const gsl_vector *y,
+                       const gsl_vector *env);
+  void AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+                        const gsl_matrix *UtW, const gsl_vector *Uty,
+                        const gsl_matrix *W, const gsl_vector *y,
+                        const gsl_vector *env);
+  void WriteFiles();
+
+  void CalcRLWald(const double &lambda, const FUNC_PARAM &params, double &beta,
+                  double &se, double &p_wald);
+  void CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
+                   double &se, double &p_score);
 };
 
-void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX,
-		   const gsl_vector *Uty, const gsl_vector *K_eval,
-		   const double l_min, const double l_max,
-		   const size_t n_region,
-		   vector<pair<size_t, double> > &pos_loglr);
-void CalcLambda (const char func_name, FUNC_PARAM &params,
-		 const double l_min, const double l_max,
-		 const size_t n_region, double &lambda, double &logf);
-void CalcLambda (const char func_name, const gsl_vector *eval,
-		 const gsl_matrix *UtW, const gsl_vector *Uty,
-		 const double l_min, const double l_max,
-		 const size_t n_region, double &lambda, double &logl_H0);
-void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW,
-	      const gsl_vector *Uty, const double lambda,
-	      const double trace_G, double &pve, double &pve_se);
-void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
-		      const gsl_vector *Uty, const double lambda, double &vg,
-		      double &ve, gsl_vector *beta, gsl_vector *se_beta);
+void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
+                  const gsl_vector *Uty, const gsl_vector *K_eval,
+                  const double l_min, const double l_max, const size_t n_region,
+                  vector<pair<size_t, double>> &pos_loglr);
+void CalcLambda(const char func_name, FUNC_PARAM &params, const double l_min,
+                const double l_max, const size_t n_region, double &lambda,
+                double &logf);
+void CalcLambda(const char func_name, const gsl_vector *eval,
+                const gsl_matrix *UtW, const gsl_vector *Uty,
+                const double l_min, const double l_max, const size_t n_region,
+                double &lambda, double &logl_H0);
+void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
+             const gsl_vector *Uty, const double lambda, const double trace_G,
+             double &pve, double &pve_se);
+void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+                     const gsl_vector *Uty, const double lambda, double &vg,
+                     double &ve, gsl_vector *beta, gsl_vector *se_beta);
 
 #endif
-
-
diff --git a/src/logistic.cpp b/src/logistic.cpp
index f9edc68..2308de7 100644
--- a/src/logistic.cpp
+++ b/src/logistic.cpp
@@ -1,15 +1,15 @@
-#include <stdio.h>
-#include <math.h>
+#include "logistic.h"
+#include <gsl/gsl_linalg.h>
 #include <gsl/gsl_matrix.h>
-#include <gsl/gsl_rng.h>
 #include <gsl/gsl_multimin.h>
+#include <gsl/gsl_rng.h>
 #include <gsl/gsl_sf.h>
-#include <gsl/gsl_linalg.h>
-#include "logistic.h"
+#include <math.h>
+#include <stdio.h>
 
 // I need to bundle all the data that goes to the function to optimze
 // together.
-typedef struct{
+typedef struct {
   gsl_matrix_int *X;
   gsl_vector_int *nlev;
   gsl_vector *y;
@@ -18,13 +18,9 @@ typedef struct{
   double lambdaL2;
 } fix_parm_mixed_T;
 
-double fLogit_mixed(gsl_vector *beta,
-		    gsl_matrix_int *X,
-		    gsl_vector_int *nlev,
-		    gsl_matrix *Xc,
-		    gsl_vector *y,
-		    double lambdaL1,
-		    double lambdaL2) {
+double fLogit_mixed(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+                    gsl_matrix *Xc, gsl_vector *y, double lambdaL1,
+                    double lambdaL2) {
   int n = y->size;
   int npar = beta->size;
   double total = 0;
@@ -33,57 +29,56 @@ double fLogit_mixed(gsl_vector *beta,
   // Changed loop start at 1 instead of 0 to avoid regularization of
   // beta_0*\/
   // #pragma omp parallel for reduction (+:total)
-  for(int i = 1; i < npar; ++i)
-    total += beta->data[i]*beta->data[i];
-  total = (-total*lambdaL2/2);
+  for (int i = 1; i < npar; ++i)
+    total += beta->data[i] * beta->data[i];
+  total = (-total * lambdaL2 / 2);
   // #pragma omp parallel for reduction (+:aux)
-  for(int i = 1; i < npar; ++i)
-    aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
-  total = total-aux*lambdaL1;
+  for (int i = 1; i < npar; ++i)
+    aux += (beta->data[i] > 0 ? beta->data[i] : -beta->data[i]);
+  total = total - aux * lambdaL1;
   // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
   // #reduction (+:total)
-  for(int i = 0; i < n; ++i) {
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < X->size2; ++k) {
-      if(gsl_matrix_int_get(X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
-      iParm+=nlev->data[k]-1;
+  for (int i = 0; i < n; ++i) {
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < X->size2; ++k) {
+      if (gsl_matrix_int_get(X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+      iParm += nlev->data[k] - 1;
     }
-    for(int k = 0; k < (Xc->size2); ++k)
-      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
-    total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+    for (int k = 0; k < (Xc->size2); ++k)
+      Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+    total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
   }
   return -total;
 }
 
 void logistic_mixed_pred(gsl_vector *beta,     // Vector of parameters
-					       // length = 1 + Sum_k(C_k -1)
-			 gsl_matrix_int *X,    // Matrix Nobs x K
-			 gsl_vector_int *nlev, // Vector with number categories
-			 gsl_matrix *Xc,       // Continuous covariates matrix:
-			                       // obs x Kc (NULL if not used).
-			 gsl_vector *yhat){    // Vector of prob. predicted by
-					       // the logistic
-  for(int i = 0; i < X->size1; ++i) {
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < X->size2; ++k) {
-      if(gsl_matrix_int_get(X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
-      iParm+=nlev->data[k]-1;
+                                               // length = 1 + Sum_k(C_k -1)
+                         gsl_matrix_int *X,    // Matrix Nobs x K
+                         gsl_vector_int *nlev, // Vector with number categories
+                         gsl_matrix *Xc,       // Continuous covariates matrix:
+                                               // obs x Kc (NULL if not used).
+                         gsl_vector *yhat) {   // Vector of prob. predicted by
+                                               // the logistic
+  for (int i = 0; i < X->size1; ++i) {
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < X->size2; ++k) {
+      if (gsl_matrix_int_get(X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+      iParm += nlev->data[k] - 1;
     }
     // Adding the continuous.
-    for(int k = 0; k < (Xc->size2); ++k)
-      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
-    yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+    for (int k = 0; k < (Xc->size2); ++k)
+      Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+    yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
   }
 }
 
-
 // The gradient of f, df = (df/dx, df/dy).
-void wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
-			  gsl_vector *out) {
+void wgsl_mixed_optim_df(const gsl_vector *beta, void *params,
+                         gsl_vector *out) {
   fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
   int n = p->y->size;
   int K = p->X->size2;
@@ -91,50 +86,49 @@ void wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
   int npar = beta->size;
 
   // Intitialize gradient out necessary?
-  for(int i = 0; i < npar; ++i)
-    out->data[i]= 0;
+  for (int i = 0; i < npar; ++i)
+    out->data[i] = 0;
 
   // Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
-  for(int i = 1; i < npar; ++i)
-    out->data[i]= p->lambdaL2*beta->data[i];
-  for(int i = 1; i < npar; ++i)
-    out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
-
-  for(int i = 0; i < n; ++i) {
-    double pn=0;
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < K; ++k) {
-      if(gsl_matrix_int_get(p->X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
-      iParm+=p->nlev->data[k]-1;
+  for (int i = 1; i < npar; ++i)
+    out->data[i] = p->lambdaL2 * beta->data[i];
+  for (int i = 1; i < npar; ++i)
+    out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0));
+
+  for (int i = 0; i < n; ++i) {
+    double pn = 0;
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < K; ++k) {
+      if (gsl_matrix_int_get(p->X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm];
+      iParm += p->nlev->data[k] - 1;
     }
 
     // Adding the continuous.
-    for(int k = 0; k < Kc; ++k)
-      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+    for (int k = 0; k < Kc; ++k)
+      Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm++];
 
-    pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+    pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai)));
 
-    out->data[0]+= pn;
-    iParm=1;
-    for(int k = 0; k < K; ++k) {
-      if(gsl_matrix_int_get(p->X,i,k)>0)
-	out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
-      iParm+=p->nlev->data[k]-1;
+    out->data[0] += pn;
+    iParm = 1;
+    for (int k = 0; k < K; ++k) {
+      if (gsl_matrix_int_get(p->X, i, k) > 0)
+        out->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm] += pn;
+      iParm += p->nlev->data[k] - 1;
     }
 
     // Adding the continuous.
-    for(int k = 0; k < Kc; ++k) {
-      out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+    for (int k = 0; k < Kc; ++k) {
+      out->data[iParm++] += gsl_matrix_get(p->Xc, i, k) * pn;
     }
   }
-
 }
 
 // The Hessian of f.
-void  wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params,
-				gsl_matrix *out) {
+void wgsl_mixed_optim_hessian(const gsl_vector *beta, void *params,
+                              gsl_matrix *out) {
   fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
   int n = p->y->size;
   int K = p->X->size2;
@@ -146,120 +140,121 @@ void  wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params,
   gsl_matrix_set_zero(out);
 
   /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
-  for(int i = 1; i < npar; ++i)
-    gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this.
+  for (int i = 1; i < npar; ++i)
+    gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this.
 
   // L1 penalty not working yet, as not differentiable, I may need to
   // do coordinate descent (as in glm_net)
-  for(int i = 0; i < n; ++i) {
-    double pn=0;
-    double aux=0;
-    double Xbetai=beta->data[0];
-    int iParm1=1;
-    for(int k = 0; k < K; ++k) {
-      if(gsl_matrix_int_get(p->X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
-      iParm1+=p->nlev->data[k]-1;  //-1?
+  for (int i = 0; i < n; ++i) {
+    double pn = 0;
+    double aux = 0;
+    double Xbetai = beta->data[0];
+    int iParm1 = 1;
+    for (int k = 0; k < K; ++k) {
+      if (gsl_matrix_int_get(p->X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1];
+      iParm1 += p->nlev->data[k] - 1; //-1?
     }
 
     // Adding the continuous.
-    for(int k = 0; k < Kc; ++k)
-      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+    for (int k = 0; k < Kc; ++k)
+      Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm1++];
 
-    pn= 1/(1 + gsl_sf_exp(-Xbetai));
+    pn = 1 / (1 + gsl_sf_exp(-Xbetai));
 
     // Add a protection for pn very close to 0 or 1?
-    aux=pn*(1-pn);
+    aux = pn * (1 - pn);
 
     // Calculate sub-gradient vector gn.
     gsl_vector_set_zero(gn);
-    gn->data[0]= 1;
-    iParm1=1;
-    for(int k = 0; k < K; ++k) {
-      if(gsl_matrix_int_get(p->X,i,k)>0)
-	gn->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1]=1;
-      iParm1+=p->nlev->data[k]-1;
+    gn->data[0] = 1;
+    iParm1 = 1;
+    for (int k = 0; k < K; ++k) {
+      if (gsl_matrix_int_get(p->X, i, k) > 0)
+        gn->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1] = 1;
+      iParm1 += p->nlev->data[k] - 1;
     }
 
     // Adding the continuous.
-    for(int k = 0; k < Kc; ++k) {
-      gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+    for (int k = 0; k < Kc; ++k) {
+      gn->data[iParm1++] = gsl_matrix_get(p->Xc, i, k);
     }
 
-    for(int k1=0;k1<npar; ++k1)
-      if(gn->data[k1]!=0)
-	for(int k2=0;k2<npar; ++k2)
-	  if(gn->data[k2]!=0)
-	    *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+    for (int k1 = 0; k1 < npar; ++k1)
+      if (gn->data[k1] != 0)
+        for (int k2 = 0; k2 < npar; ++k2)
+          if (gn->data[k2] != 0)
+            *gsl_matrix_ptr(out, k1, k2) += (aux * gn->data[k1] * gn->data[k2]);
   }
   gsl_vector_free(gn);
 }
 
 double wgsl_mixed_optim_f(gsl_vector *v, void *params) {
-  double mLogLik=0;
+  double mLogLik = 0;
   fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
-  mLogLik = fLogit_mixed(v,p->X,p->nlev,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+  mLogLik =
+      fLogit_mixed(v, p->X, p->nlev, p->Xc, p->y, p->lambdaL1, p->lambdaL2);
   return mLogLik;
 }
 
 // Compute both f and df together.
-void
-wgsl_mixed_optim_fdf (gsl_vector *x, void *params, double *f, gsl_vector *df) {
+void wgsl_mixed_optim_fdf(gsl_vector *x, void *params, double *f,
+                          gsl_vector *df) {
   *f = wgsl_mixed_optim_f(x, params);
   wgsl_mixed_optim_df(x, params, df);
 }
 
 // Xc is the matrix of continuous covariates, Nobs x Kc (NULL if not used).
 int logistic_mixed_fit(gsl_vector *beta, gsl_matrix_int *X,
-		       gsl_vector_int *nlev, gsl_matrix *Xc,
-		       gsl_vector *y, double lambdaL1, double lambdaL2) {
-  double mLogLik=0;
+                       gsl_vector_int *nlev, gsl_matrix *Xc, gsl_vector *y,
+                       double lambdaL1, double lambdaL2) {
+  double mLogLik = 0;
   fix_parm_mixed_T p;
   int npar = beta->size;
-  int iter=0;
-  double maxchange=0;
+  int iter = 0;
+  double maxchange = 0;
 
   // Intializing fix parameters.
-  p.X=X;
-  p.Xc=Xc;
-  p.nlev=nlev;
-  p.y=y;
-  p.lambdaL1=lambdaL1;
-  p.lambdaL2=lambdaL2;
+  p.X = X;
+  p.Xc = Xc;
+  p.nlev = nlev;
+  p.y = y;
+  p.lambdaL1 = lambdaL1;
+  p.lambdaL2 = lambdaL2;
 
   // Initial fit.
-  mLogLik = wgsl_mixed_optim_f(beta,&p);
+  mLogLik = wgsl_mixed_optim_f(beta, &p);
 
-  gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix.
-  gsl_vector *stBeta = gsl_vector_alloc(npar);   // Direction to move.
+  gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
+  gsl_vector *stBeta = gsl_vector_alloc(npar);    // Direction to move.
 
-  gsl_vector *myG = gsl_vector_alloc(npar);      // Gradient.
-  gsl_vector *tau = gsl_vector_alloc(npar);      // tau for QR.
+  gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
+  gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
 
-  for(iter=0;iter<100;iter++){
-    wgsl_mixed_optim_hessian(beta,&p,myH);       // Calculate Hessian.
-    wgsl_mixed_optim_df(beta,&p,myG);            // Calculate Gradient.
-    gsl_linalg_QR_decomp(myH,tau);               // Calculate next beta.
-    gsl_linalg_QR_solve(myH,tau,myG,stBeta);
-    gsl_vector_sub(beta,stBeta);
+  for (iter = 0; iter < 100; iter++) {
+    wgsl_mixed_optim_hessian(beta, &p, myH); // Calculate Hessian.
+    wgsl_mixed_optim_df(beta, &p, myG);      // Calculate Gradient.
+    gsl_linalg_QR_decomp(myH, tau);          // Calculate next beta.
+    gsl_linalg_QR_solve(myH, tau, myG, stBeta);
+    gsl_vector_sub(beta, stBeta);
 
     // Monitor convergence.
-    maxchange=0;
-    for(int i=0;i<npar; i++)
-      if(maxchange<fabs(stBeta->data[i]))
-	maxchange=fabs(stBeta->data[i]);
+    maxchange = 0;
+    for (int i = 0; i < npar; i++)
+      if (maxchange < fabs(stBeta->data[i]))
+        maxchange = fabs(stBeta->data[i]);
 
-    if(maxchange<1E-4)
+    if (maxchange < 1E-4)
       break;
   }
 
   // Final fit.
-  mLogLik = wgsl_mixed_optim_f(beta,&p);
+  mLogLik = wgsl_mixed_optim_f(beta, &p);
 
-  gsl_vector_free (tau);
-  gsl_vector_free (stBeta);
-  gsl_vector_free (myG);
-  gsl_matrix_free (myH);
+  gsl_vector_free(tau);
+  gsl_vector_free(stBeta);
+  gsl_vector_free(myG);
+  gsl_matrix_free(myH);
 
   return 0;
 }
@@ -278,8 +273,8 @@ typedef struct {
   double lambdaL2;
 } fix_parm_cat_T;
 
-double fLogit_cat (gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
-		   gsl_vector *y, double lambdaL1, double lambdaL2) {
+double fLogit_cat(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+                  gsl_vector *y, double lambdaL1, double lambdaL2) {
   int n = y->size;
   int npar = beta->size;
   double total = 0;
@@ -288,91 +283,90 @@ double fLogit_cat (gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
   // omp_set_num_threads(ompthr); /\* Changed loop start at 1 instead
   // of 0 to avoid regularization of beta 0*\/ /\*#pragma omp parallel
   // for reduction (+:total)*\/
-  for(int i = 1; i < npar; ++i)
-    total += beta->data[i]*beta->data[i];
-  total = (-total*lambdaL2/2);
+  for (int i = 1; i < npar; ++i)
+    total += beta->data[i] * beta->data[i];
+  total = (-total * lambdaL2 / 2);
 
   // /\*#pragma omp parallel for reduction (+:aux)*\/
-  for(int i = 1; i < npar; ++i)
-    aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
-  total = total-aux*lambdaL1;
+  for (int i = 1; i < npar; ++i)
+    aux += (beta->data[i] > 0 ? beta->data[i] : -beta->data[i]);
+  total = total - aux * lambdaL1;
 
   // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
   // #reduction (+:total)
-  for(int i = 0; i < n; ++i) {
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < X->size2; ++k) {
-      if(gsl_matrix_int_get(X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
-      iParm+=nlev->data[k]-1;
+  for (int i = 0; i < n; ++i) {
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < X->size2; ++k) {
+      if (gsl_matrix_int_get(X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+      iParm += nlev->data[k] - 1;
     }
-    total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+    total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
   }
   return -total;
 }
 
-void logistic_cat_pred (gsl_vector *beta,     // Vector of parameters
-					      // length = 1 + Sum_k(C_k-1).
-		        gsl_matrix_int *X,    // Matrix Nobs x K
-		        gsl_vector_int *nlev, // Vector with #categories
-		        gsl_vector *yhat){    // Vector of prob. predicted by
-					      // the logistic.
-  for(int i = 0; i < X->size1; ++i) {
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < X->size2; ++k) {
-      if(gsl_matrix_int_get(X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
-      iParm+=nlev->data[k]-1;
+void logistic_cat_pred(gsl_vector *beta,     // Vector of parameters
+                                             // length = 1 + Sum_k(C_k-1).
+                       gsl_matrix_int *X,    // Matrix Nobs x K
+                       gsl_vector_int *nlev, // Vector with #categories
+                       gsl_vector *yhat) {   // Vector of prob. predicted by
+                                             // the logistic.
+  for (int i = 0; i < X->size1; ++i) {
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < X->size2; ++k) {
+      if (gsl_matrix_int_get(X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+      iParm += nlev->data[k] - 1;
     }
-    yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+    yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
   }
 }
 
 // The gradient of f, df = (df/dx, df/dy).
-void  wgsl_cat_optim_df (const gsl_vector *beta, void *params,
-		   gsl_vector *out) {
+void wgsl_cat_optim_df(const gsl_vector *beta, void *params, gsl_vector *out) {
   fix_parm_cat_T *p = (fix_parm_cat_T *)params;
   int n = p->y->size;
   int K = p->X->size2;
   int npar = beta->size;
 
   // Intitialize gradient out necessary?
-  for(int i = 0; i < npar; ++i)
-    out->data[i]= 0;
+  for (int i = 0; i < npar; ++i)
+    out->data[i] = 0;
 
   // Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
-  for(int i = 1; i < npar; ++i)
-    out->data[i]= p->lambdaL2*beta->data[i];
-  for(int i = 1; i < npar; ++i)
-    out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
-
-  for(int i = 0; i < n; ++i) {
-    double pn=0;
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < K; ++k) {
-      if(gsl_matrix_int_get(p->X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
-      iParm+=p->nlev->data[k]-1;
+  for (int i = 1; i < npar; ++i)
+    out->data[i] = p->lambdaL2 * beta->data[i];
+  for (int i = 1; i < npar; ++i)
+    out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0));
+
+  for (int i = 0; i < n; ++i) {
+    double pn = 0;
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < K; ++k) {
+      if (gsl_matrix_int_get(p->X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm];
+      iParm += p->nlev->data[k] - 1;
     }
 
-    pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+    pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai)));
 
-    out->data[0]+= pn;
-    iParm=1;
-    for(int k = 0; k < K; ++k) {
-      if(gsl_matrix_int_get(p->X,i,k)>0)
-	out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
-      iParm+=p->nlev->data[k]-1;
+    out->data[0] += pn;
+    iParm = 1;
+    for (int k = 0; k < K; ++k) {
+      if (gsl_matrix_int_get(p->X, i, k) > 0)
+        out->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm] += pn;
+      iParm += p->nlev->data[k] - 1;
     }
   }
 }
 
 // The Hessian of f.
-void  wgsl_cat_optim_hessian (const gsl_vector *beta, void *params,
-			      gsl_matrix *out) {
+void wgsl_cat_optim_hessian(const gsl_vector *beta, void *params,
+                            gsl_matrix *out) {
   fix_parm_cat_T *p = (fix_parm_cat_T *)params;
   int n = p->y->size;
   int K = p->X->size2;
@@ -382,123 +376,119 @@ void  wgsl_cat_optim_hessian (const gsl_vector *beta, void *params,
   gsl_matrix_set_zero(out);
 
   // Changed loop start at 1 instead of 0 to avoid regularization of beta.
-  for(int i = 1; i < npar; ++i)
-    gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this.
+  for (int i = 1; i < npar; ++i)
+    gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this.
 
   // L1 penalty not working yet, as not differentiable, I may need to
   // do coordinate descent (as in glm_net).
-  for(int i = 0; i < n; ++i) {
-    double pn=0;
-    double aux=0;
-    double Xbetai=beta->data[0];
-    int iParm2=1;
-    int iParm1=1;
-    for(int k = 0; k < K; ++k) {
-      if(gsl_matrix_int_get(p->X,i,k)>0)
-	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
-      iParm1+=p->nlev->data[k]-1;  //-1?
+  for (int i = 0; i < n; ++i) {
+    double pn = 0;
+    double aux = 0;
+    double Xbetai = beta->data[0];
+    int iParm2 = 1;
+    int iParm1 = 1;
+    for (int k = 0; k < K; ++k) {
+      if (gsl_matrix_int_get(p->X, i, k) > 0)
+        Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1];
+      iParm1 += p->nlev->data[k] - 1; //-1?
     }
 
-    pn= 1/(1 + gsl_sf_exp(-Xbetai));
+    pn = 1 / (1 + gsl_sf_exp(-Xbetai));
 
     // Add a protection for pn very close to 0 or 1?
-    aux=pn*(1-pn);
-    *gsl_matrix_ptr(out,0,0)+=aux;
-    iParm2=1;
-    for(int k2 = 0; k2 < K; ++k2) {
-      if(gsl_matrix_int_get(p->X,i,k2)>0)
-	*gsl_matrix_ptr(out,0,gsl_matrix_int_get(p->X,i,k2)-1+iParm2)+=aux;
-      iParm2+=p->nlev->data[k2]-1;   //-1?
+    aux = pn * (1 - pn);
+    *gsl_matrix_ptr(out, 0, 0) += aux;
+    iParm2 = 1;
+    for (int k2 = 0; k2 < K; ++k2) {
+      if (gsl_matrix_int_get(p->X, i, k2) > 0)
+        *gsl_matrix_ptr(out, 0, gsl_matrix_int_get(p->X, i, k2) - 1 + iParm2) +=
+            aux;
+      iParm2 += p->nlev->data[k2] - 1; //-1?
     }
-    iParm1=1;
-    for(int k1 = 0; k1 < K; ++k1) {
-      if(gsl_matrix_int_get(p->X,i,k1)>0)
-	*gsl_matrix_ptr(out,gsl_matrix_int_get(p->X,i,k1)-1+iParm1,0)+=aux;
-      iParm2=1;
-      for(int k2 = 0; k2 < K; ++k2) {
-	if((gsl_matrix_int_get(p->X,i,k1)>0) &&
-	   (gsl_matrix_int_get(p->X,i,k2)>0))
-	  *gsl_matrix_ptr(out
-			  ,gsl_matrix_int_get(p->X,i,k1)-1+iParm1
-			  ,gsl_matrix_int_get(p->X,i,k2)-1+iParm2
-			  )+=aux;
-	iParm2+=p->nlev->data[k2]-1;  //-1?
+    iParm1 = 1;
+    for (int k1 = 0; k1 < K; ++k1) {
+      if (gsl_matrix_int_get(p->X, i, k1) > 0)
+        *gsl_matrix_ptr(out, gsl_matrix_int_get(p->X, i, k1) - 1 + iParm1, 0) +=
+            aux;
+      iParm2 = 1;
+      for (int k2 = 0; k2 < K; ++k2) {
+        if ((gsl_matrix_int_get(p->X, i, k1) > 0) &&
+            (gsl_matrix_int_get(p->X, i, k2) > 0))
+          *gsl_matrix_ptr(out, gsl_matrix_int_get(p->X, i, k1) - 1 + iParm1,
+                          gsl_matrix_int_get(p->X, i, k2) - 1 + iParm2) += aux;
+        iParm2 += p->nlev->data[k2] - 1; //-1?
       }
-      iParm1+=p->nlev->data[k1]-1; //-1?
+      iParm1 += p->nlev->data[k1] - 1; //-1?
     }
   }
 }
 
 double wgsl_cat_optim_f(gsl_vector *v, void *params) {
-  double mLogLik=0;
+  double mLogLik = 0;
   fix_parm_cat_T *p = (fix_parm_cat_T *)params;
-  mLogLik = fLogit_cat(v,p->X,p->nlev,p->y,p->lambdaL1,p->lambdaL2);
+  mLogLik = fLogit_cat(v, p->X, p->nlev, p->y, p->lambdaL1, p->lambdaL2);
   return mLogLik;
 }
 
 // Compute both f and df together.
-void wgsl_cat_optim_fdf (gsl_vector *x, void *params, double *f,
-			 gsl_vector *df) {
+void wgsl_cat_optim_fdf(gsl_vector *x, void *params, double *f,
+                        gsl_vector *df) {
   *f = wgsl_cat_optim_f(x, params);
   wgsl_cat_optim_df(x, params, df);
 }
 
-int logistic_cat_fit(gsl_vector *beta,
-		     gsl_matrix_int *X,
-		     gsl_vector_int *nlev,
-		     gsl_vector *y,
-		     double lambdaL1,
-		     double lambdaL2) {
-  double mLogLik=0;
+int logistic_cat_fit(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+                     gsl_vector *y, double lambdaL1, double lambdaL2) {
+  double mLogLik = 0;
   fix_parm_cat_T p;
   int npar = beta->size;
-  int iter=0;
-  double maxchange=0;
+  int iter = 0;
+  double maxchange = 0;
 
   // Intializing fix parameters.
-  p.X=X;
-  p.nlev=nlev;
-  p.y=y;
-  p.lambdaL1=lambdaL1;
-  p.lambdaL2=lambdaL2;
+  p.X = X;
+  p.nlev = nlev;
+  p.y = y;
+  p.lambdaL1 = lambdaL1;
+  p.lambdaL2 = lambdaL2;
 
   // Initial fit.
-  mLogLik = wgsl_cat_optim_f(beta,&p);
+  mLogLik = wgsl_cat_optim_f(beta, &p);
 
-  gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix.
-  gsl_vector *stBeta = gsl_vector_alloc(npar);   // Direction to move.
+  gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
+  gsl_vector *stBeta = gsl_vector_alloc(npar);    // Direction to move.
 
-  gsl_vector *myG = gsl_vector_alloc(npar);      // Gradient.
-  gsl_vector *tau = gsl_vector_alloc(npar);      // tau for QR.
+  gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
+  gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
 
-  for(iter=0;iter<100;iter++){
-    wgsl_cat_optim_hessian(beta,&p,myH); // Calculate Hessian.
-    wgsl_cat_optim_df(beta,&p,myG);      // Calculate Gradient.
-    gsl_linalg_QR_decomp(myH,tau);       // Calculate next beta.
-    gsl_linalg_QR_solve(myH,tau,myG,stBeta);
-    gsl_vector_sub(beta,stBeta);
+  for (iter = 0; iter < 100; iter++) {
+    wgsl_cat_optim_hessian(beta, &p, myH); // Calculate Hessian.
+    wgsl_cat_optim_df(beta, &p, myG);      // Calculate Gradient.
+    gsl_linalg_QR_decomp(myH, tau);        // Calculate next beta.
+    gsl_linalg_QR_solve(myH, tau, myG, stBeta);
+    gsl_vector_sub(beta, stBeta);
 
     // Monitor convergence.
-    maxchange=0;
-    for(int i=0;i<npar; i++)
-      if(maxchange<fabs(stBeta->data[i]))
-	maxchange=fabs(stBeta->data[i]);
+    maxchange = 0;
+    for (int i = 0; i < npar; i++)
+      if (maxchange < fabs(stBeta->data[i]))
+        maxchange = fabs(stBeta->data[i]);
 
 #ifdef _RPR_DEBUG_
-    mLogLik = wgsl_cat_optim_f(beta,&p);
+    mLogLik = wgsl_cat_optim_f(beta, &p);
 #endif
 
-    if(maxchange<1E-4)
+    if (maxchange < 1E-4)
       break;
   }
 
   // Final fit.
-  mLogLik = wgsl_cat_optim_f(beta,&p);
+  mLogLik = wgsl_cat_optim_f(beta, &p);
 
-  gsl_vector_free (tau);
-  gsl_vector_free (stBeta);
-  gsl_vector_free (myG);
-  gsl_matrix_free (myH);
+  gsl_vector_free(tau);
+  gsl_vector_free(stBeta);
+  gsl_vector_free(myG);
+  gsl_matrix_free(myH);
 
   return 0;
 }
@@ -509,15 +499,15 @@ int logistic_cat_fit(gsl_vector *beta,
 
 // I need to bundle all the data that goes to the function to optimze
 // together.
-typedef struct{
-  gsl_matrix *Xc;   // continuous covariates; Matrix Nobs x Kc
+typedef struct {
+  gsl_matrix *Xc; // continuous covariates; Matrix Nobs x Kc
   gsl_vector *y;
   double lambdaL1;
   double lambdaL2;
-}fix_parm_cont_T;
+} fix_parm_cont_T;
 
 double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y,
-		   double lambdaL1, double lambdaL2) {
+                   double lambdaL1, double lambdaL2) {
   int n = y->size;
   int npar = beta->size;
   double total = 0;
@@ -526,82 +516,81 @@ double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y,
   // omp_set_num_threads(ompthr); /\* Changed loop start at 1 instead
   // of 0 to avoid regularization of beta_0*\/ /\*#pragma omp parallel
   // for reduction (+:total)*\/
-  for(int i = 1; i < npar; ++i)
-    total += beta->data[i]*beta->data[i];
-  total = (-total*lambdaL2/2);
+  for (int i = 1; i < npar; ++i)
+    total += beta->data[i] * beta->data[i];
+  total = (-total * lambdaL2 / 2);
 
   // /\*#pragma omp parallel for reduction (+:aux)*\/
-  for(int i = 1; i < npar; ++i)
-    aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
-  total = total-aux*lambdaL1;
+  for (int i = 1; i < npar; ++i)
+    aux += (beta->data[i] > 0 ? beta->data[i] : -beta->data[i]);
+  total = total - aux * lambdaL1;
 
   // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
   // #reduction (+:total)
-  for(int i = 0; i < n; ++i) {
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < (Xc->size2); ++k)
-      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
-    total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+  for (int i = 0; i < n; ++i) {
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < (Xc->size2); ++k)
+      Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+    total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
   }
   return -total;
 }
 
-void logistic_cont_pred(gsl_vector *beta,    // Vector of parameters
-					     // length = 1 + Sum_k(C_k-1).
-			gsl_matrix *Xc,      // Continuous covariates matrix,
-			                     // Nobs x Kc (NULL if not used).
-			gsl_vector *yhat) {  // Vector of prob. predicted by
-                                             // the logistic.
-  for(int i = 0; i < Xc->size1; ++i) {
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < (Xc->size2); ++k)
-      Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
-    yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+void logistic_cont_pred(gsl_vector *beta,   // Vector of parameters
+                                            // length = 1 + Sum_k(C_k-1).
+                        gsl_matrix *Xc,     // Continuous covariates matrix,
+                                            // Nobs x Kc (NULL if not used).
+                        gsl_vector *yhat) { // Vector of prob. predicted by
+                                            // the logistic.
+  for (int i = 0; i < Xc->size1; ++i) {
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < (Xc->size2); ++k)
+      Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+    yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
   }
 }
 
 // The gradient of f, df = (df/dx, df/dy).
-void wgsl_cont_optim_df (const gsl_vector *beta, void *params,
-			 gsl_vector *out) {
+void wgsl_cont_optim_df(const gsl_vector *beta, void *params, gsl_vector *out) {
   fix_parm_cont_T *p = (fix_parm_cont_T *)params;
   int n = p->y->size;
   int Kc = p->Xc->size2;
   int npar = beta->size;
 
   // Intitialize gradient out necessary?
-  for(int i = 0; i < npar; ++i)
-    out->data[i]= 0;
+  for (int i = 0; i < npar; ++i)
+    out->data[i] = 0;
 
   // Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
-  for(int i = 1; i < npar; ++i)
-    out->data[i]= p->lambdaL2*beta->data[i];
-  for(int i = 1; i < npar; ++i)
-    out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+  for (int i = 1; i < npar; ++i)
+    out->data[i] = p->lambdaL2 * beta->data[i];
+  for (int i = 1; i < npar; ++i)
+    out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0));
 
-  for(int i = 0; i < n; ++i) {
-    double pn=0;
-    double Xbetai=beta->data[0];
-    int iParm=1;
-    for(int k = 0; k < Kc; ++k)
-      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+  for (int i = 0; i < n; ++i) {
+    double pn = 0;
+    double Xbetai = beta->data[0];
+    int iParm = 1;
+    for (int k = 0; k < Kc; ++k)
+      Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm++];
 
-    pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+    pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai)));
 
-    out->data[0]+= pn;
-    iParm=1;
+    out->data[0] += pn;
+    iParm = 1;
 
     // Adding the continuous.
-    for(int k = 0; k < Kc; ++k) {
-      out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+    for (int k = 0; k < Kc; ++k) {
+      out->data[iParm++] += gsl_matrix_get(p->Xc, i, k) * pn;
     }
   }
 }
 
 // The Hessian of f.
-void wgsl_cont_optim_hessian (const gsl_vector *beta, void *params,
-			      gsl_matrix *out) {
+void wgsl_cont_optim_hessian(const gsl_vector *beta, void *params,
+                             gsl_matrix *out) {
   fix_parm_cont_T *p = (fix_parm_cont_T *)params;
   int n = p->y->size;
   int Kc = p->Xc->size2;
@@ -614,111 +603,109 @@ void wgsl_cont_optim_hessian (const gsl_vector *beta, void *params,
 
   // Changed loop start at 1 instead of 0 to avoid regularization of
   // beta 0.
-  for(int i = 1; i < npar; ++i)
-    gsl_matrix_set(out,i,i,(p->lambdaL2));  // Double-check this.
+  for (int i = 1; i < npar; ++i)
+    gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this.
 
   // L1 penalty not working yet, as not differentiable, I may need to
   // do coordinate descent (as in glm_net).
-  for(int i = 0; i < n; ++i) {
-    double pn=0;
-    double aux=0;
-    double Xbetai=beta->data[0];
-    int iParm1=1;
-    for(int k = 0; k < Kc; ++k)
-      Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+  for (int i = 0; i < n; ++i) {
+    double pn = 0;
+    double aux = 0;
+    double Xbetai = beta->data[0];
+    int iParm1 = 1;
+    for (int k = 0; k < Kc; ++k)
+      Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm1++];
 
-    pn= 1/(1 + gsl_sf_exp(-Xbetai));
+    pn = 1 / (1 + gsl_sf_exp(-Xbetai));
 
     // Add a protection for pn very close to 0 or 1?
-    aux=pn*(1-pn);
+    aux = pn * (1 - pn);
 
     // Calculate sub-gradient vector gn.
     gsl_vector_set_zero(gn);
-    gn->data[0]= 1;
-    iParm1=1;
-    for(int k = 0; k < Kc; ++k) {
-      gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+    gn->data[0] = 1;
+    iParm1 = 1;
+    for (int k = 0; k < Kc; ++k) {
+      gn->data[iParm1++] = gsl_matrix_get(p->Xc, i, k);
     }
 
-    for(int k1=0;k1<npar; ++k1)
-      if(gn->data[k1]!=0)
-	for(int k2=0;k2<npar; ++k2)
-	  if(gn->data[k2]!=0)
-	    *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+    for (int k1 = 0; k1 < npar; ++k1)
+      if (gn->data[k1] != 0)
+        for (int k2 = 0; k2 < npar; ++k2)
+          if (gn->data[k2] != 0)
+            *gsl_matrix_ptr(out, k1, k2) += (aux * gn->data[k1] * gn->data[k2]);
   }
   gsl_vector_free(gn);
 }
 
 double wgsl_cont_optim_f(gsl_vector *v, void *params) {
-  double mLogLik=0;
+  double mLogLik = 0;
   fix_parm_cont_T *p = (fix_parm_cont_T *)params;
-  mLogLik = fLogit_cont(v,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+  mLogLik = fLogit_cont(v, p->Xc, p->y, p->lambdaL1, p->lambdaL2);
   return mLogLik;
 }
 
 // Compute both f and df together.
-void wgsl_cont_optim_fdf (gsl_vector *x, void *params,
-			  double *f, gsl_vector *df) {
+void wgsl_cont_optim_fdf(gsl_vector *x, void *params, double *f,
+                         gsl_vector *df) {
   *f = wgsl_cont_optim_f(x, params);
   wgsl_cont_optim_df(x, params, df);
 }
 
-int logistic_cont_fit (gsl_vector *beta,
-		       gsl_matrix *Xc,   // Continuous covariates matrix,
-		 		         // Nobs x Kc (NULL if not used).
-		       gsl_vector *y,
-		       double lambdaL1,
-		       double lambdaL2) {
+int logistic_cont_fit(gsl_vector *beta,
+                      gsl_matrix *Xc, // Continuous covariates matrix,
+                                      // Nobs x Kc (NULL if not used).
+                      gsl_vector *y, double lambdaL1, double lambdaL2) {
 
-  double mLogLik=0;
+  double mLogLik = 0;
   fix_parm_cont_T p;
   int npar = beta->size;
-  int iter=0;
-  double maxchange=0;
+  int iter = 0;
+  double maxchange = 0;
 
   // Initializing fix parameters.
-  p.Xc=Xc;
-  p.y=y;
-  p.lambdaL1=lambdaL1;
-  p.lambdaL2=lambdaL2;
+  p.Xc = Xc;
+  p.y = y;
+  p.lambdaL1 = lambdaL1;
+  p.lambdaL2 = lambdaL2;
 
   // Initial fit.
-  mLogLik = wgsl_cont_optim_f(beta,&p);
+  mLogLik = wgsl_cont_optim_f(beta, &p);
 
-  gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix.
-  gsl_vector *stBeta = gsl_vector_alloc(npar);   // Direction to move.
+  gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
+  gsl_vector *stBeta = gsl_vector_alloc(npar);    // Direction to move.
 
-  gsl_vector *myG = gsl_vector_alloc(npar);      // Gradient.
-  gsl_vector *tau = gsl_vector_alloc(npar);      // tau for QR.
+  gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
+  gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
 
-  for(iter=0;iter<100;iter++){
-    wgsl_cont_optim_hessian(beta,&p,myH); // Calculate Hessian.
-    wgsl_cont_optim_df(beta,&p,myG);      // Calculate Gradient.
-    gsl_linalg_QR_decomp(myH,tau);        // Calculate next beta.
-    gsl_linalg_QR_solve(myH,tau,myG,stBeta);
-    gsl_vector_sub(beta,stBeta);
+  for (iter = 0; iter < 100; iter++) {
+    wgsl_cont_optim_hessian(beta, &p, myH); // Calculate Hessian.
+    wgsl_cont_optim_df(beta, &p, myG);      // Calculate Gradient.
+    gsl_linalg_QR_decomp(myH, tau);         // Calculate next beta.
+    gsl_linalg_QR_solve(myH, tau, myG, stBeta);
+    gsl_vector_sub(beta, stBeta);
 
     // Monitor convergence.
-    maxchange=0;
-    for(int i=0;i<npar; i++)
-      if(maxchange<fabs(stBeta->data[i]))
-	maxchange=fabs(stBeta->data[i]);
+    maxchange = 0;
+    for (int i = 0; i < npar; i++)
+      if (maxchange < fabs(stBeta->data[i]))
+        maxchange = fabs(stBeta->data[i]);
 
 #ifdef _RPR_DEBUG_
-    mLogLik = wgsl_cont_optim_f(beta,&p);
+    mLogLik = wgsl_cont_optim_f(beta, &p);
 #endif
 
-    if(maxchange<1E-4)
+    if (maxchange < 1E-4)
       break;
   }
 
   // Final fit.
-  mLogLik = wgsl_cont_optim_f(beta,&p);
+  mLogLik = wgsl_cont_optim_f(beta, &p);
 
-  gsl_vector_free (tau);
-  gsl_vector_free (stBeta);
-  gsl_vector_free (myG);
-  gsl_matrix_free (myH);
+  gsl_vector_free(tau);
+  gsl_vector_free(stBeta);
+  gsl_vector_free(myG);
+  gsl_matrix_free(myH);
 
   return 0;
 }
diff --git a/src/logistic.h b/src/logistic.h
index b61ab14..bebcbf6 100644
--- a/src/logistic.h
+++ b/src/logistic.h
@@ -3,73 +3,63 @@
 
 // Mixed interface.
 void logistic_mixed_pred(gsl_vector *beta,     // Vector of parameters
-					       // length = 1+Sum_k(C_k-1)+Kc.
-			 gsl_matrix_int *X,    // Matrix Nobs x K.
-			 gsl_vector_int *nlev, // Vector with num. categories.
-			 gsl_matrix *Xc,       // Continuous covariates matrix
-					       // Nobs x Kc
-			 gsl_vector *yhat);    // Vector of prob. predicted by
-					       // the logistic.
+                                               // length = 1+Sum_k(C_k-1)+Kc.
+                         gsl_matrix_int *X,    // Matrix Nobs x K.
+                         gsl_vector_int *nlev, // Vector with num. categories.
+                         gsl_matrix *Xc,       // Continuous covariates matrix
+                                               // Nobs x Kc
+                         gsl_vector *yhat);    // Vector of prob. predicted by
+                                               // the logistic.
 
 int logistic_mixed_fit(gsl_vector *beta,     // Vector of parameters
-					     // length = 1+Sum_k(C_k-1)+Kc
-		       gsl_matrix_int *X,    // Matrix Nobs x K.
-		       gsl_vector_int *nlev, // Vector with number categories.
-		       gsl_matrix *Xc,       // Continuous covariates
-					     // matrix Nobs x Kc
-		       gsl_vector *y,        // Vector of prob. to predict.
-		       double lambdaL1,      // Reg. L1 0.0 if not used.
-		       double lambdaL2);     // Reg. L2 0.0 if not used.
+                                             // length = 1+Sum_k(C_k-1)+Kc
+                       gsl_matrix_int *X,    // Matrix Nobs x K.
+                       gsl_vector_int *nlev, // Vector with number categories.
+                       gsl_matrix *Xc,       // Continuous covariates
+                                             // matrix Nobs x Kc
+                       gsl_vector *y,        // Vector of prob. to predict.
+                       double lambdaL1,      // Reg. L1 0.0 if not used.
+                       double lambdaL2);     // Reg. L2 0.0 if not used.
 
-double fLogit_mixed(gsl_vector *beta,
-		    gsl_matrix_int *X,
-		    gsl_vector_int *nlev,
-		    gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
-		    gsl_vector *y,
-		    double lambdaL1,
-		    double lambdaL2);
+double fLogit_mixed(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+                    gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
+                    gsl_vector *y, double lambdaL1, double lambdaL2);
 
 // Categorical-only interface.
 void logistic_cat_pred(gsl_vector *beta,     // Vector of parameters
-					     // length = 1+Sum_k(C_k-1)+Kc.
-		       gsl_matrix_int *X,    // Matrix Nobs x K.
-		       gsl_vector_int *nlev, // Vector with number categories.
-		       gsl_vector *yhat);    // Vector of prob. predicted by
-					     // the logistic.
+                                             // length = 1+Sum_k(C_k-1)+Kc.
+                       gsl_matrix_int *X,    // Matrix Nobs x K.
+                       gsl_vector_int *nlev, // Vector with number categories.
+                       gsl_vector *yhat);    // Vector of prob. predicted by
+                                             // the logistic.
 
 int logistic_cat_fit(gsl_vector *beta,     // Vector of parameters
-					   // length = 1+Sum_k(C_k-1)+Kc.
-		     gsl_matrix_int *X,    // Matrix Nobs x K .
-		     gsl_vector_int *nlev, // Vector with number categories.
-		     gsl_vector *y,        // Vector of prob. to predict.
-		     double lambdaL1,      // Regularization L1, 0 if not used
-		     double lambdaL2);     // Regularization L2, 0 if not used
+                                           // length = 1+Sum_k(C_k-1)+Kc.
+                     gsl_matrix_int *X,    // Matrix Nobs x K .
+                     gsl_vector_int *nlev, // Vector with number categories.
+                     gsl_vector *y,        // Vector of prob. to predict.
+                     double lambdaL1,      // Regularization L1, 0 if not used
+                     double lambdaL2);     // Regularization L2, 0 if not used
 
-double fLogit_cat(gsl_vector *beta,
-		  gsl_matrix_int *X,
-		  gsl_vector_int *nlev,
-		  gsl_vector *y,
-		  double lambdaL1,
-		  double lambdaL2);
+double fLogit_cat(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+                  gsl_vector *y, double lambdaL1, double lambdaL2);
 
 // Continuous-only interface.
-void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
-					  // length = 1 + Sum_k(C_k-1) + Kc.
-			gsl_matrix *Xc,   // Continuous cov's matrix Nobs x Kc.
-			gsl_vector *yhat);// Vector of prob. predicted
-					  // by the logistic.
+void logistic_cont_pred(gsl_vector *beta,  // Vector of parameters
+                                           // length = 1 + Sum_k(C_k-1) + Kc.
+                        gsl_matrix *Xc,    // Continuous cov's matrix Nobs x Kc.
+                        gsl_vector *yhat); // Vector of prob. predicted
+                                           // by the logistic.
 
 int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
-					// length = 1+Sum_k(C_k-1)+Kc.
-		      gsl_matrix *Xc,   // Continuous cov's matrix Nobs x Kc.
-		      gsl_vector *y,    // Vector of prob. to predict.
-		      double lambdaL1,  // Regularization L1, 0 if not used.
-		      double lambdaL2); // Regularization L2, 0 if not used.
+                                        // length = 1+Sum_k(C_k-1)+Kc.
+                      gsl_matrix *Xc,   // Continuous cov's matrix Nobs x Kc.
+                      gsl_vector *y,    // Vector of prob. to predict.
+                      double lambdaL1,  // Regularization L1, 0 if not used.
+                      double lambdaL2); // Regularization L2, 0 if not used.
 
 double fLogit_cont(gsl_vector *beta,
-		   gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
-		   gsl_vector *y,
-		   double lambdaL1,
-		   double lambdaL2);
+                   gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
+                   gsl_vector *y, double lambdaL1, double lambdaL2);
 
 #endif
diff --git a/src/main.cpp b/src/main.cpp
index c7f0573..833136c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -16,57 +16,67 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include "gemma.h"
 #include <fstream>
+#include <iostream>
 #include <sstream>
 #include <sys/stat.h>
 #include <sys/types.h>
-#include "gemma.h"
 
 using namespace std;
 
-int main(int argc, char * argv[]) {
-	GEMMA cGemma;
-	PARAM cPar;
-
-	if (argc <= 1) {
-		cGemma.PrintHeader();
-		return EXIT_SUCCESS;
-	}
-	if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'h') {
-		cGemma.PrintHelp(0);
-		return EXIT_SUCCESS;
-	}
-	if (argc==3 && argv[1][0] == '-' && argv[1][1] == 'h') {
-		string str;
-		str.assign(argv[2]);
-		cGemma.PrintHelp(atoi(str.c_str()));
-		return EXIT_SUCCESS;
-	}
-	if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'l') {
-		cGemma.PrintLicense();
-		return EXIT_SUCCESS;
-	}
-
-	cGemma.Assign(argc, argv, cPar);
-
-	ifstream check_dir((cPar.path_out).c_str());
-	if (!check_dir) {
-	  mkdir((cPar.path_out).c_str(), S_IRWXU|S_IRGRP|S_IROTH);
-	}
-
-	if (cPar.error==true) {return EXIT_FAILURE;}
-
-	if (cPar.mode_silence) {stringstream ss; cout.rdbuf (ss.rdbuf());}
-
-	cPar.CheckParam();
-
-	if (cPar.error==true) {return EXIT_FAILURE;}
-
-	cGemma.BatchRun(cPar);
-
-	if (cPar.error==true) {return EXIT_FAILURE;}
-
-	cGemma.WriteLog(argc, argv, cPar);
-
-    return EXIT_SUCCESS;                                                       }
+int main(int argc, char *argv[]) {
+  GEMMA cGemma;
+  PARAM cPar;
+
+  if (argc <= 1) {
+    cGemma.PrintHeader();
+    return EXIT_SUCCESS;
+  }
+  if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'h') {
+    cGemma.PrintHelp(0);
+    return EXIT_SUCCESS;
+  }
+  if (argc == 3 && argv[1][0] == '-' && argv[1][1] == 'h') {
+    string str;
+    str.assign(argv[2]);
+    cGemma.PrintHelp(atoi(str.c_str()));
+    return EXIT_SUCCESS;
+  }
+  if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'l') {
+    cGemma.PrintLicense();
+    return EXIT_SUCCESS;
+  }
+
+  cGemma.Assign(argc, argv, cPar);
+
+  ifstream check_dir((cPar.path_out).c_str());
+  if (!check_dir) {
+    mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH);
+  }
+
+  if (cPar.error == true) {
+    return EXIT_FAILURE;
+  }
+
+  if (cPar.mode_silence) {
+    stringstream ss;
+    cout.rdbuf(ss.rdbuf());
+  }
+
+  cPar.CheckParam();
+
+  if (cPar.error == true) {
+    return EXIT_FAILURE;
+  }
+
+  cGemma.BatchRun(cPar);
+
+  if (cPar.error == true) {
+    return EXIT_FAILURE;
+  }
+
+  cGemma.WriteLog(argc, argv, cPar);
+
+  return EXIT_SUCCESS;
+}
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 709bdde..9e19bf1 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -16,394 +16,381 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include <bitset>
+#include <cmath>
+#include <cstring>
 #include <fstream>
-#include <sstream>
-#include <string>
 #include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
+#include <limits.h>
 #include <map>
 #include <set>
-#include <cstring>
-#include <cmath>
+#include <sstream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <limits.h>
+#include <string>
+#include <vector>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
+#include "Eigen/Dense"
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
-#include "Eigen/Dense"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 
-#include "lapack.h"
 #include "eigenlib.h"
+#include "lapack.h"
 #include "mathfunc.h"
 
 using namespace std;
 using namespace Eigen;
 
-//calculate variance of a vector
-double VectorVar (const gsl_vector *v) {
-	double d, m=0.0, m2=0.0;
-	for (size_t i=0; i<v->size; ++i) {
-		d=gsl_vector_get (v, i);
-		m+=d;
-		m2+=d*d;
-	}
-	m/=(double)v->size;
-	m2/=(double)v->size;
-	return m2-m*m;
+// calculate variance of a vector
+double VectorVar(const gsl_vector *v) {
+  double d, m = 0.0, m2 = 0.0;
+  for (size_t i = 0; i < v->size; ++i) {
+    d = gsl_vector_get(v, i);
+    m += d;
+    m2 += d * d;
+  }
+  m /= (double)v->size;
+  m2 /= (double)v->size;
+  return m2 - m * m;
 }
 
 // Center the matrix G.
-void CenterMatrix (gsl_matrix *G) {
-	double d;
-	gsl_vector *w=gsl_vector_alloc (G->size1);
-	gsl_vector *Gw=gsl_vector_alloc (G->size1);
-	gsl_vector_set_all (w, 1.0);
-
-	gsl_blas_dgemv (CblasNoTrans, 1.0, G, w, 0.0, Gw);
-	gsl_blas_dsyr2 (CblasUpper, -1.0/(double)G->size1, Gw, w, G);
-	gsl_blas_ddot (w, Gw, &d);
-	gsl_blas_dsyr (CblasUpper, d/((double)G->size1*(double)G->size1),
-		       w, G);
-
-	for (size_t i=0; i<G->size1; ++i) {
-		for (size_t j=0; j<i; ++j) {
-			d=gsl_matrix_get (G, j, i);
-			gsl_matrix_set (G, i, j, d);
-		}
-	}
-
-	gsl_vector_free(w);
-	gsl_vector_free(Gw);
-
-	return;
+void CenterMatrix(gsl_matrix *G) {
+  double d;
+  gsl_vector *w = gsl_vector_alloc(G->size1);
+  gsl_vector *Gw = gsl_vector_alloc(G->size1);
+  gsl_vector_set_all(w, 1.0);
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
+  gsl_blas_dsyr2(CblasUpper, -1.0 / (double)G->size1, Gw, w, G);
+  gsl_blas_ddot(w, Gw, &d);
+  gsl_blas_dsyr(CblasUpper, d / ((double)G->size1 * (double)G->size1), w, G);
+
+  for (size_t i = 0; i < G->size1; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      d = gsl_matrix_get(G, j, i);
+      gsl_matrix_set(G, i, j, d);
+    }
+  }
+
+  gsl_vector_free(w);
+  gsl_vector_free(Gw);
+
+  return;
 }
 
 // Center the matrix G.
-void CenterMatrix (gsl_matrix *G, const gsl_vector *w) {
-	double d, wtw;
-	gsl_vector *Gw=gsl_vector_alloc (G->size1);
-
-	gsl_blas_ddot (w, w, &wtw);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, G, w, 0.0, Gw);
-	gsl_blas_dsyr2 (CblasUpper, -1.0/wtw, Gw, w, G);
-	gsl_blas_ddot (w, Gw, &d);
-	gsl_blas_dsyr (CblasUpper, d/(wtw*wtw), w, G);
-
-	for (size_t i=0; i<G->size1; ++i) {
-		for (size_t j=0; j<i; ++j) {
-			d=gsl_matrix_get (G, j, i);
-			gsl_matrix_set (G, i, j, d);
-		}
-	}
-
-	gsl_vector_free(Gw);
-
-	return;
+void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
+  double d, wtw;
+  gsl_vector *Gw = gsl_vector_alloc(G->size1);
+
+  gsl_blas_ddot(w, w, &wtw);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
+  gsl_blas_dsyr2(CblasUpper, -1.0 / wtw, Gw, w, G);
+  gsl_blas_ddot(w, Gw, &d);
+  gsl_blas_dsyr(CblasUpper, d / (wtw * wtw), w, G);
+
+  for (size_t i = 0; i < G->size1; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      d = gsl_matrix_get(G, j, i);
+      gsl_matrix_set(G, i, j, d);
+    }
+  }
+
+  gsl_vector_free(Gw);
+
+  return;
 }
 
 // Center the matrix G.
-void CenterMatrix (gsl_matrix *G, const gsl_matrix *W) {
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWiWt=gsl_matrix_alloc (W->size2, G->size1);
-	gsl_matrix *GW=gsl_matrix_alloc (G->size1, W->size2);
-	gsl_matrix *WtGW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *Gtmp=gsl_matrix_alloc (G->size1, G->size1);
-
-	gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-	LUDecomp (WtW, pmt, &sig);
-	LUInvert (WtW, pmt, WtWi);
-
-	gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, WtWi, W, 0.0, WtWiWt);
-	gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW);
-	gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0,
-			Gtmp);
-
-	gsl_matrix_sub (G, Gtmp);
-	gsl_matrix_transpose (Gtmp);
-	gsl_matrix_sub (G, Gtmp);
-
-	gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW);
-	//GW is destroyed.
-	gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW);
-	gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0,
-			Gtmp);
-
-	gsl_matrix_add (G, Gtmp);
-
-	gsl_matrix_free(WtW);
-	gsl_matrix_free(WtWi);
-	gsl_matrix_free(WtWiWt);
-	gsl_matrix_free(GW);
-	gsl_matrix_free(WtGW);
-	gsl_matrix_free(Gtmp);
-
-	return;
+void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1);
+  gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2);
+  gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, WtWi, W, 0.0, WtWiWt);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
+
+  gsl_matrix_sub(G, Gtmp);
+  gsl_matrix_transpose(Gtmp);
+  gsl_matrix_sub(G, Gtmp);
+
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW);
+  // GW is destroyed.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
+
+  gsl_matrix_add(G, Gtmp);
+
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_matrix_free(WtWiWt);
+  gsl_matrix_free(GW);
+  gsl_matrix_free(WtGW);
+  gsl_matrix_free(Gtmp);
+
+  return;
 }
 
 // "Standardize" the matrix G such that all diagonal elements = 1.
-void StandardizeMatrix (gsl_matrix *G) {
-	double d=0.0;
-	vector<double> vec_d;
-
-	for (size_t i=0; i<G->size1; ++i) {
-	  vec_d.push_back(gsl_matrix_get(G, i, i));
-	}
-	for (size_t i=0; i<G->size1; ++i) {
-	  for (size_t j=i; j<G->size2; ++j) {
-	    if (j==i) {
-	      gsl_matrix_set(G, i, j, 1);
-	    } else {
-	      d=gsl_matrix_get(G, i, j);
-	      d/=sqrt(vec_d[i]*vec_d[j]);
-	      gsl_matrix_set(G, i, j, d);
-	      gsl_matrix_set(G, j, i, d);
-	    }
-	  }
-	}
-
-	return;
+void StandardizeMatrix(gsl_matrix *G) {
+  double d = 0.0;
+  vector<double> vec_d;
+
+  for (size_t i = 0; i < G->size1; ++i) {
+    vec_d.push_back(gsl_matrix_get(G, i, i));
+  }
+  for (size_t i = 0; i < G->size1; ++i) {
+    for (size_t j = i; j < G->size2; ++j) {
+      if (j == i) {
+        gsl_matrix_set(G, i, j, 1);
+      } else {
+        d = gsl_matrix_get(G, i, j);
+        d /= sqrt(vec_d[i] * vec_d[j]);
+        gsl_matrix_set(G, i, j, d);
+        gsl_matrix_set(G, j, i, d);
+      }
+    }
+  }
+
+  return;
 }
 
 // Scale the matrix G such that the mean diagonal = 1.
-double ScaleMatrix (gsl_matrix *G) {
-	double d=0.0;
+double ScaleMatrix(gsl_matrix *G) {
+  double d = 0.0;
 
-	for (size_t i=0; i<G->size1; ++i) {
-		d+=gsl_matrix_get(G, i, i);
-	}
-	d/=(double)G->size1;
+  for (size_t i = 0; i < G->size1; ++i) {
+    d += gsl_matrix_get(G, i, i);
+  }
+  d /= (double)G->size1;
 
-	if (d!=0) {
-	  gsl_matrix_scale (G, 1.0/d);
-	}
+  if (d != 0) {
+    gsl_matrix_scale(G, 1.0 / d);
+  }
 
-	return d;
+  return d;
 }
 
 // Center the vector y.
-double CenterVector (gsl_vector *y) {
-	double d=0.0;
+double CenterVector(gsl_vector *y) {
+  double d = 0.0;
 
-	for (size_t i=0; i<y->size; ++i) {
-		d+=gsl_vector_get (y, i);
-	}
-	d/=(double)y->size;
+  for (size_t i = 0; i < y->size; ++i) {
+    d += gsl_vector_get(y, i);
+  }
+  d /= (double)y->size;
 
-	gsl_vector_add_constant (y, -1.0*d);
+  gsl_vector_add_constant(y, -1.0 * d);
 
-	return d;
+  return d;
 }
 
 // Center the vector y.
-void CenterVector (gsl_vector *y, const gsl_matrix *W) {
-	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_vector *Wty=gsl_vector_alloc (W->size2);
-	gsl_vector *WtWiWty=gsl_vector_alloc (W->size2);
+void CenterVector(gsl_vector *y, const gsl_matrix *W) {
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *Wty = gsl_vector_alloc(W->size2);
+  gsl_vector *WtWiWty = gsl_vector_alloc(W->size2);
 
-	gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
 
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-	LUDecomp (WtW, pmt, &sig);
-	LUSolve (WtW, pmt, Wty, WtWiWty);
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+  LUDecomp(WtW, pmt, &sig);
+  LUSolve(WtW, pmt, Wty, WtWiWty);
 
-	gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
+  gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
 
-	gsl_matrix_free(WtW);
-	gsl_vector_free(Wty);
-	gsl_vector_free(WtWiWty);
+  gsl_matrix_free(WtW);
+  gsl_vector_free(Wty);
+  gsl_vector_free(WtWiWty);
 
-	return;
+  return;
 }
 
 // "Standardize" vector y to have mean 0 and y^ty/n=1.
-void StandardizeVector (gsl_vector *y) {
-  double d=0.0, m=0.0, v=0.0;
+void StandardizeVector(gsl_vector *y) {
+  double d = 0.0, m = 0.0, v = 0.0;
 
-  for (size_t i=0; i<y->size; ++i) {
-    d=gsl_vector_get (y, i);
-    m+=d;
-    v+=d*d;
+  for (size_t i = 0; i < y->size; ++i) {
+    d = gsl_vector_get(y, i);
+    m += d;
+    v += d * d;
   }
-  m/=(double)y->size;
-  v/=(double)y->size;
-  v-=m*m;
+  m /= (double)y->size;
+  v /= (double)y->size;
+  v -= m * m;
 
-  gsl_vector_add_constant (y, -1.0*m);
-  gsl_vector_scale (y, 1.0/sqrt(v));
+  gsl_vector_add_constant(y, -1.0 * m);
+  gsl_vector_scale(y, 1.0 / sqrt(v));
 
   return;
 }
 
 // Calculate UtX.
-void CalcUtX (const gsl_matrix *U, gsl_matrix *UtX) {
-	gsl_matrix *X=gsl_matrix_alloc (UtX->size1, UtX->size2);
-	gsl_matrix_memcpy (X, UtX);
-	eigenlib_dgemm ("T", "N", 1.0, U, X, 0.0, UtX);
-	gsl_matrix_free (X);
+void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
+  gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2);
+  gsl_matrix_memcpy(X, UtX);
+  eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+  gsl_matrix_free(X);
 
-	return;
+  return;
 }
 
-void CalcUtX (const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
-	eigenlib_dgemm ("T", "N", 1.0, U, X, 0.0, UtX);
-	return;
+void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
+  eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+  return;
 }
 
-void CalcUtX (const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
-	gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
-	return;
+void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
+  gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
+  return;
 }
 
 // Kronecker product.
 void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
-	for (size_t i=0; i<K->size1; i++) {
-		for (size_t j=0; j<K->size2; j++) {
-			gsl_matrix_view H_sub=
-			  gsl_matrix_submatrix (H, i*V->size1, j*V->size2,
-						V->size1, V->size2);
-			gsl_matrix_memcpy (&H_sub.matrix, V);
-			gsl_matrix_scale (&H_sub.matrix,
-					  gsl_matrix_get (K, i, j));
-		}
-	}
-	return;
+  for (size_t i = 0; i < K->size1; i++) {
+    for (size_t j = 0; j < K->size2; j++) {
+      gsl_matrix_view H_sub = gsl_matrix_submatrix(
+          H, i * V->size1, j * V->size2, V->size1, V->size2);
+      gsl_matrix_memcpy(&H_sub.matrix, V);
+      gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
+    }
+  }
+  return;
 }
 
 // Symmetric K matrix.
 void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
-	for (size_t i=0; i<K->size1; i++) {
-		for (size_t j=i; j<K->size2; j++) {
-			gsl_matrix_view H_sub=
-			  gsl_matrix_submatrix (H, i*V->size1, j*V->size2,
-						V->size1, V->size2);
-			gsl_matrix_memcpy (&H_sub.matrix, V);
-			gsl_matrix_scale (&H_sub.matrix,
-					  gsl_matrix_get (K, i, j));
-
-			if (i!=j) {
-				gsl_matrix_view H_sub_sym=
-				  gsl_matrix_submatrix (H, j*V->size1,
-							i*V->size2, V->size1,
-							V->size2);
-				gsl_matrix_memcpy (&H_sub_sym.matrix,
-						   &H_sub.matrix);
-			}
-		}
-	}
-	return;
+  for (size_t i = 0; i < K->size1; i++) {
+    for (size_t j = i; j < K->size2; j++) {
+      gsl_matrix_view H_sub = gsl_matrix_submatrix(
+          H, i * V->size1, j * V->size2, V->size1, V->size2);
+      gsl_matrix_memcpy(&H_sub.matrix, V);
+      gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
+
+      if (i != j) {
+        gsl_matrix_view H_sub_sym = gsl_matrix_submatrix(
+            H, j * V->size1, i * V->size2, V->size1, V->size2);
+        gsl_matrix_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
+      }
+    }
+  }
+  return;
 }
 
 // This function calculates HWE p value with methods described in
 // Wigginton et al. (2005) AJHG; it is based on the code in plink 1.07.
-double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
-	if ( (n_hom1+n_hom2+n_ab)==0 ) {return 1;}
-
-	// "AA" is the rare allele.
-	int n_aa=n_hom1 < n_hom2 ? n_hom1 : n_hom2;
-	int n_bb=n_hom1 < n_hom2 ? n_hom2 : n_hom1;
-
-	int rare_copies = 2 * n_aa + n_ab;
-	int genotypes   = n_ab + n_bb + n_aa;
-
-	double * het_probs = (double *) malloc( (rare_copies + 1) *
-						sizeof(double));
-	if (het_probs == NULL)
-		cout << "Internal error: SNP-HWE: Unable to allocate array" <<
-		  endl;
-
-		int i;
-	for (i = 0; i <= rare_copies; i++)
-		het_probs[i] = 0.0;
-
-	// Start at midpoint.
-	// XZ modified to add (long int)
-	int mid = ((long int)rare_copies *
-		   (2 * (long int)genotypes - (long int)rare_copies)) /
-	  (2 * (long int)genotypes);
-
-	// Check to ensure that midpoint and rare alleles have same
-	// parity.
-	if ((rare_copies & 1) ^ (mid & 1))
-	  mid++;
-
-	int curr_hets = mid;
-	int curr_homr = (rare_copies - mid) / 2;
-	int curr_homc = genotypes - curr_hets - curr_homr;
-
-	het_probs[mid] = 1.0;
-	double sum = het_probs[mid];
-	for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) {
-		het_probs[curr_hets - 2] = het_probs[curr_hets] *
-		  curr_hets * (curr_hets - 1.0)
-		/ (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
-		sum += het_probs[curr_hets - 2];
-
-		// Two fewer heterozygotes for next iteration; add one
-		// rare, one common homozygote.
-		curr_homr++;
-		curr_homc++;
-    }
+double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
+  if ((n_hom1 + n_hom2 + n_ab) == 0) {
+    return 1;
+  }
 
-	curr_hets = mid;
-	curr_homr = (rare_copies - mid) / 2;
-	curr_homc = genotypes - curr_hets - curr_homr;
-	for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) {
-		het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 *
-		  curr_homr * curr_homc /
-		  ((curr_hets + 2.0) * (curr_hets + 1.0));
-		sum += het_probs[curr_hets + 2];
-
-		// Add 2 heterozygotes for next iteration; subtract
-		// one rare, one common homozygote.
-		curr_homr--;
-		curr_homc--;
-    }
+  // "AA" is the rare allele.
+  int n_aa = n_hom1 < n_hom2 ? n_hom1 : n_hom2;
+  int n_bb = n_hom1 < n_hom2 ? n_hom2 : n_hom1;
+
+  int rare_copies = 2 * n_aa + n_ab;
+  int genotypes = n_ab + n_bb + n_aa;
+
+  double *het_probs = (double *)malloc((rare_copies + 1) * sizeof(double));
+  if (het_probs == NULL)
+    cout << "Internal error: SNP-HWE: Unable to allocate array" << endl;
+
+  int i;
+  for (i = 0; i <= rare_copies; i++)
+    het_probs[i] = 0.0;
+
+  // Start at midpoint.
+  // XZ modified to add (long int)
+  int mid = ((long int)rare_copies *
+             (2 * (long int)genotypes - (long int)rare_copies)) /
+            (2 * (long int)genotypes);
+
+  // Check to ensure that midpoint and rare alleles have same
+  // parity.
+  if ((rare_copies & 1) ^ (mid & 1))
+    mid++;
+
+  int curr_hets = mid;
+  int curr_homr = (rare_copies - mid) / 2;
+  int curr_homc = genotypes - curr_hets - curr_homr;
+
+  het_probs[mid] = 1.0;
+  double sum = het_probs[mid];
+  for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) {
+    het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets *
+                               (curr_hets - 1.0) /
+                               (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
+    sum += het_probs[curr_hets - 2];
+
+    // Two fewer heterozygotes for next iteration; add one
+    // rare, one common homozygote.
+    curr_homr++;
+    curr_homc++;
+  }
 
-	for (i = 0; i <= rare_copies; i++)
-		het_probs[i] /= sum;
+  curr_hets = mid;
+  curr_homr = (rare_copies - mid) / 2;
+  curr_homc = genotypes - curr_hets - curr_homr;
+  for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) {
+    het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr *
+                               curr_homc /
+                               ((curr_hets + 2.0) * (curr_hets + 1.0));
+    sum += het_probs[curr_hets + 2];
+
+    // Add 2 heterozygotes for next iteration; subtract
+    // one rare, one common homozygote.
+    curr_homr--;
+    curr_homc--;
+  }
+
+  for (i = 0; i <= rare_copies; i++)
+    het_probs[i] /= sum;
 
-		double p_hwe = 0.0;
+  double p_hwe = 0.0;
 
-	        // p-value calculation for p_hwe.
-		for (i = 0; i <= rare_copies; i++)
-		{
-			if (het_probs[i] > het_probs[n_ab])
-				continue;
-			p_hwe += het_probs[i];
-		}
+  // p-value calculation for p_hwe.
+  for (i = 0; i <= rare_copies; i++) {
+    if (het_probs[i] > het_probs[n_ab])
+      continue;
+    p_hwe += het_probs[i];
+  }
 
-		p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
+  p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
 
-	free(het_probs);
+  free(het_probs);
 
-	return p_hwe;
+  return p_hwe;
 }
 
-double UcharToDouble02(const unsigned char c) {
-  return (double)c*0.01;
-}
+double UcharToDouble02(const unsigned char c) { return (double)c * 0.01; }
 
 unsigned char Double02ToUchar(const double dosage) {
-  return (int) (dosage*100);
+  return (int)(dosage * 100);
 }
 
-void uchar_matrix_get_row (const vector<vector<unsigned char> > &X,
-			   const size_t i_row, VectorXd &x_row) {
-  if (i_row<X.size()) {
-    for (size_t j=0; j<x_row.size(); j++) {
-      x_row(j)=UcharToDouble02(X[i_row][j]);
+void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
+                          const size_t i_row, VectorXd &x_row) {
+  if (i_row < X.size()) {
+    for (size_t j = 0; j < x_row.size(); j++) {
+      x_row(j) = UcharToDouble02(X[i_row][j]);
     }
   } else {
     std::cerr << "Error return genotype vector...\n";
diff --git a/src/mathfunc.h b/src/mathfunc.h
index b24364b..29eafe4 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -19,32 +19,32 @@
 #ifndef __MATHFUNC_H__
 #define __MATHFUNC_H__
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
 #include "Eigen/Dense"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 
 using namespace std;
 using namespace Eigen;
 
-double VectorVar (const gsl_vector *v);
-void CenterMatrix (gsl_matrix *G);
-void CenterMatrix (gsl_matrix *G, const gsl_vector *w);
-void CenterMatrix (gsl_matrix *G, const gsl_matrix *W);
-void StandardizeMatrix (gsl_matrix *G);
-double ScaleMatrix (gsl_matrix *G);
-double CenterVector (gsl_vector *y);
-void CenterVector (gsl_vector *y, const gsl_matrix *W);
-void StandardizeVector (gsl_vector *y);
-void CalcUtX (const gsl_matrix *U, gsl_matrix *UtX);
-void CalcUtX (const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX);
-void CalcUtX (const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx);
-double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab);
+double VectorVar(const gsl_vector *v);
+void CenterMatrix(gsl_matrix *G);
+void CenterMatrix(gsl_matrix *G, const gsl_vector *w);
+void CenterMatrix(gsl_matrix *G, const gsl_matrix *W);
+void StandardizeMatrix(gsl_matrix *G);
+double ScaleMatrix(gsl_matrix *G);
+double CenterVector(gsl_vector *y);
+void CenterVector(gsl_vector *y, const gsl_matrix *W);
+void StandardizeVector(gsl_vector *y);
+void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX);
+void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX);
+void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx);
+double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab);
 void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H);
 void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H);
 
 double UcharToDouble02(const unsigned char c);
 unsigned char Double02ToUchar(const double dosage);
-void uchar_matrix_get_row (const vector<vector<unsigned char> > &X,
-			   const size_t i_row, VectorXd &x_row);
+void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
+                          const size_t i_row, VectorXd &x_row);
 
 #endif
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 78cd926..f1ab3fc 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -16,895 +16,914 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <assert.h>
+#include <bitset>
 #include <cmath>
+#include <cstring>
+#include <iomanip>
 #include <iostream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <bitset>
-#include <cstring>
-#include <assert.h>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
-#include "gsl/gsl_roots.h"
-#include "gsl/gsl_min.h"
 #include "gsl/gsl_integration.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
 
-#include "io.h"
-#include "lapack.h"
 #include "eigenlib.h"
 #include "gzstream.h"
+#include "io.h"
+#include "lapack.h"
 #include "lmm.h"
 #include "mvlmm.h"
 
 using namespace std;
 
 // In this file, X, Y are already transformed (i.e. UtX and UtY).
-void MVLMM::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
-
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_oxford=cPar.file_oxford;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
-
-	l_min=cPar.l_min;
-	l_max=cPar.l_max;
-	n_region=cPar.n_region;
-	p_nr=cPar.p_nr;
-	em_iter=cPar.em_iter;
-	nr_iter=cPar.nr_iter;
-	em_prec=cPar.em_prec;
-	nr_prec=cPar.nr_prec;
-	crt=cPar.crt;
-
-	Vg_remle_null=cPar.Vg_remle_null;
-	Ve_remle_null=cPar.Ve_remle_null;
-	Vg_mle_null=cPar.Vg_mle_null;
-	Ve_mle_null=cPar.Ve_mle_null;
-
-	time_UtX=0.0;
-	time_opt=0.0;
-
-	ni_total=cPar.ni_total;
-	ns_total=cPar.ns_total;
-	ni_test=cPar.ni_test;
-	ns_test=cPar.ns_test;
-	n_cvt=cPar.n_cvt;
-
-	n_ph=cPar.n_ph;
-
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
-
-	return;
+void MVLMM::CopyFromParam(PARAM &cPar) { // Copy run settings out of cPar into this object's fields.
+  a_mode = cPar.a_mode; // WriteFiles branches on a_mode to choose the p-value columns.
+  d_pace = cPar.d_pace;
+
+  file_bfile = cPar.file_bfile; // File-name fields follow, copied as-is.
+  file_geno = cPar.file_geno;
+  file_oxford = cPar.file_oxford;
+  file_out = cPar.file_out; // Joined with path_out in WriteFiles to form the output file name.
+  path_out = cPar.path_out;
+
+  l_min = cPar.l_min; // Numeric options follow, copied verbatim (presumably optimiser settings -- confirm in PARAM).
+  l_max = cPar.l_max;
+  n_region = cPar.n_region;
+  p_nr = cPar.p_nr;
+  em_iter = cPar.em_iter;
+  nr_iter = cPar.nr_iter;
+  em_prec = cPar.em_prec;
+  nr_prec = cPar.nr_prec;
+  crt = cPar.crt;
+
+  Vg_remle_null = cPar.Vg_remle_null; // These four fields are copied back to cPar by CopyToParam.
+  Ve_remle_null = cPar.Ve_remle_null;
+  Vg_mle_null = cPar.Vg_mle_null;
+  Ve_mle_null = cPar.Ve_mle_null;
+
+  time_UtX = 0.0; // Reset to zero rather than copied from cPar; CopyToParam exports the values.
+  time_opt = 0.0;
+
+  ni_total = cPar.ni_total; // Count fields follow, copied verbatim.
+  ns_total = cPar.ns_total;
+  ni_test = cPar.ni_test;
+  ns_test = cPar.ns_test;
+  n_cvt = cPar.n_cvt;
+
+  n_ph = cPar.n_ph; // Loop bound for the beta_/Vbeta_ columns in WriteFiles.
+
+  indicator_idv = cPar.indicator_idv;
+  indicator_snp = cPar.indicator_snp; // WriteFiles skips entries whose flag is 0.
+  snpInfo = cPar.snpInfo; // Per-SNP records that WriteFiles prints.
+
+  return;
 }
 
-void MVLMM::CopyToParam (PARAM &cPar) {
-	cPar.time_UtX=time_UtX;
-	cPar.time_opt=time_opt;
+void MVLMM::CopyToParam(PARAM &cPar) { // Export values held by this object back into cPar.
+  cPar.time_UtX = time_UtX; // CopyFromParam reset these two fields to 0.0.
+  cPar.time_opt = time_opt;
 
-	cPar.Vg_remle_null=Vg_remle_null;
-	cPar.Ve_remle_null=Ve_remle_null;
-	cPar.Vg_mle_null=Vg_mle_null;
-	cPar.Ve_mle_null=Ve_mle_null;
+  cPar.Vg_remle_null = Vg_remle_null; // Same four fields that CopyFromParam reads from cPar.
+  cPar.Ve_remle_null = Ve_remle_null;
+  cPar.Vg_mle_null = Vg_mle_null;
+  cPar.Ve_mle_null = Ve_mle_null;
 
-	cPar.VVg_remle_null=VVg_remle_null;
-	cPar.VVe_remle_null=VVe_remle_null;
-	cPar.VVg_mle_null=VVg_mle_null;
-	cPar.VVe_mle_null=VVe_mle_null;
+  cPar.VVg_remle_null = VVg_remle_null; // Export-only: CopyFromParam does not read the VV* fields.
+  cPar.VVe_remle_null = VVe_remle_null;
+  cPar.VVg_mle_null = VVg_mle_null;
+  cPar.VVe_mle_null = VVe_mle_null;
 
-	cPar.beta_remle_null=beta_remle_null;
-	cPar.se_beta_remle_null=se_beta_remle_null;
-	cPar.beta_mle_null=beta_mle_null;
-	cPar.se_beta_mle_null=se_beta_mle_null;
+  cPar.beta_remle_null = beta_remle_null; // Export-only: CopyFromParam does not read the beta fields.
+  cPar.se_beta_remle_null = se_beta_remle_null;
+  cPar.beta_mle_null = beta_mle_null;
+  cPar.se_beta_mle_null = se_beta_mle_null;
 
-	cPar.logl_remle_H0=logl_remle_H0;
-	cPar.logl_mle_H0=logl_mle_H0;
-	return;
+  cPar.logl_remle_H0 = logl_remle_H0; // Export-only: CopyFromParam does not read the logl fields.
+  cPar.logl_mle_H0 = logl_mle_H0;
+  return;
 }
 
-void MVLMM::WriteFiles () {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".assoc.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl; 
-	  return;
-	}
-
-	outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"
-	       <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
-	for (size_t i=0; i<n_ph; i++) {
-		outfile<<"beta_"<<i+1<<"\t";
-	}
-	for (size_t i=0; i<n_ph; i++) {
-		for (size_t j=i; j<n_ph; j++) {
-			outfile<<"Vbeta_"<<i+1<<"_"<<j+1<<"\t";
-		}
-	}
-
-	if (a_mode==1) {
-		outfile<<"p_wald"<<endl;
-	} else if (a_mode==2) {
-		outfile<<"p_lrt"<<endl;
-	} else if (a_mode==3) {
-		outfile<<"p_score"<<endl;
-	} else if (a_mode==4) {
-		outfile<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
-	} else {}
-
-
-	size_t t=0, c=0;
-	for (size_t i=0; i<snpInfo.size(); ++i) {
-		if (indicator_snp[i]==0) {continue;}
-
-		outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
-		       <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<
-		  "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<
-		  fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
-		outfile<<scientific<<setprecision(6);
-
-		for (size_t i=0; i<n_ph; i++) {
-			outfile<<sumStat[t].v_beta[i]<<"\t";
-		}
-
-		c=0;
-		for (size_t i=0; i<n_ph; i++) {
-			for (size_t j=i; j<n_ph; j++) {
-				outfile<<sumStat[t].v_Vbeta[c]<<"\t";
-				c++;
-			}
-		}
-
-		if (a_mode==1) {
-			outfile<<sumStat[t].p_wald <<endl;
-		} else if (a_mode==2) {
-			outfile<<sumStat[t].p_lrt<<endl;
-		} else if (a_mode==3) {
-			outfile<<sumStat[t].p_score<<endl;
-		} else if (a_mode==4) {
-			outfile<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<
-			  "\t"<<sumStat[t].p_score<<endl;
-		} else {}
-
-		t++;
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void MVLMM::WriteFiles() { // Write one tab-separated row per kept SNP to <path_out>/<file_out>.assoc.txt.
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".assoc.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) { // Open failed: report on stdout and return without writing anything.
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  outfile << "chr" // Header row, part 1: the seven fixed per-SNP columns.
+          << "\t"
+          << "rs"
+          << "\t"
+          << "ps"
+          << "\t"
+          << "n_miss"
+          << "\t"
+          << "allele1"
+          << "\t"
+          << "allele0"
+          << "\t"
+          << "af"
+          << "\t";
+
+  for (size_t i = 0; i < n_ph; i++) { // Header part 2: beta_1 .. beta_<n_ph> (labels are 1-based).
+    outfile << "beta_" << i + 1 << "\t";
+  }
+  for (size_t i = 0; i < n_ph; i++) { // Header part 3: Vbeta_<i>_<j> for every pair with i <= j.
+    for (size_t j = i; j < n_ph; j++) {
+      outfile << "Vbeta_" << i + 1 << "_" << j + 1 << "\t";
+    }
+  }
+
+  if (a_mode == 1) { // Header part 4: p-value column(s) chosen by a_mode; each branch ends the line.
+    outfile << "p_wald" << endl;
+  } else if (a_mode == 2) {
+    outfile << "p_lrt" << endl;
+  } else if (a_mode == 3) {
+    outfile << "p_score" << endl;
+  } else if (a_mode == 4) {
+    outfile << "p_wald"
+            << "\t"
+            << "p_lrt"
+            << "\t"
+            << "p_score" << endl;
+  } else { // NOTE(review): for any other a_mode nothing is written, so the header row is never terminated.
+  }
+
+  size_t t = 0, c = 0; // t walks sumStat (advanced only for kept SNPs); c walks v_Vbeta within one row.
+  for (size_t i = 0; i < snpInfo.size(); ++i) {
+    if (indicator_snp[i] == 0) { // Skipped SNPs print nothing and do not advance t.
+      continue;
+    }
+
+    outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+            << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+            << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" << fixed
+            << setprecision(3) << snpInfo[i].maf << "\t"; // The "af" column is printed fixed with 3 decimals.
+
+    outfile << scientific << setprecision(6); // Every later number in the row uses scientific notation.
+
+    for (size_t i = 0; i < n_ph; i++) { // NOTE(review): this i shadows the SNP index of the enclosing loop.
+      outfile << sumStat[t].v_beta[i] << "\t";
+    }
+
+    c = 0;
+    for (size_t i = 0; i < n_ph; i++) { // v_Vbeta is read sequentially in the same i <= j order as the header.
+      for (size_t j = i; j < n_ph; j++) {
+        outfile << sumStat[t].v_Vbeta[c] << "\t";
+        c++;
+      }
+    }
+
+    if (a_mode == 1) { // Same a_mode dispatch as the header; other values leave the row unterminated.
+      outfile << sumStat[t].p_wald << endl;
+    } else if (a_mode == 2) {
+      outfile << sumStat[t].p_lrt << endl;
+    } else if (a_mode == 3) {
+      outfile << sumStat[t].p_score << endl;
+    } else if (a_mode == 4) {
+      outfile << sumStat[t].p_wald << "\t" << sumStat[t].p_lrt << "\t"
+              << sumStat[t].p_score << endl;
+    } else {
+    }
+
+    t++;
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
 // Below are functions for EM algorithm.
-double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, 
-		  gsl_vector *D_l, gsl_matrix *UltVeh, 
-		  gsl_matrix *UltVehi) {
-	size_t d_size=V_g->size1;
-	double d, logdet_Ve=0.0;
-
-	// Eigen decomposition of V_e.
-	gsl_matrix *Lambda=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_temp=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_h=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_hi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *VgVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *U_l=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix_memcpy(V_e_temp, V_e);
-	EigenDecomp(V_e_temp, U_l, D_l, 0);
-
-	// Calculate V_e_h and V_e_hi.
-	gsl_matrix_set_zero(V_e_h);
-	gsl_matrix_set_zero(V_e_hi);
-	for (size_t i=0; i<d_size; i++) {
-		d=gsl_vector_get (D_l, i);
-		if (d<=0) {continue;}
-		logdet_Ve+=log(d);
-
-		gsl_vector_view U_col=gsl_matrix_column(U_l, i);
-		d=sqrt(d);
-		gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_h);
-		d=1.0/d;
-		gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_hi);
-	}
-
-	// Copy the upper part to lower part.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<i; j++) {
-		  gsl_matrix_set (V_e_h, i, j, gsl_matrix_get(V_e_h, j, i));
-		  gsl_matrix_set (V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
-		}
-	}
-
-	// Calculate Lambda=V_ehi V_g V_ehi.
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_g,V_e_hi,0.0,VgVehi);
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_e_hi,VgVehi,0.0,Lambda);
-
-	// Eigen decomposition of Lambda.
-	EigenDecomp(Lambda, U_l, D_l, 0);
-
-	for (size_t i=0; i<d_size; i++) {
-	  d=gsl_vector_get (D_l, i);
-	  if (d<0) {gsl_vector_set (D_l, i, 0);}
-	}
-
-	// Calculate UltVeh and UltVehi.
-	gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_h,0.0,UltVeh);
-	gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_hi,0.0,UltVehi);
-
-	//free memory
-	gsl_matrix_free (Lambda);
-	gsl_matrix_free (V_e_temp);
-	gsl_matrix_free (V_e_h);
-	gsl_matrix_free (V_e_hi);
-	gsl_matrix_free (VgVehi);
-	gsl_matrix_free (U_l);
-
-	return logdet_Ve;
+double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
+                 gsl_matrix *UltVeh, gsl_matrix *UltVehi) { // Fills D_l, UltVeh, UltVehi; returns logdet_Ve.
+  size_t d_size = V_g->size1; // Every scratch matrix below is d_size x d_size.
+  double d, logdet_Ve = 0.0;
+
+  // Eigen decomposition of V_e.
+  gsl_matrix *Lambda = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_temp = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_h = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_hi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *VgVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *U_l = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix_memcpy(V_e_temp, V_e); // EigenDecomp is given a copy, never the const input V_e.
+  EigenDecomp(V_e_temp, U_l, D_l, 0); // Project helper; presumably eigenvectors -> columns of U_l, eigenvalues -> D_l.
+
+  // Calculate V_e_h and V_e_hi.
+  gsl_matrix_set_zero(V_e_h);
+  gsl_matrix_set_zero(V_e_hi);
+  for (size_t i = 0; i < d_size; i++) {
+    d = gsl_vector_get(D_l, i);
+    if (d <= 0) { // Non-positive entries add nothing to V_e_h, V_e_hi or logdet_Ve.
+      continue;
+    }
+    logdet_Ve += log(d); // Running sum of log over the positive entries of D_l.
+
+    gsl_vector_view U_col = gsl_matrix_column(U_l, i);
+    d = sqrt(d);
+    gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_h); // V_e_h += sqrt(d) * u u^T, upper triangle only.
+    d = 1.0 / d;
+    gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_hi); // V_e_hi += (1/sqrt(d)) * u u^T, upper triangle only.
+  }
+
+  // Copy the upper part to lower part.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < i; j++) {
+      gsl_matrix_set(V_e_h, i, j, gsl_matrix_get(V_e_h, j, i));
+      gsl_matrix_set(V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
+    }
+  }
+
+  // Calculate Lambda=V_ehi V_g V_ehi.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_g, V_e_hi, 0.0, VgVehi);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda);
+
+  // Eigen decomposition of Lambda.
+  EigenDecomp(Lambda, U_l, D_l, 0); // Overwrites U_l and D_l; the V_e decomposition is no longer needed.
+
+  for (size_t i = 0; i < d_size; i++) { // Clamp negative entries of the output D_l to zero.
+    d = gsl_vector_get(D_l, i);
+    if (d < 0) {
+      gsl_vector_set(D_l, i, 0);
+    }
+  }
+
+  // Calculate UltVeh and UltVehi.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh); // UltVeh = U_l^T * V_e_h.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_hi, 0.0, UltVehi); // UltVehi = U_l^T * V_e_hi.
+
+  // Free the scratch matrices; results live in the caller-owned D_l, UltVeh and UltVehi.
+  gsl_matrix_free(Lambda);
+  gsl_matrix_free(V_e_temp);
+  gsl_matrix_free(V_e_h);
+  gsl_matrix_free(V_e_hi);
+  gsl_matrix_free(VgVehi);
+  gsl_matrix_free(U_l);
+
+  return logdet_Ve;
 }
 
-//Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}.
-double CalcQi (const gsl_vector *eval, const gsl_vector *D_l, 
-	       const gsl_matrix *X, gsl_matrix *Qi) {
-	size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
-	size_t c_size=dc_size/d_size;
-
-	double delta, dl, d1, d2, d, logdet_Q;
-
-	gsl_matrix *Q=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix_set_zero (Q);
-
-	for (size_t i=0; i<c_size; i++) {
-		for (size_t j=0; j<c_size; j++) {
-			for (size_t l=0; l<d_size; l++) {
-				dl=gsl_vector_get(D_l, l);
-
-				if (j<i) {
-				  d=gsl_matrix_get (Q, j*d_size+l, i*d_size+l);
-				} else {
-					d=0.0;
-					for (size_t k=0; k<n_size; k++) {
-					  d1=gsl_matrix_get(X, i, k);
-					  d2=gsl_matrix_get(X, j, k);
-					  delta=gsl_vector_get(eval, k);
-					  d+=d1*d2/(dl*delta+1.0);
-					}
-				}
-
-				gsl_matrix_set (Q, i*d_size+l, j*d_size+l, d);
-			}
-		}
-	}
-
-	// Calculate LU decomposition of Q, and invert Q and calculate |Q|.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (dc_size);
-	LUDecomp (Q, pmt, &sig);
-	LUInvert (Q, pmt, Qi);
-
-	logdet_Q=LULndet (Q);
-
-	gsl_matrix_free (Q);
-	gsl_permutation_free (pmt);
-
-	return logdet_Q;
+// Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}.
+double CalcQi(const gsl_vector *eval, const gsl_vector *D_l,
+              const gsl_matrix *X, gsl_matrix *Qi) { // Writes the inverse into Qi; returns logdet_Q.
+  size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
+  size_t c_size = dc_size / d_size; // Number of d_size-wide blocks per side of Q.
+
+  double delta, dl, d1, d2, d, logdet_Q;
+
+  gsl_matrix *Q = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix_set_zero(Q); // Only the diagonal of each (i, j) block is written below; the rest stays 0.
+
+  for (size_t i = 0; i < c_size; i++) {
+    for (size_t j = 0; j < c_size; j++) {
+      for (size_t l = 0; l < d_size; l++) {
+        dl = gsl_vector_get(D_l, l);
+
+        if (j < i) { // Lower blocks reuse the value already stored at the mirrored position.
+          d = gsl_matrix_get(Q, j * d_size + l, i * d_size + l);
+        } else {
+          d = 0.0;
+          for (size_t k = 0; k < n_size; k++) { // d = sum_k X(i,k) * X(j,k) / (dl * eval_k + 1).
+            d1 = gsl_matrix_get(X, i, k);
+            d2 = gsl_matrix_get(X, j, k);
+            delta = gsl_vector_get(eval, k);
+            d += d1 * d2 / (dl * delta + 1.0);
+          }
+        }
+
+        gsl_matrix_set(Q, i * d_size + l, j * d_size + l, d);
+      }
+    }
+  }
+
+  // Calculate LU decomposition of Q, and invert Q and calculate |Q|.
+  int sig; // Filled in by LUDecomp; not read again in this function.
+  gsl_permutation *pmt = gsl_permutation_alloc(dc_size);
+  LUDecomp(Q, pmt, &sig); // Project helpers; presumably Q is overwritten with its LU factors.
+  LUInvert(Q, pmt, Qi);
+
+  logdet_Q = LULndet(Q); // Called on Q after LUDecomp, i.e. on the decomposed matrix.
+
+  gsl_matrix_free(Q);
+  gsl_permutation_free(pmt);
+
+  return logdet_Q;
 }
 
 // xHiy=\sum_{k=1}^n x_k\otimes ((delta_k*Dl+I)^{-1}Ul^TVe^{-1/2}y.
-void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l, 
-	      const gsl_matrix *X, const gsl_matrix *UltVehiY, 
-	      gsl_vector *xHiy) {
-	size_t n_size=eval->size, c_size=X->size1, d_size=D_l->size;
-
-	gsl_vector_set_zero (xHiy);
-
-	double x, delta, dl, y, d;
-	for (size_t i=0; i<d_size; i++) {
-		dl=gsl_vector_get(D_l, i);
-		for (size_t j=0; j<c_size; j++) {
-			d=0.0;
-			for (size_t k=0; k<n_size; k++) {
-				x=gsl_matrix_get(X, j, k);
-				y=gsl_matrix_get(UltVehiY, i, k);
-				delta=gsl_vector_get(eval, k);
-				d+=x*y/(delta*dl+1.0);
-			}
-			gsl_vector_set(xHiy, j*d_size+i, d);
-		}
-	}
-
-	return;
-}
+void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l,
+              const gsl_matrix *X, const gsl_matrix *UltVehiY,
+              gsl_vector *xHiy) { // Fills xHiy, which is indexed up to c_size * d_size - 1.
+  size_t n_size = eval->size, c_size = X->size1, d_size = D_l->size;
+
+  gsl_vector_set_zero(xHiy);
+
+  double x, delta, dl, y, d;
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+    for (size_t j = 0; j < c_size; j++) {
+      d = 0.0;
+      for (size_t k = 0; k < n_size; k++) { // d = sum_k X(j,k) * UltVehiY(i,k) / (eval_k * dl + 1).
+        x = gsl_matrix_get(X, j, k);
+        y = gsl_matrix_get(UltVehiY, i, k);
+        delta = gsl_vector_get(eval, k);
+        d += x * y / (delta * dl + 1.0);
+      }
+      gsl_vector_set(xHiy, j * d_size + i, d); // Layout: block j (row of X), offset i (entry of D_l).
+    }
+  }
 
+  return;
+}
 
 // OmegaU=D_l/(delta Dl+I)^{-1}
 // OmegaE=delta D_l/(delta Dl+I)^{-1}
-void CalcOmega (const gsl_vector *eval, const gsl_vector *D_l, 
-gsl_matrix *OmegaU, gsl_matrix *OmegaE) {
-	size_t n_size=eval->size, d_size=D_l->size;
-	double delta, dl, d_u, d_e;
+void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l,
+               gsl_matrix *OmegaU, gsl_matrix *OmegaE) { // Fills OmegaU and OmegaE at (i, k), i < d_size, k < n_size.
+  size_t n_size = eval->size, d_size = D_l->size;
+  double delta, dl, d_u, d_e;
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get(eval, k);
-		for (size_t i=0; i<d_size; i++) {
-			dl=gsl_vector_get(D_l, i);
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    for (size_t i = 0; i < d_size; i++) {
+      dl = gsl_vector_get(D_l, i);
 
-			d_u=dl/(delta*dl+1.0);
-			d_e=delta*d_u;
+      d_u = dl / (delta * dl + 1.0); // As computed: OmegaU(i,k) = dl_i / (eval_k * dl_i + 1).
+      d_e = delta * d_u; // As computed: OmegaE(i,k) = eval_k * OmegaU(i,k).
 
-			gsl_matrix_set(OmegaU, i, k, d_u);
-			gsl_matrix_set(OmegaE, i, k, d_e);
-		}
-	}
+      gsl_matrix_set(OmegaU, i, k, d_u);
+      gsl_matrix_set(OmegaE, i, k, d_e);
+    }
+  }
 
-	return;
+  return;
 }
 
-void UpdateU (const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY, 
-	      const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) {
-	gsl_matrix_memcpy (UltVehiU, UltVehiY);
-	gsl_matrix_sub (UltVehiU, UltVehiBX);
+void UpdateU(const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY,
+             const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) { // UltVehiU = (UltVehiY - UltVehiBX) .* OmegaE.
+  gsl_matrix_memcpy(UltVehiU, UltVehiY); // Start from a copy of UltVehiY; the inputs are left untouched.
+  gsl_matrix_sub(UltVehiU, UltVehiBX);
 
-	gsl_matrix_mul_elements (UltVehiU, OmegaE);
-	return;
+  gsl_matrix_mul_elements(UltVehiU, OmegaE); // Element-wise product, not a matrix multiplication.
+  return;
 }
 
-void UpdateE (const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX, 
-	      const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) {
-	gsl_matrix_memcpy (UltVehiE, UltVehiY);
-	gsl_matrix_sub (UltVehiE, UltVehiBX);
-	gsl_matrix_sub (UltVehiE, UltVehiU);
+void UpdateE(const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX,
+             const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) { // UltVehiE = UltVehiY - UltVehiBX - UltVehiU.
+  gsl_matrix_memcpy(UltVehiE, UltVehiY); // Start from a copy of UltVehiY.
+  gsl_matrix_sub(UltVehiE, UltVehiBX);
+  gsl_matrix_sub(UltVehiE, UltVehiU);
 
-	return;
+  return;
 }
 
-void UpdateL_B (const gsl_matrix *X, const gsl_matrix *XXti, 
-		const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU, 
-		gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) {
-	size_t c_size=X->size1, d_size=UltVehiY->size1;
+void UpdateL_B(const gsl_matrix *X, const gsl_matrix *XXti,
+               const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU,
+               gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) { // UltVehiB = (UltVehiY - UltVehiU) * X^T * XXti.
+  size_t c_size = X->size1, d_size = UltVehiY->size1;
 
-	gsl_matrix *YUX=gsl_matrix_alloc (d_size, c_size);
+  gsl_matrix *YUX = gsl_matrix_alloc(d_size, c_size); // Scratch for (UltVehiY - UltVehiU) * X^T.
 
-	gsl_matrix_memcpy (UltVehiBX, UltVehiY);
-	gsl_matrix_sub (UltVehiBX, UltVehiU);
+  gsl_matrix_memcpy(UltVehiBX, UltVehiY); // UltVehiBX is used as scratch here ...
+  gsl_matrix_sub(UltVehiBX, UltVehiU); // ... and on return still holds UltVehiY - UltVehiU.
 
-	gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,UltVehiBX,X,0.0,YUX);
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,YUX,XXti,0.0,UltVehiB);
+  gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UltVehiBX, X, 0.0, YUX); // YUX = UltVehiBX * X^T.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, YUX, XXti, 0.0, UltVehiB); // UltVehiB = YUX * XXti.
 
-	gsl_matrix_free(YUX);
+  gsl_matrix_free(YUX);
 
-	return;
+  return;
 }
 
-void UpdateRL_B (const gsl_vector *xHiy, const gsl_matrix *Qi, 
-		 gsl_matrix *UltVehiB) {
-	size_t d_size=UltVehiB->size1, c_size=UltVehiB->size2, 
-	  dc_size=Qi->size1;
+void UpdateRL_B(const gsl_vector *xHiy, const gsl_matrix *Qi,
+                gsl_matrix *UltVehiB) { // Computes b = Qi * xHiy and stores it column by column in UltVehiB.
+  size_t d_size = UltVehiB->size1, c_size = UltVehiB->size2,
+         dc_size = Qi->size1;
 
-	gsl_vector *b=gsl_vector_alloc (dc_size);
+  gsl_vector *b = gsl_vector_alloc(dc_size); // Temporary; freed before returning.
 
-	// Calculate b=Qiv.
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
+  // Calculate b=Qiv, where v is xHiy.
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
 
-	// Copy b to UltVehiB.
-	for (size_t i=0; i<c_size; i++) {
-		gsl_vector_view UltVehiB_col=gsl_matrix_column (UltVehiB, i);
-		gsl_vector_const_view b_subcol=
-		  gsl_vector_const_subvector (b, i*d_size, d_size);
-		gsl_vector_memcpy (&UltVehiB_col.vector, &b_subcol.vector);
-	}
+  // Copy b to UltVehiB.
+  for (size_t i = 0; i < c_size; i++) { // Column i receives b[i*d_size .. i*d_size + d_size - 1].
+    gsl_vector_view UltVehiB_col = gsl_matrix_column(UltVehiB, i);
+    gsl_vector_const_view b_subcol =
+        gsl_vector_const_subvector(b, i * d_size, d_size);
+    gsl_vector_memcpy(&UltVehiB_col.vector, &b_subcol.vector);
+  }
 
-	gsl_vector_free(b);
+  gsl_vector_free(b);
 
-	return;
+  return;
 }
 
-void UpdateV (const gsl_vector *eval, const gsl_matrix *U, 
-	      const gsl_matrix *E, const gsl_matrix *Sigma_uu, 
-	      const gsl_matrix *Sigma_ee, gsl_matrix *V_g, gsl_matrix *V_e) {
-	size_t n_size=eval->size, d_size=U->size1;
+void UpdateV(const gsl_vector *eval, const gsl_matrix *U, const gsl_matrix *E,
+             const gsl_matrix *Sigma_uu, const gsl_matrix *Sigma_ee,
+             gsl_matrix *V_g, gsl_matrix *V_e) { // Overwrites V_g and V_e; all other arguments are read-only.
+  size_t n_size = eval->size, d_size = U->size1;
 
-	gsl_matrix_set_zero (V_g);
-	gsl_matrix_set_zero (V_e);
+  gsl_matrix_set_zero(V_g); // Previous contents of both outputs are discarded.
+  gsl_matrix_set_zero(V_e);
 
-	double delta;
+  double delta;
 
-	// Calculate the first part: UD^{-1}U^T and EE^T.
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-		if (delta==0) {continue;}
+  // Calculate the first part: UD^{-1}U^T and EE^T.
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    if (delta == 0) { // Skip exact zeros to avoid dividing by zero below.
+      continue;
+    }
 
-		gsl_vector_const_view U_col=gsl_matrix_const_column (U, k);
-		gsl_blas_dsyr (CblasUpper, 1.0/delta, &U_col.vector, V_g);
-	}
+    gsl_vector_const_view U_col = gsl_matrix_const_column(U, k);
+    gsl_blas_dsyr(CblasUpper, 1.0 / delta, &U_col.vector, V_g); // V_g += (1/eval_k) * u_k u_k^T, upper triangle.
+  }
 
-	gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
+  gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e); // V_e = E * E^T, upper triangle only.
 
-	// Copy the upper part to lower part.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<i; j++) {
-			gsl_matrix_set (V_g, i, j, gsl_matrix_get(V_g, j, i));
-			gsl_matrix_set (V_e, i, j, gsl_matrix_get(V_e, j, i));
-		}
-	}
+  // Copy the upper part to lower part.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < i; j++) {
+      gsl_matrix_set(V_g, i, j, gsl_matrix_get(V_g, j, i));
+      gsl_matrix_set(V_e, i, j, gsl_matrix_get(V_e, j, i));
+    }
+  }
 
-	// Add Sigma.
-	gsl_matrix_add (V_g, Sigma_uu);
-	gsl_matrix_add (V_e, Sigma_ee);
+  // Add Sigma.
+  gsl_matrix_add(V_g, Sigma_uu);
+  gsl_matrix_add(V_e, Sigma_ee);
 
-	// Scale by 1/n.
-	gsl_matrix_scale (V_g, 1.0/(double)n_size);
-	gsl_matrix_scale (V_e, 1.0/(double)n_size);
+  // Scale by 1/n.
+  gsl_matrix_scale(V_g, 1.0 / (double)n_size); // n is eval->size, including entries skipped above.
+  gsl_matrix_scale(V_e, 1.0 / (double)n_size);
 
-	return;
+  return;
 }
 
-void CalcSigma (const char func_name, const gsl_vector *eval, 
-		const gsl_vector *D_l, const gsl_matrix *X, 
-		const gsl_matrix *OmegaU, const gsl_matrix *OmegaE, 
-		const gsl_matrix *UltVeh, const gsl_matrix *Qi, 
-		gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' && 
-	    func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
-	    "likelihood, 'L' for log-likelihood."<<endl; 
-	  return;
-	}
-
-	size_t n_size=eval->size, c_size=X->size1;
-	size_t d_size=D_l->size, dc_size=Qi->size1;
-
-	gsl_matrix_set_zero(Sigma_uu);
-	gsl_matrix_set_zero(Sigma_ee);
-
-	double delta, dl, x, d;
-
-	// Calculate the first diagonal term.
-	gsl_vector_view Suu_diag=gsl_matrix_diagonal (Sigma_uu);
-	gsl_vector_view See_diag=gsl_matrix_diagonal (Sigma_ee);
-
-	for (size_t k=0; k<n_size; k++) {
-	  gsl_vector_const_view OmegaU_col=gsl_matrix_const_column (OmegaU, k);
-	  gsl_vector_const_view OmegaE_col=gsl_matrix_const_column (OmegaE, k);
-
-	  gsl_vector_add (&Suu_diag.vector, &OmegaU_col.vector);
-	  gsl_vector_add (&See_diag.vector, &OmegaE_col.vector);
-	}
-
-	// Calculate the second term for REML.
-	if (func_name=='R' || func_name=='r') {
-		gsl_matrix *M_u=gsl_matrix_alloc(dc_size, d_size);
-		gsl_matrix *M_e=gsl_matrix_alloc(dc_size, d_size);
-		gsl_matrix *QiM=gsl_matrix_alloc(dc_size, d_size);
-
-		gsl_matrix_set_zero(M_u);
-		gsl_matrix_set_zero(M_e);
-
-		for (size_t k=0; k<n_size; k++) {
-		  delta=gsl_vector_get(eval, k);
-		  
-		  for (size_t i=0; i<d_size; i++) {
-		    dl=gsl_vector_get(D_l, i);
-		    for (size_t j=0; j<c_size; j++) {
-		      x=gsl_matrix_get(X, j, k);
-		      d=x/(delta*dl+1.0);
-		      gsl_matrix_set(M_e, j*d_size+i, i, d);
-		      gsl_matrix_set(M_u, j*d_size+i, i, d*dl);
-		    }
-		  }
-		  gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_u,0.0,QiM);
-		  gsl_blas_dgemm(CblasTrans,CblasNoTrans,delta,M_u,QiM,1.0,
-				 Sigma_uu);
-		  
-		  gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_e,0.0,QiM);
-		  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,M_e,QiM,1.0,
-				 Sigma_ee);
-		}
-		
-		gsl_matrix_free(M_u);
-		gsl_matrix_free(M_e);
-		gsl_matrix_free(QiM);
-	}
-
-	// Multiply both sides by VehUl.
-	gsl_matrix *M=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_uu,UltVeh,0.0,M);
-	gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_uu);
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_ee,UltVeh,0.0,M);
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_ee);
-
-	gsl_matrix_free(M);
-	return;
+void CalcSigma(const char func_name, const gsl_vector *eval,
+               const gsl_vector *D_l, const gsl_matrix *X,
+               const gsl_matrix *OmegaU, const gsl_matrix *OmegaE,
+               const gsl_matrix *UltVeh, const gsl_matrix *Qi,
+               gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) { // Overwrites Sigma_uu and Sigma_ee.
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') { // Any other mode: print a message and return with the outputs untouched.
+    cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+         << "likelihood, 'L' for log-likelihood." << endl;
+    return;
+  }
+
+  size_t n_size = eval->size, c_size = X->size1;
+  size_t d_size = D_l->size, dc_size = Qi->size1;
+
+  gsl_matrix_set_zero(Sigma_uu);
+  gsl_matrix_set_zero(Sigma_ee);
+
+  double delta, dl, x, d;
+
+  // Calculate the first diagonal term.
+  gsl_vector_view Suu_diag = gsl_matrix_diagonal(Sigma_uu);
+  gsl_vector_view See_diag = gsl_matrix_diagonal(Sigma_ee);
+
+  for (size_t k = 0; k < n_size; k++) { // Diagonals become the sums over k of the OmegaU / OmegaE columns.
+    gsl_vector_const_view OmegaU_col = gsl_matrix_const_column(OmegaU, k);
+    gsl_vector_const_view OmegaE_col = gsl_matrix_const_column(OmegaE, k);
+
+    gsl_vector_add(&Suu_diag.vector, &OmegaU_col.vector);
+    gsl_vector_add(&See_diag.vector, &OmegaE_col.vector);
+  }
+
+  // Calculate the second term for REML.
+  if (func_name == 'R' || func_name == 'r') { // Skipped entirely for 'L'/'l'.
+    gsl_matrix *M_u = gsl_matrix_alloc(dc_size, d_size);
+    gsl_matrix *M_e = gsl_matrix_alloc(dc_size, d_size);
+    gsl_matrix *QiM = gsl_matrix_alloc(dc_size, d_size);
+
+    gsl_matrix_set_zero(M_u); // Zeroed once: each k rewrites the same (j*d_size+i, i) cells, the rest stay 0.
+    gsl_matrix_set_zero(M_e);
+
+    for (size_t k = 0; k < n_size; k++) {
+      delta = gsl_vector_get(eval, k);
+
+      for (size_t i = 0; i < d_size; i++) {
+        dl = gsl_vector_get(D_l, i);
+        for (size_t j = 0; j < c_size; j++) {
+          x = gsl_matrix_get(X, j, k);
+          d = x / (delta * dl + 1.0);
+          gsl_matrix_set(M_e, j * d_size + i, i, d); // M_e cell = X(j,k) / (eval_k * dl_i + 1).
+          gsl_matrix_set(M_u, j * d_size + i, i, d * dl); // M_u cell = the M_e cell times dl_i.
+        }
+      }
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_u, 0.0, QiM);
+      gsl_blas_dgemm(CblasTrans, CblasNoTrans, delta, M_u, QiM, 1.0, Sigma_uu); // Sigma_uu += eval_k * M_u^T Qi M_u.
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_e, 0.0, QiM);
+      gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, M_e, QiM, 1.0, Sigma_ee); // Sigma_ee += M_e^T Qi M_e.
+    }
+
+    gsl_matrix_free(M_u);
+    gsl_matrix_free(M_e);
+    gsl_matrix_free(QiM);
+  }
+
+  // Multiply both sides by VehUl.
+  gsl_matrix *M = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_uu, UltVeh, 0.0, M);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_uu); // Sigma_uu = UltVeh^T Sigma_uu UltVeh.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_ee, UltVeh, 0.0, M);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_ee); // Sigma_ee = UltVeh^T Sigma_ee UltVeh.
+
+  gsl_matrix_free(M);
+  return;
 }
 
 // 'R' for restricted likelihood and 'L' for likelihood.
 // 'R' update B and 'L' don't.
 // only calculate -0.5*\sum_{k=1}^n|H_k|-0.5yPxy.
-double MphCalcLogL (const gsl_vector *eval, const gsl_vector *xHiy, 
-		    const gsl_vector *D_l, const gsl_matrix *UltVehiY, 
-		    const gsl_matrix *Qi) {
-	size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
-	double logl=0.0, delta, dl, y, d;
-
-	// Calculate yHiy+log|H_k|.
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get(eval, k);
-		for (size_t i=0; i<d_size; i++) {
-			y=gsl_matrix_get(UltVehiY, i, k);
-			dl=gsl_vector_get(D_l, i);
-			d=delta*dl+1.0;
-
-			logl+=y*y/d+log(d);
-		}
-	}
+double MphCalcLogL(const gsl_vector *eval, const gsl_vector *xHiy,
+                   const gsl_vector *D_l, const gsl_matrix *UltVehiY,
+                   const gsl_matrix *Qi) { // Returns -0.5 * (sum of y^2/d + log d, minus xHiy^T Qi xHiy).
+  size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
+  double logl = 0.0, delta, dl, y, d;
+
+  // Calculate yHiy+log|H_k|.
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    for (size_t i = 0; i < d_size; i++) {
+      y = gsl_matrix_get(UltVehiY, i, k);
+      dl = gsl_vector_get(D_l, i);
+      d = delta * dl + 1.0;
+
+      logl += y * y / d + log(d); // Accumulated with positive sign; negated and halved at the return.
+    }
+  }
 
-	// Calculate the rest of yPxy.
-	gsl_vector *Qiv=gsl_vector_alloc(dc_size);
+  // Calculate the rest of yPxy.
+  gsl_vector *Qiv = gsl_vector_alloc(dc_size); // Temporary for Qi * xHiy; freed below.
 
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
-	gsl_blas_ddot(xHiy, Qiv, &d);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
+  gsl_blas_ddot(xHiy, Qiv, &d); // d = xHiy^T * Qi * xHiy (reuses the scalar d).
 
-	logl-=d;
+  logl -= d;
 
-	gsl_vector_free(Qiv);
+  gsl_vector_free(Qiv);
 
-	return -0.5*logl;
+  return -0.5 * logl;
 }
 
 // Y is a dxn matrix, X is a cxn matrix, B is a dxc matrix, V_g is a
 // dxd matrix, V_e is a dxd matrix, eval is a size n vector
 //'R' for restricted likelihood and 'L' for likelihood.
-double MphEM (const char func_name, const size_t max_iter, 
-	      const double max_prec, const gsl_vector *eval, 
-	      const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *U_hat, 
-	      gsl_matrix *E_hat, gsl_matrix *OmegaU, gsl_matrix *OmegaE, 
-	      gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX, 
-	      gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, 
-	      gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B) {
-	if (func_name!='R' && func_name!='L' && 
-	    func_name!='r' && func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
-	    "likelihood, 'L' for log-likelihood."<<endl; 
-	  return 0.0;
-	}
-
-	size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
-	size_t dc_size=d_size*c_size;
-
-	gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
-	gsl_matrix *XXti=gsl_matrix_alloc (c_size, c_size);
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehiB=gsl_matrix_alloc (d_size, c_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *Sigma_uu=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Sigma_ee=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *xHiy=gsl_vector_alloc (dc_size);
-	gsl_permutation * pmt=gsl_permutation_alloc (c_size);
-
-	double logl_const=0.0, logl_old=0.0, logl_new=0.0;
-	double logdet_Q, logdet_Ve;
-	int sig;
-
-	// Calculate |XXt| and (XXt)^{-1}.
-	gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
-	for (size_t i=0; i<c_size; ++i) {
-		for (size_t j=0; j<i; ++j) {
-		  gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i));
-		}
-	}
-
-	LUDecomp (XXt, pmt, &sig);
-	LUInvert (XXt, pmt, XXti);
-
-	// Calculate the constant for logl.
-	if (func_name=='R' || func_name=='r') {
-	  logl_const=-0.5*(double)(n_size-c_size)*
-	    (double)d_size*log(2.0*M_PI)+0.5*(double)d_size*LULndet (XXt);
-	} else {
-	  logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
-	}
-
-	// Start EM.
-	for (size_t t=0; t<max_iter; t++) {
-	  logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-	  
-	  logdet_Q=CalcQi (eval, D_l, X, Qi);
-	  
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, 
-			 Y, 0.0, UltVehiY);
-	  CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
-
-	  // Calculate log likelihood/restricted likelihood value, and
-	  // terminate if change is small.
-	  logl_new=logl_const+MphCalcLogL (eval, xHiy, D_l, UltVehiY, Qi) - 
-	    0.5*(double)n_size*logdet_Ve;
-	  if (func_name=='R' || func_name=='r') {
-	    logl_new+=-0.5*(logdet_Q-(double)c_size*logdet_Ve);
-	  }
-	  if (t!=0 && abs(logl_new-logl_old)<max_prec) {break;}
-	  logl_old=logl_new;
-	  
-	  CalcOmega (eval, D_l, OmegaU, OmegaE);
-
-	  // Update UltVehiB, UltVehiU.
-	  if (func_name=='R' || func_name=='r') {
-	    UpdateRL_B(xHiy, Qi, UltVehiB);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 
-			   0.0, UltVehiBX);
-	  } else if (t==0) {
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 
-			   0.0, UltVehiB);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 
-			   0.0, UltVehiBX);
-	  }
-	  
-	  UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
-	  
-	  if (func_name=='L' || func_name=='l') {
-
-	    // UltVehiBX is destroyed here.
-	    UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 
-			   0.0, UltVehiBX);
-	  }
-	  
-	  UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
-
-	  // Calculate U_hat, E_hat and B.
-	  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiU,
-			 0.0,U_hat);
-	  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiE,
-			 0.0,E_hat);
-	  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiB,0.0,B);
-	  
-	  // Calculate Sigma_uu and Sigma_ee.
-	  CalcSigma (func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, 
-		     Qi, Sigma_uu, Sigma_ee);
-	  
-	  // Update V_g and V_e.
-	  UpdateV (eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
-	}
-
-	gsl_matrix_free(XXt);
-	gsl_matrix_free(XXti);
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(UltVehiB);
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(Sigma_uu);
-	gsl_matrix_free(Sigma_ee);
-	gsl_vector_free(xHiy);
-	gsl_permutation_free(pmt);
-
-	return logl_new;
+double MphEM(const char func_name, const size_t max_iter, const double max_prec,
+             const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
+             gsl_matrix *U_hat, gsl_matrix *E_hat, gsl_matrix *OmegaU,
+             gsl_matrix *OmegaE, gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX,
+             gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, gsl_matrix *V_g,
+             gsl_matrix *V_e, gsl_matrix *B) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+         << "likelihood, 'L' for log-likelihood." << endl;
+    return 0.0;
+  }
+
+  size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+  size_t dc_size = d_size * c_size;
+
+  gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
+  gsl_matrix *XXti = gsl_matrix_alloc(c_size, c_size);
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehiB = gsl_matrix_alloc(d_size, c_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *Sigma_uu = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Sigma_ee = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *xHiy = gsl_vector_alloc(dc_size);
+  gsl_permutation *pmt = gsl_permutation_alloc(c_size);
+
+  double logl_const = 0.0, logl_old = 0.0, logl_new = 0.0;
+  double logdet_Q, logdet_Ve;
+  int sig;
+
+  // Calculate |XXt| and (XXt)^{-1}.
+  gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
+  for (size_t i = 0; i < c_size; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
+    }
+  }
+
+  LUDecomp(XXt, pmt, &sig);
+  LUInvert(XXt, pmt, XXti);
+
+  // Calculate the constant for logl.
+  if (func_name == 'R' || func_name == 'r') {
+    logl_const =
+        -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) +
+        0.5 * (double)d_size * LULndet(XXt);
+  } else {
+    logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI);
+  }
+
+  // Start EM.
+  for (size_t t = 0; t < max_iter; t++) {
+    logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+    logdet_Q = CalcQi(eval, D_l, X, Qi);
+
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+    CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
+
+    // Calculate log likelihood/restricted likelihood value, and
+    // terminate if change is small.
+    logl_new = logl_const + MphCalcLogL(eval, xHiy, D_l, UltVehiY, Qi) -
+               0.5 * (double)n_size * logdet_Ve;
+    if (func_name == 'R' || func_name == 'r') {
+      logl_new += -0.5 * (logdet_Q - (double)c_size * logdet_Ve);
+    }
+    if (t != 0 && abs(logl_new - logl_old) < max_prec) {
+      break;
+    }
+    logl_old = logl_new;
+
+    CalcOmega(eval, D_l, OmegaU, OmegaE);
+
+    // Update UltVehiB, UltVehiU.
+    if (func_name == 'R' || func_name == 'r') {
+      UpdateRL_B(xHiy, Qi, UltVehiB);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+                     UltVehiBX);
+    } else if (t == 0) {
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 0.0,
+                     UltVehiB);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+                     UltVehiBX);
+    }
+
+    UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
+
+    if (func_name == 'L' || func_name == 'l') {
+
+      // UltVehiBX is destroyed here.
+      UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+                     UltVehiBX);
+    }
+
+    UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
+
+    // Calculate U_hat, E_hat and B.
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiU, 0.0, U_hat);
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiE, 0.0, E_hat);
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiB, 0.0, B);
+
+    // Calculate Sigma_uu and Sigma_ee.
+    CalcSigma(func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, Qi, Sigma_uu,
+              Sigma_ee);
+
+    // Update V_g and V_e.
+    UpdateV(eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
+  }
+
+  gsl_matrix_free(XXt);
+  gsl_matrix_free(XXti);
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(UltVehiB);
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(Sigma_uu);
+  gsl_matrix_free(Sigma_ee);
+  gsl_vector_free(xHiy);
+  gsl_permutation_free(pmt);
+
+  return logl_new;
 }
 
 // Calculate p-value, beta (d by 1 vector) and V(beta).
-double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec, 
-		 const gsl_matrix *W, const gsl_matrix *Y, 
-		 const gsl_matrix *V_g, const gsl_matrix *V_e, 
-		 gsl_matrix *UltVehiY, gsl_vector *beta, gsl_matrix *Vbeta) {
-	size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
-	size_t dc_size=d_size*c_size;
-	double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
-
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *WHix=gsl_matrix_alloc (dc_size, d_size);
-	gsl_matrix *QiWHix=gsl_matrix_alloc(dc_size, d_size);
-
-	gsl_matrix *xPx=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *xPy=gsl_vector_alloc (d_size);
-	gsl_vector *WHiy=gsl_vector_alloc (dc_size);
-
-	gsl_matrix_set_zero (xPx);
-	gsl_matrix_set_zero (WHix);
-	gsl_vector_set_zero (xPy);
-	gsl_vector_set_zero (WHiy);
-
-	// Eigen decomposition and calculate log|Ve|.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate Qi and log|Q|.
-	logdet_Q=CalcQi (eval, D_l, W, Qi);
-
-	// Calculate UltVehiY.
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 
-		       0.0, UltVehiY);
-
-	// Calculate WHix, WHiy, xHiy, xHix.
-	for (size_t i=0; i<d_size; i++) {
-		dl=gsl_vector_get(D_l, i);
-
-		d1=0.0; d2=0.0;
-		for (size_t k=0; k<n_size; k++) {
-			delta=gsl_vector_get(eval, k);
-			dx=gsl_vector_get(x_vec, k);
-			dy=gsl_matrix_get(UltVehiY, i, k);
-
-			d1+=dx*dy/(delta*dl+1.0);
-			d2+=dx*dx/(delta*dl+1.0);
-		}
-		gsl_vector_set (xPy, i, d1);
-		gsl_matrix_set (xPx, i, i, d2);
-
-		for (size_t j=0; j<c_size; j++) {
-			d1=0.0; d2=0.0;
-			for (size_t k=0; k<n_size; k++) {
-				delta=gsl_vector_get(eval, k);
-				dx=gsl_vector_get(x_vec, k);
-				dw=gsl_matrix_get(W, j, k);
-				dy=gsl_matrix_get(UltVehiY, i, k);
-
-				d1+=dx*dw/(delta*dl+1.0);
-				d2+=dy*dw/(delta*dl+1.0);
-			}
-			gsl_matrix_set(WHix, j*d_size+i, i, d1);
-			gsl_vector_set(WHiy, j*d_size+i, d2);
-		}
-	}
-
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
-	gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
-
-	// Calculate V(beta) and beta.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (d_size);
-	LUDecomp (xPx, pmt, &sig);
-	LUSolve (xPx, pmt, xPy, D_l);
-	LUInvert (xPx, pmt, Vbeta);
-
-	// Need to multiply UltVehi on both sides or one side.
-	gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 
-		       0.0, xPx);
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
-
-	// Calculate test statistic and p value.
-	gsl_blas_ddot(D_l, xPy, &d);
-
-	p_value=gsl_cdf_chisq_Q (d, (double)d_size);
-
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(WHix);
-	gsl_matrix_free(QiWHix);
-
-	gsl_matrix_free(xPx);
-	gsl_vector_free(xPy);
-	gsl_vector_free(WHiy);
-
-	gsl_permutation_free(pmt);
-
-	return p_value;
+double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec,
+                const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g,
+                const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_vector *beta,
+                gsl_matrix *Vbeta) {
+  size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
+  size_t dc_size = d_size * c_size;
+  double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
+
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *WHix = gsl_matrix_alloc(dc_size, d_size);
+  gsl_matrix *QiWHix = gsl_matrix_alloc(dc_size, d_size);
+
+  gsl_matrix *xPx = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *xPy = gsl_vector_alloc(d_size);
+  gsl_vector *WHiy = gsl_vector_alloc(dc_size);
+
+  gsl_matrix_set_zero(xPx);
+  gsl_matrix_set_zero(WHix);
+  gsl_vector_set_zero(xPy);
+  gsl_vector_set_zero(WHiy);
+
+  // Eigen decomposition and calculate log|Ve|.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate Qi and log|Q|.
+  logdet_Q = CalcQi(eval, D_l, W, Qi);
+
+  // Calculate UltVehiY.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+  // Calculate WHix, WHiy, xHiy, xHix.
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+
+    d1 = 0.0;
+    d2 = 0.0;
+    for (size_t k = 0; k < n_size; k++) {
+      delta = gsl_vector_get(eval, k);
+      dx = gsl_vector_get(x_vec, k);
+      dy = gsl_matrix_get(UltVehiY, i, k);
+
+      d1 += dx * dy / (delta * dl + 1.0);
+      d2 += dx * dx / (delta * dl + 1.0);
+    }
+    gsl_vector_set(xPy, i, d1);
+    gsl_matrix_set(xPx, i, i, d2);
+
+    for (size_t j = 0; j < c_size; j++) {
+      d1 = 0.0;
+      d2 = 0.0;
+      for (size_t k = 0; k < n_size; k++) {
+        delta = gsl_vector_get(eval, k);
+        dx = gsl_vector_get(x_vec, k);
+        dw = gsl_matrix_get(W, j, k);
+        dy = gsl_matrix_get(UltVehiY, i, k);
+
+        d1 += dx * dw / (delta * dl + 1.0);
+        d2 += dy * dw / (delta * dl + 1.0);
+      }
+      gsl_matrix_set(WHix, j * d_size + i, i, d1);
+      gsl_vector_set(WHiy, j * d_size + i, d2);
+    }
+  }
+
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
+  gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
+
+  // Calculate V(beta) and beta.
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(d_size);
+  LUDecomp(xPx, pmt, &sig);
+  LUSolve(xPx, pmt, xPy, D_l);
+  LUInvert(xPx, pmt, Vbeta);
+
+  // Need to multiply UltVehi on both sides or one side.
+  gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 0.0, xPx);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
+
+  // Calculate test statistic and p value.
+  gsl_blas_ddot(D_l, xPy, &d);
+
+  p_value = gsl_cdf_chisq_Q(d, (double)d_size);
+
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(WHix);
+  gsl_matrix_free(QiWHix);
+
+  gsl_matrix_free(xPx);
+  gsl_vector_free(xPy);
+  gsl_vector_free(WHiy);
+
+  gsl_permutation_free(pmt);
+
+  return p_value;
 }
 
 // Calculate B and its standard error (which is a matrix of the same
 // dimension as B).
-void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, 
-		  const gsl_matrix *Y, const gsl_matrix *V_g, 
-		  const gsl_matrix *V_e, gsl_matrix *UltVehiY, 
-		  gsl_matrix *B, gsl_matrix *se_B) {
-	size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
-	size_t dc_size=d_size*c_size;
-	double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
-
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *Qi_temp=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_vector *WHiy=gsl_vector_alloc (dc_size);
-	gsl_vector *QiWHiy=gsl_vector_alloc (dc_size);
-	gsl_vector *beta=gsl_vector_alloc (dc_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_vector_set_zero (WHiy);
-
-	// Eigen decomposition and calculate log|Ve|.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate Qi and log|Q|.
-	logdet_Q=CalcQi (eval, D_l, W, Qi);
-
-	// Calculate UltVehiY.
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 
-		       0.0, UltVehiY);
-
-	// Calculate WHiy.
-	for (size_t i=0; i<d_size; i++) {
-		dl=gsl_vector_get(D_l, i);
-
-		for (size_t j=0; j<c_size; j++) {
-			d=0.0;
-			for (size_t k=0; k<n_size; k++) {
-				delta=gsl_vector_get(eval, k);
-				dw=gsl_matrix_get(W, j, k);
-				dy=gsl_matrix_get(UltVehiY, i, k);
-
-				d+=dy*dw/(delta*dl+1.0);
-			}
-			gsl_vector_set(WHiy, j*d_size+i, d);
-		}
-	}
-
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
-
-	// Need to multiply I_c\otimes UltVehi on both sides or one side.
-	for (size_t i=0; i<c_size; i++) {
-	  gsl_vector_view QiWHiy_sub=
-	    gsl_vector_subvector(QiWHiy, i*d_size, d_size);
-	  gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size);
-	  gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0, 
-			 &beta_sub.vector);
-	  
-	  for (size_t j=0; j<c_size; j++) {
-	    gsl_matrix_view Qi_sub=
-	      gsl_matrix_submatrix (Qi, i*d_size, j*d_size, d_size, d_size);
-	    gsl_matrix_view Qitemp_sub=
-	      gsl_matrix_submatrix (Qi_temp,i*d_size,j*d_size,d_size,d_size);
-	    gsl_matrix_view Vbeta_sub=
-	      gsl_matrix_submatrix (Vbeta, i*d_size, j*d_size, d_size, d_size);
-	    
-	    if (j<i) {
-	      gsl_matrix_view Vbeta_sym=
-		gsl_matrix_submatrix(Vbeta,j*d_size,i*d_size,d_size,d_size);
-	      gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix,&Vbeta_sym.matrix);
-	    } else {
-	      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, 
-			     UltVeh, 0.0, &Qitemp_sub.matrix);
-	      gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, 
-			     &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
-	    }
-	  }
-	}
-
-	// Copy beta to B, and Vbeta to se_B.
-	for (size_t j=0; j<B->size2; j++) {
-	  for (size_t i=0; i<B->size1; i++) {
-	    gsl_matrix_set(B, i, j, gsl_vector_get(beta, j*d_size+i));
-	    gsl_matrix_set(se_B, i, j, 
-			   sqrt(gsl_matrix_get(Vbeta,j*d_size+i,j*d_size+i)));
-	  }
-	}
-	
-	// Free matrices.
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(Qi_temp);
-	gsl_vector_free(WHiy);
-	gsl_vector_free(QiWHiy);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	return;
+void MphCalcBeta(const gsl_vector *eval, const gsl_matrix *W,
+                 const gsl_matrix *Y, const gsl_matrix *V_g,
+                 const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_matrix *B,
+                 gsl_matrix *se_B) {
+  size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
+  size_t dc_size = d_size * c_size;
+  double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
+
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *Qi_temp = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_vector *WHiy = gsl_vector_alloc(dc_size);
+  gsl_vector *QiWHiy = gsl_vector_alloc(dc_size);
+  gsl_vector *beta = gsl_vector_alloc(dc_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_vector_set_zero(WHiy);
+
+  // Eigen decomposition and calculate log|Ve|.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate Qi and log|Q|.
+  logdet_Q = CalcQi(eval, D_l, W, Qi);
+
+  // Calculate UltVehiY.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+  // Calculate WHiy.
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+
+    for (size_t j = 0; j < c_size; j++) {
+      d = 0.0;
+      for (size_t k = 0; k < n_size; k++) {
+        delta = gsl_vector_get(eval, k);
+        dw = gsl_matrix_get(W, j, k);
+        dy = gsl_matrix_get(UltVehiY, i, k);
+
+        d += dy * dw / (delta * dl + 1.0);
+      }
+      gsl_vector_set(WHiy, j * d_size + i, d);
+    }
+  }
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
+
+  // Need to multiply I_c\otimes UltVehi on both sides or one side.
+  for (size_t i = 0; i < c_size; i++) {
+    gsl_vector_view QiWHiy_sub =
+        gsl_vector_subvector(QiWHiy, i * d_size, d_size);
+    gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
+    gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0,
+                   &beta_sub.vector);
+
+    for (size_t j = 0; j < c_size; j++) {
+      gsl_matrix_view Qi_sub =
+          gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
+      gsl_matrix_view Qitemp_sub =
+          gsl_matrix_submatrix(Qi_temp, i * d_size, j * d_size, d_size, d_size);
+      gsl_matrix_view Vbeta_sub =
+          gsl_matrix_submatrix(Vbeta, i * d_size, j * d_size, d_size, d_size);
+
+      if (j < i) {
+        gsl_matrix_view Vbeta_sym =
+            gsl_matrix_submatrix(Vbeta, j * d_size, i * d_size, d_size, d_size);
+        gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix, &Vbeta_sym.matrix);
+      } else {
+        gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
+                       0.0, &Qitemp_sub.matrix);
+        gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
+                       &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
+      }
+    }
+  }
+
+  // Copy beta to B, and Vbeta to se_B.
+  for (size_t j = 0; j < B->size2; j++) {
+    for (size_t i = 0; i < B->size1; i++) {
+      gsl_matrix_set(B, i, j, gsl_vector_get(beta, j * d_size + i));
+      gsl_matrix_set(se_B, i, j, sqrt(gsl_matrix_get(Vbeta, j * d_size + i,
+                                                     j * d_size + i)));
+    }
+  }
+
+  // Free matrices.
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(Qi_temp);
+  gsl_vector_free(WHiy);
+  gsl_vector_free(QiWHiy);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  return;
 }
 
 // Below are functions for Newton-Raphson's algorithm.
@@ -912,996 +931,962 @@ void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W,
 // Calculate all Hi and return logdet_H=\sum_{k=1}^{n}log|H_k|
 // and calculate Qi and return logdet_Q
 // and calculate yPy.
-void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X,
-	       const gsl_matrix *V_g, const gsl_matrix *V_e,
-	       gsl_matrix *Hi_all, gsl_matrix *Qi, double &logdet_H,
-	       double &logdet_Q) {
-	gsl_matrix_set_zero (Hi_all);
-	gsl_matrix_set_zero (Qi);
-	logdet_H=0.0; logdet_Q=0.0;
-
-	size_t n_size=eval->size, c_size=X->size1, d_size=V_g->size1;
-	double logdet_Ve=0.0, delta, dl, d;
-
-	gsl_matrix *mat_dd=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-
-	// Calculate D_l, UltVeh and UltVehi.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate each Hi and log|H_k|.
-	logdet_H=(double)n_size*logdet_Ve;
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		gsl_matrix_memcpy (mat_dd, UltVehi);
-		for (size_t i=0; i<d_size; i++) {
-			dl=gsl_vector_get(D_l, i);
-			d=delta*dl+1.0;
-
-			gsl_vector_view mat_row=gsl_matrix_row (mat_dd, i);
-			gsl_vector_scale (&mat_row.vector, 1.0/d);
-
-			logdet_H+=log(d);
-		}
-
-		gsl_matrix_view Hi_k=
-		  gsl_matrix_submatrix(Hi_all, 0, k*d_size, d_size, d_size);
-		gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi,
-			       mat_dd, 0.0, &Hi_k.matrix);
-	}
-
-	// Calculate Qi, and multiply I\o times UtVeh on both side and
-	// calculate logdet_Q, don't forget to substract
-	// c_size*logdet_Ve.
-	logdet_Q=CalcQi (eval, D_l, X, Qi)-(double)c_size*logdet_Ve;
-
-	for (size_t i=0; i<c_size; i++) {
-		for (size_t j=0; j<c_size; j++) {
-		  gsl_matrix_view Qi_sub=
-		    gsl_matrix_submatrix(Qi,i*d_size,j*d_size,d_size,d_size);
-		  if (j<i) {
-		    gsl_matrix_view Qi_sym=
-		      gsl_matrix_submatrix(Qi,j*d_size,i*d_size,d_size,d_size);
-		    gsl_matrix_transpose_memcpy(&Qi_sub.matrix,&Qi_sym.matrix);
-		  } else {
-		    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-				   &Qi_sub.matrix, UltVeh, 0.0, mat_dd);
-		    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
-				   mat_dd, 0.0, &Qi_sub.matrix);
-		  }
-		}
-	}
-
-	// Free memory.
-	gsl_matrix_free(mat_dd);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_vector_free(D_l);
-
-	return;
+void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X,
+              const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *Hi_all,
+              gsl_matrix *Qi, double &logdet_H, double &logdet_Q) {
+  gsl_matrix_set_zero(Hi_all);
+  gsl_matrix_set_zero(Qi);
+  logdet_H = 0.0;
+  logdet_Q = 0.0;
+
+  size_t n_size = eval->size, c_size = X->size1, d_size = V_g->size1;
+  double logdet_Ve = 0.0, delta, dl, d;
+
+  gsl_matrix *mat_dd = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+
+  // Calculate D_l, UltVeh and UltVehi.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate each Hi and log|H_k|.
+  logdet_H = (double)n_size * logdet_Ve;
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    gsl_matrix_memcpy(mat_dd, UltVehi);
+    for (size_t i = 0; i < d_size; i++) {
+      dl = gsl_vector_get(D_l, i);
+      d = delta * dl + 1.0;
+
+      gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i);
+      gsl_vector_scale(&mat_row.vector, 1.0 / d);
+
+      logdet_H += log(d);
+    }
+
+    gsl_matrix_view Hi_k =
+        gsl_matrix_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi, mat_dd, 0.0,
+                   &Hi_k.matrix);
+  }
+
+  // Calculate Qi, multiply I\otimes UltVeh on both sides, and
+  // calculate logdet_Q; don't forget to subtract
+  // c_size*logdet_Ve.
+  logdet_Q = CalcQi(eval, D_l, X, Qi) - (double)c_size * logdet_Ve;
+
+  for (size_t i = 0; i < c_size; i++) {
+    for (size_t j = 0; j < c_size; j++) {
+      gsl_matrix_view Qi_sub =
+          gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
+      if (j < i) {
+        gsl_matrix_view Qi_sym =
+            gsl_matrix_submatrix(Qi, j * d_size, i * d_size, d_size, d_size);
+        gsl_matrix_transpose_memcpy(&Qi_sub.matrix, &Qi_sym.matrix);
+      } else {
+        gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
+                       0.0, mat_dd);
+        gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, mat_dd, 0.0,
+                       &Qi_sub.matrix);
+      }
+    }
+  }
+
+  // Free memory.
+  gsl_matrix_free(mat_dd);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_vector_free(D_l);
+
+  return;
 }
 
 // Calculate all Hiy.
-void Calc_Hiy_all (const gsl_matrix *Y, const gsl_matrix *Hi_all,
-		   gsl_matrix *Hiy_all) {
-	gsl_matrix_set_zero (Hiy_all);
+void Calc_Hiy_all(const gsl_matrix *Y, const gsl_matrix *Hi_all,
+                  gsl_matrix *Hiy_all) {
+  gsl_matrix_set_zero(Hiy_all);
 
-	size_t n_size=Y->size2, d_size=Y->size1;
+  size_t n_size = Y->size2, d_size = Y->size1;
 
-	for (size_t k=0; k<n_size; k++) {
-		gsl_matrix_const_view Hi_k=
-		  gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size);
-		gsl_vector_const_view y_k=gsl_matrix_const_column(Y,k);
-		gsl_vector_view Hiy_k=gsl_matrix_column(Hiy_all, k);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_matrix_const_view Hi_k =
+        gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
+    gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
+    gsl_vector_view Hiy_k = gsl_matrix_column(Hiy_all, k);
 
-		gsl_blas_dgemv(CblasNoTrans,1.0,&Hi_k.matrix,&y_k.vector,
-			       0.0,&Hiy_k.vector);
-	}
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Hi_k.matrix, &y_k.vector, 0.0,
+                   &Hiy_k.vector);
+  }
 
-	return;
+  return;
 }
 
 // Calculate all xHi.
-void Calc_xHi_all (const gsl_matrix *X, const gsl_matrix *Hi_all,
-		   gsl_matrix *xHi_all) {
-	gsl_matrix_set_zero (xHi_all);
+void Calc_xHi_all(const gsl_matrix *X, const gsl_matrix *Hi_all,
+                  gsl_matrix *xHi_all) {
+  gsl_matrix_set_zero(xHi_all);
 
-	size_t n_size=X->size2, c_size=X->size1, d_size=Hi_all->size1;
+  size_t n_size = X->size2, c_size = X->size1, d_size = Hi_all->size1;
 
-	double d;
+  double d;
 
-	for (size_t k=0; k<n_size; k++) {
-		gsl_matrix_const_view Hi_k=
-		  gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_matrix_const_view Hi_k =
+        gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
 
-		for (size_t i=0; i<c_size; i++) {
-			d=gsl_matrix_get (X, i, k);
-			gsl_matrix_view xHi_sub=
-			  gsl_matrix_submatrix(xHi_all,i*d_size,k*d_size,
-					       d_size,d_size);
-			gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix);
-			gsl_matrix_scale(&xHi_sub.matrix, d);
-		}
-	}
+    for (size_t i = 0; i < c_size; i++) {
+      d = gsl_matrix_get(X, i, k);
+      gsl_matrix_view xHi_sub =
+          gsl_matrix_submatrix(xHi_all, i * d_size, k * d_size, d_size, d_size);
+      gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix);
+      gsl_matrix_scale(&xHi_sub.matrix, d);
+    }
+  }
 
-	return;
+  return;
 }
 
 // Calculate scalar yHiy.
-double Calc_yHiy (const gsl_matrix *Y, const gsl_matrix *Hiy_all) {
-	double yHiy=0.0, d;
-	size_t n_size=Y->size2;
+double Calc_yHiy(const gsl_matrix *Y, const gsl_matrix *Hiy_all) {
+  double yHiy = 0.0, d;
+  size_t n_size = Y->size2;
 
-	for (size_t k=0; k<n_size; k++) {
-	  gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
-	  gsl_vector_const_view Hiy_k=gsl_matrix_const_column(Hiy_all, k);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
+    gsl_vector_const_view Hiy_k = gsl_matrix_const_column(Hiy_all, k);
 
-	  gsl_blas_ddot (&Hiy_k.vector, &y_k.vector, &d);
-	  yHiy+=d;
-	}
+    gsl_blas_ddot(&Hiy_k.vector, &y_k.vector, &d);
+    yHiy += d;
+  }
 
-	return yHiy;
+  return yHiy;
 }
 
 // Calculate the vector xHiy.
-void Calc_xHiy (const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) {
-	gsl_vector_set_zero (xHiy);
+void Calc_xHiy(const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) {
+  gsl_vector_set_zero(xHiy);
 
-	size_t n_size=Y->size2, d_size=Y->size1, dc_size=xHi->size1;
+  size_t n_size = Y->size2, d_size = Y->size1, dc_size = xHi->size1;
 
-	for (size_t k=0; k<n_size; k++) {
-		gsl_matrix_const_view xHi_k=
-		  gsl_matrix_const_submatrix(xHi,0,k*d_size,dc_size,d_size);
-		gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_matrix_const_view xHi_k =
+        gsl_matrix_const_submatrix(xHi, 0, k * d_size, dc_size, d_size);
+    gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
 
-		gsl_blas_dgemv(CblasNoTrans,1.0,&xHi_k.matrix,&y_k.vector,
-			       1.0,xHiy);
-	}
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &xHi_k.matrix, &y_k.vector, 1.0, xHiy);
+  }
 
-	return;
+  return;
 }
 
 // 0<=i,j<d_size
-size_t GetIndex (const size_t i, const size_t j, const size_t d_size) {
-	if (i>=d_size || j>=d_size) {
-	  cout<<"error in GetIndex."<<endl;
-	  return 0;
-	}
-
-	size_t s, l;
-	if (j<i) {
-	  s=j;
-	  l=i;
-	} else {
-	  s=i;
-	  l=j;
-	}
-
-	return (2*d_size-s+1)*s/2+l-s;
+size_t GetIndex(const size_t i, const size_t j, const size_t d_size) {
+  if (i >= d_size || j >= d_size) {
+    cout << "error in GetIndex." << endl;
+    return 0;
+  }
+
+  size_t s, l;
+  if (j < i) {
+    s = j;
+    l = i;
+  } else {
+    s = i;
+    l = j;
+  }
+
+  return (2 * d_size - s + 1) * s / 2 + l - s;
 }
 
-void Calc_yHiDHiy (const gsl_vector *eval, const gsl_matrix *Hiy,
-		   const size_t i, const size_t j, double &yHiDHiy_g,
-		   double &yHiDHiy_e) {
-	yHiDHiy_g=0.0;
-	yHiDHiy_e=0.0;
+void Calc_yHiDHiy(const gsl_vector *eval, const gsl_matrix *Hiy, const size_t i,
+                  const size_t j, double &yHiDHiy_g, double &yHiDHiy_e) {
+  yHiDHiy_g = 0.0;
+  yHiDHiy_e = 0.0;
 
-	size_t n_size=eval->size;
+  size_t n_size = eval->size;
 
-	double delta, d1, d2;
+  double delta, d1, d2;
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-		d1=gsl_matrix_get (Hiy, i, k);
-		d2=gsl_matrix_get (Hiy, j, k);
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    d1 = gsl_matrix_get(Hiy, i, k);
+    d2 = gsl_matrix_get(Hiy, j, k);
 
-		if (i==j) {
-			yHiDHiy_g+=delta*d1*d2;
-			yHiDHiy_e+=d1*d2;
-		} else {
-			yHiDHiy_g+=delta*d1*d2*2.0;
-			yHiDHiy_e+=d1*d2*2.0;
-		}
-	}
+    if (i == j) {
+      yHiDHiy_g += delta * d1 * d2;
+      yHiDHiy_e += d1 * d2;
+    } else {
+      yHiDHiy_g += delta * d1 * d2 * 2.0;
+      yHiDHiy_e += d1 * d2 * 2.0;
+    }
+  }
 
-	return;
+  return;
 }
 
-void Calc_xHiDHiy (const gsl_vector *eval, const gsl_matrix *xHi,
-		   const gsl_matrix *Hiy, const size_t i, const size_t j,
-		   gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) {
-	gsl_vector_set_zero(xHiDHiy_g);
-	gsl_vector_set_zero(xHiDHiy_e);
+void Calc_xHiDHiy(const gsl_vector *eval, const gsl_matrix *xHi,
+                  const gsl_matrix *Hiy, const size_t i, const size_t j,
+                  gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) {
+  gsl_vector_set_zero(xHiDHiy_g);
+  gsl_vector_set_zero(xHiDHiy_e);
+
+  size_t n_size = eval->size, d_size = Hiy->size1;
 
-	size_t n_size=eval->size, d_size=Hiy->size1;
+  double delta, d;
 
-	double delta, d;
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
+    gsl_vector_const_view xHi_col_i =
+        gsl_matrix_const_column(xHi, k * d_size + i);
+    d = gsl_matrix_get(Hiy, j, k);
 
-		gsl_vector_const_view xHi_col_i=
-		  gsl_matrix_const_column (xHi, k*d_size+i);
-		d=gsl_matrix_get (Hiy, j, k);
+    gsl_blas_daxpy(d * delta, &xHi_col_i.vector, xHiDHiy_g);
+    gsl_blas_daxpy(d, &xHi_col_i.vector, xHiDHiy_e);
 
-		gsl_blas_daxpy (d*delta, &xHi_col_i.vector, xHiDHiy_g);
-		gsl_blas_daxpy (d, &xHi_col_i.vector, xHiDHiy_e);
+    if (i != j) {
+      gsl_vector_const_view xHi_col_j =
+          gsl_matrix_const_column(xHi, k * d_size + j);
+      d = gsl_matrix_get(Hiy, i, k);
 
-		if (i!=j) {
-		  gsl_vector_const_view xHi_col_j=
-		    gsl_matrix_const_column (xHi, k*d_size+j);
-		  d=gsl_matrix_get (Hiy, i, k);
-		  
-		  gsl_blas_daxpy (d*delta, &xHi_col_j.vector, xHiDHiy_g);
-		  gsl_blas_daxpy (d, &xHi_col_j.vector, xHiDHiy_e);
-		}
-	}
+      gsl_blas_daxpy(d * delta, &xHi_col_j.vector, xHiDHiy_g);
+      gsl_blas_daxpy(d, &xHi_col_j.vector, xHiDHiy_e);
+    }
+  }
 
-	return;
+  return;
 }
 
-void Calc_xHiDHix (const gsl_vector *eval, const gsl_matrix *xHi,
-		   const size_t i, const size_t j, gsl_matrix *xHiDHix_g,
-		   gsl_matrix *xHiDHix_e) {
-	gsl_matrix_set_zero(xHiDHix_g);
-	gsl_matrix_set_zero(xHiDHix_e);
+void Calc_xHiDHix(const gsl_vector *eval, const gsl_matrix *xHi, const size_t i,
+                  const size_t j, gsl_matrix *xHiDHix_g,
+                  gsl_matrix *xHiDHix_e) {
+  gsl_matrix_set_zero(xHiDHix_g);
+  gsl_matrix_set_zero(xHiDHix_e);
 
-	size_t n_size=eval->size, dc_size=xHi->size1;
-	size_t d_size=xHi->size2/n_size;
+  size_t n_size = eval->size, dc_size = xHi->size1;
+  size_t d_size = xHi->size2 / n_size;
 
-	double delta;
+  double delta;
 
-	gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *mat_dcdc_t=gsl_matrix_alloc (dc_size, dc_size);
+  gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *mat_dcdc_t = gsl_matrix_alloc(dc_size, dc_size);
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
 
-		gsl_vector_const_view xHi_col_i=
-		  gsl_matrix_const_column (xHi, k*d_size+i);
-		gsl_vector_const_view xHi_col_j=
-		  gsl_matrix_const_column (xHi, k*d_size+j);
+    gsl_vector_const_view xHi_col_i =
+        gsl_matrix_const_column(xHi, k * d_size + i);
+    gsl_vector_const_view xHi_col_j =
+        gsl_matrix_const_column(xHi, k * d_size + j);
 
-		gsl_matrix_set_zero (mat_dcdc);
-		gsl_blas_dger(1.0,&xHi_col_i.vector,&xHi_col_j.vector,
-			      mat_dcdc);
+    gsl_matrix_set_zero(mat_dcdc);
+    gsl_blas_dger(1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc);
 
-		gsl_matrix_transpose_memcpy (mat_dcdc_t, mat_dcdc);
+    gsl_matrix_transpose_memcpy(mat_dcdc_t, mat_dcdc);
 
-		gsl_matrix_add (xHiDHix_e, mat_dcdc);
+    gsl_matrix_add(xHiDHix_e, mat_dcdc);
 
-		gsl_matrix_scale (mat_dcdc, delta);
-		gsl_matrix_add (xHiDHix_g, mat_dcdc);
+    gsl_matrix_scale(mat_dcdc, delta);
+    gsl_matrix_add(xHiDHix_g, mat_dcdc);
 
-		if (i!=j) {
-			gsl_matrix_add (xHiDHix_e, mat_dcdc_t);
+    if (i != j) {
+      gsl_matrix_add(xHiDHix_e, mat_dcdc_t);
 
-			gsl_matrix_scale (mat_dcdc_t, delta);
-			gsl_matrix_add (xHiDHix_g, mat_dcdc_t);
-		}
-	}
+      gsl_matrix_scale(mat_dcdc_t, delta);
+      gsl_matrix_add(xHiDHix_g, mat_dcdc_t);
+    }
+  }
 
-	gsl_matrix_free(mat_dcdc);
-	gsl_matrix_free(mat_dcdc_t);
+  gsl_matrix_free(mat_dcdc);
+  gsl_matrix_free(mat_dcdc_t);
 
-	return;
+  return;
 }
 
-void Calc_yHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi,
-		      const gsl_matrix *Hiy, const size_t i1,
-		      const size_t j1, const size_t i2, const size_t j2,
-		      double &yHiDHiDHiy_gg, double &yHiDHiDHiy_ee,
-		      double &yHiDHiDHiy_ge) {
-	yHiDHiDHiy_gg=0.0;
-	yHiDHiDHiy_ee=0.0;
-	yHiDHiDHiy_ge=0.0;
-
-	size_t n_size=eval->size, d_size=Hiy->size1;
-
-	double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2;
-	double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		d_Hiy_i1=gsl_matrix_get (Hiy, i1, k);
-		d_Hiy_j1=gsl_matrix_get (Hiy, j1, k);
-		d_Hiy_i2=gsl_matrix_get (Hiy, i2, k);
-		d_Hiy_j2=gsl_matrix_get (Hiy, j2, k);
-
-		d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-		d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-		d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-		d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
-		if (i1==j1) {
-		  yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-		  
-		  if (i2!=j2) {
-		    yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
-		  }
-		} else {
-		  yHiDHiDHiy_gg+=
-		    delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 +
-				 d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ee+=
-		    (d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2+d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ge+=
-		    delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 +
-			   d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-		  
-		  if (i2!=j2) {
-		    yHiDHiDHiy_gg+=
-		      delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
-				   d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
-				    d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
-					  d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
-		  }
-		}
-	}
-	
-	return;
+void Calc_yHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
+                     const gsl_matrix *Hiy, const size_t i1, const size_t j1,
+                     const size_t i2, const size_t j2, double &yHiDHiDHiy_gg,
+                     double &yHiDHiDHiy_ee, double &yHiDHiDHiy_ge) {
+  yHiDHiDHiy_gg = 0.0;
+  yHiDHiDHiy_ee = 0.0;
+  yHiDHiDHiy_ge = 0.0;
+
+  size_t n_size = eval->size, d_size = Hiy->size1;
+
+  double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2;
+  double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    d_Hiy_i1 = gsl_matrix_get(Hiy, i1, k);
+    d_Hiy_j1 = gsl_matrix_get(Hiy, j1, k);
+    d_Hiy_i2 = gsl_matrix_get(Hiy, i2, k);
+    d_Hiy_j2 = gsl_matrix_get(Hiy, j2, k);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+
+      if (i2 != j2) {
+        yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+      }
+    } else {
+      yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
+                                        d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ee +=
+          (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 + d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
+                                d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+
+      if (i2 != j2) {
+        yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
+                                          d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ee +=
+            (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 + d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
+                                  d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+      }
+    }
+  }
+
+  return;
 }
 
-void Calc_xHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi,
-		      const gsl_matrix *xHi, const gsl_matrix *Hiy,
-		      const size_t i1, const size_t j1, const size_t i2,
-		      const size_t j2, gsl_vector *xHiDHiDHiy_gg,
-		      gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) {
-	gsl_vector_set_zero(xHiDHiDHiy_gg);
-	gsl_vector_set_zero(xHiDHiDHiy_ee);
-	gsl_vector_set_zero(xHiDHiDHiy_ge);
-
-	size_t n_size=eval->size, d_size=Hiy->size1;
-
-	double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2;
-	double d_Hi_j1i2, d_Hi_j1j2;
-
-	for (size_t k=0; k<n_size; k++) {
-	  delta=gsl_vector_get (eval, k);
-	  
-	  gsl_vector_const_view xHi_col_i=
-	    gsl_matrix_const_column (xHi, k*d_size+i1);
-	  gsl_vector_const_view xHi_col_j=
-	    gsl_matrix_const_column (xHi, k*d_size+j1);
-	  
-	  d_Hiy_i=gsl_matrix_get (Hiy, i2, k);
-	  d_Hiy_j=gsl_matrix_get (Hiy, j2, k);
-	  
-	  d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-	  d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-	  d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-	  d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-	  
-	  if (i1==j1) {
-	    gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_gg);
-	    gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ee);
-	    gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ge);
-		  
-	    if (i2!=j2) {
-	      gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i,
-			      &xHi_col_i.vector, xHiDHiDHiy_gg);
-	      gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ee);
-	      gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ge);
-	    }
-	  } else {
-	    gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_gg);
-	    gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ee);
-	    gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ge);
-	    
-	    gsl_blas_daxpy (delta*delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
-			    xHiDHiDHiy_gg);
-	    gsl_blas_daxpy (d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
-			    xHiDHiDHiy_ee);
-	    gsl_blas_daxpy (delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
-			    xHiDHiDHiy_ge);
-	    
-	    if (i2!=j2) {
-	      gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i,
-			      &xHi_col_i.vector, xHiDHiDHiy_gg);
-	      gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ee);
-	      gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ge);
-	      
-	      gsl_blas_daxpy (delta*delta*d_Hi_i1j2*d_Hiy_i,
-			      &xHi_col_j.vector, xHiDHiDHiy_gg);
-	      gsl_blas_daxpy (d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector,
-			      xHiDHiDHiy_ee);
-	      gsl_blas_daxpy (delta*d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector,
-			      xHiDHiDHiy_ge);
-	    }
-	  }
-	}
-	
-	return;
+void Calc_xHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
+                     const gsl_matrix *xHi, const gsl_matrix *Hiy,
+                     const size_t i1, const size_t j1, const size_t i2,
+                     const size_t j2, gsl_vector *xHiDHiDHiy_gg,
+                     gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) {
+  gsl_vector_set_zero(xHiDHiDHiy_gg);
+  gsl_vector_set_zero(xHiDHiDHiy_ee);
+  gsl_vector_set_zero(xHiDHiDHiy_ge);
+
+  size_t n_size = eval->size, d_size = Hiy->size1;
+
+  double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2;
+  double d_Hi_j1i2, d_Hi_j1j2;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    gsl_vector_const_view xHi_col_i =
+        gsl_matrix_const_column(xHi, k * d_size + i1);
+    gsl_vector_const_view xHi_col_j =
+        gsl_matrix_const_column(xHi, k * d_size + j1);
+
+    d_Hiy_i = gsl_matrix_get(Hiy, i2, k);
+    d_Hiy_j = gsl_matrix_get(Hiy, j2, k);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_gg);
+      gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
+      gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_ge);
+
+      if (i2 != j2) {
+        gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_gg);
+        gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
+        gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_ge);
+      }
+    } else {
+      gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_gg);
+      gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
+      gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_ge);
+
+      gsl_blas_daxpy(delta * delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
+                     xHiDHiDHiy_gg);
+      gsl_blas_daxpy(d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_ee);
+      gsl_blas_daxpy(delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
+                     xHiDHiDHiy_ge);
+
+      if (i2 != j2) {
+        gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_gg);
+        gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
+        gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_ge);
+
+        gsl_blas_daxpy(delta * delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
+                       xHiDHiDHiy_gg);
+        gsl_blas_daxpy(d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_ee);
+        gsl_blas_daxpy(delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
+                       xHiDHiDHiy_ge);
+      }
+    }
+  }
+
+  return;
 }
 
+void Calc_xHiDHiDHix(const gsl_vector *eval, const gsl_matrix *Hi,
+                     const gsl_matrix *xHi, const size_t i1, const size_t j1,
+                     const size_t i2, const size_t j2,
+                     gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee,
+                     gsl_matrix *xHiDHiDHix_ge) {
+  gsl_matrix_set_zero(xHiDHiDHix_gg);
+  gsl_matrix_set_zero(xHiDHiDHix_ee);
+  gsl_matrix_set_zero(xHiDHiDHix_ge);
+
+  size_t n_size = eval->size, d_size = Hi->size1, dc_size = xHi->size1;
+
+  double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+  gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    gsl_vector_const_view xHi_col_i1 =
+        gsl_matrix_const_column(xHi, k * d_size + i1);
+    gsl_vector_const_view xHi_col_j1 =
+        gsl_matrix_const_column(xHi, k * d_size + j1);
+    gsl_vector_const_view xHi_col_i2 =
+        gsl_matrix_const_column(xHi, k * d_size + i2);
+    gsl_vector_const_view xHi_col_j2 =
+        gsl_matrix_const_column(xHi, k * d_size + j2);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      gsl_matrix_set_zero(mat_dcdc);
+      gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
+                    mat_dcdc);
+
+      gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+      if (i2 != j2) {
+        gsl_matrix_set_zero(mat_dcdc);
+        gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
+                      mat_dcdc);
+
+        gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+      }
+    } else {
+      gsl_matrix_set_zero(mat_dcdc);
+      gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
+                    mat_dcdc);
+
+      gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+      gsl_matrix_set_zero(mat_dcdc);
+      gsl_blas_dger(d_Hi_i1i2, &xHi_col_j1.vector, &xHi_col_j2.vector,
+                    mat_dcdc);
+
+      gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+      if (i2 != j2) {
+        gsl_matrix_set_zero(mat_dcdc);
+        gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
+                      mat_dcdc);
+
+        gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+        gsl_matrix_set_zero(mat_dcdc);
+        gsl_blas_dger(d_Hi_i1j2, &xHi_col_j1.vector, &xHi_col_i2.vector,
+                      mat_dcdc);
+
+        gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+      }
+    }
+  }
+
+  gsl_matrix_free(mat_dcdc);
 
-void Calc_xHiDHiDHix (const gsl_vector *eval, const gsl_matrix *Hi,
-		      const gsl_matrix *xHi, const size_t i1, const size_t j1,
-		      const size_t i2, const size_t j2,
-		      gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee,
-		      gsl_matrix *xHiDHiDHix_ge) {
-	gsl_matrix_set_zero(xHiDHiDHix_gg);
-	gsl_matrix_set_zero(xHiDHiDHix_ee);
-	gsl_matrix_set_zero(xHiDHiDHix_ge);
-
-	size_t n_size=eval->size, d_size=Hi->size1, dc_size=xHi->size1;
-
-	double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
-	gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		gsl_vector_const_view xHi_col_i1=
-		  gsl_matrix_const_column (xHi, k*d_size+i1);
-		gsl_vector_const_view xHi_col_j1=
-		  gsl_matrix_const_column (xHi, k*d_size+j1);
-		gsl_vector_const_view xHi_col_i2=
-		  gsl_matrix_const_column (xHi, k*d_size+i2);
-		gsl_vector_const_view xHi_col_j2=
-		  gsl_matrix_const_column (xHi, k*d_size+j2);
-
-		d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-		d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-		d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-		d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
-		if (i1==j1) {
-		  gsl_matrix_set_zero (mat_dcdc);
-		  gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector,
-				 &xHi_col_j2.vector, mat_dcdc);
-
-		  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-		  
-		  if (i2!=j2) {
-		    gsl_matrix_set_zero (mat_dcdc);
-		    gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector,
-				   &xHi_col_i2.vector, mat_dcdc);
-		    
-		    gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-		    gsl_matrix_scale(mat_dcdc, delta);
-		    gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-		    gsl_matrix_scale(mat_dcdc, delta);
-		    gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-		  }
-		} else {
-		  gsl_matrix_set_zero (mat_dcdc);
-		  gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector,
-				 &xHi_col_j2.vector, mat_dcdc);
-		  
-		  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-		  
-		  gsl_matrix_set_zero (mat_dcdc);
-		  gsl_blas_dger (d_Hi_i1i2, &xHi_col_j1.vector,
-				 &xHi_col_j2.vector, mat_dcdc);
-
-			gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-			gsl_matrix_scale(mat_dcdc, delta);
-			gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-			gsl_matrix_scale(mat_dcdc, delta);
-			gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
-			if (i2!=j2) {
-			  gsl_matrix_set_zero (mat_dcdc);
-			  gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector,
-					 &xHi_col_i2.vector, mat_dcdc);
-			  
-			  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-			  
-			  gsl_matrix_set_zero (mat_dcdc);
-			  gsl_blas_dger (d_Hi_i1j2, &xHi_col_j1.vector,
-					 &xHi_col_i2.vector, mat_dcdc);
-			  
-			  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-			}
-		}
-	}
-
-	gsl_matrix_free(mat_dcdc);
-
-	return;
+  return;
 }
 
-void Calc_traceHiD (const gsl_vector *eval, const gsl_matrix *Hi,
-		    const size_t i, const size_t j, double &tHiD_g,
-		    double &tHiD_e) {
-	tHiD_g=0.0;
-	tHiD_e=0.0;
-
-	size_t n_size=eval->size, d_size=Hi->size1;
-	double delta, d;
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-		d=gsl_matrix_get (Hi, j, k*d_size+i);
-
-		if (i==j) {
-			tHiD_g+=delta*d;
-			tHiD_e+=d;
-		} else {
-			tHiD_g+=delta*d*2.0;
-			tHiD_e+=d*2.0;
-		}
-	}
-
-	return;
+void Calc_traceHiD(const gsl_vector *eval, const gsl_matrix *Hi, const size_t i,
+                   const size_t j, double &tHiD_g, double &tHiD_e) {
+  tHiD_g = 0.0;
+  tHiD_e = 0.0;
+
+  size_t n_size = eval->size, d_size = Hi->size1;
+  double delta, d;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    d = gsl_matrix_get(Hi, j, k * d_size + i);
+
+    if (i == j) {
+      tHiD_g += delta * d;
+      tHiD_e += d;
+    } else {
+      tHiD_g += delta * d * 2.0;
+      tHiD_e += d * 2.0;
+    }
+  }
+
+  return;
 }
 
-void Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi,
-		       const size_t i1, const size_t j1, const size_t i2,
-		       const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee,
-		       double &tHiDHiD_ge) {
-	tHiDHiD_gg=0.0;
-	tHiDHiD_ee=0.0;
-	tHiDHiD_ge=0.0;
-
-	size_t n_size=eval->size, d_size=Hi->size1;
-	double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-		d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-		d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-		d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
-		if (i1==j1) {
-		  tHiDHiD_gg+=delta*delta*d_Hi_i1j2*d_Hi_j1i2;
-		  tHiDHiD_ee+=d_Hi_i1j2*d_Hi_j1i2;
-		  tHiDHiD_ge+=delta*d_Hi_i1j2*d_Hi_j1i2;
-		  
-		  if (i2!=j2) {
-		    tHiDHiD_gg+=delta*delta*d_Hi_i1i2*d_Hi_j1j2;
-		    tHiDHiD_ee+=d_Hi_i1i2*d_Hi_j1j2;
-		    tHiDHiD_ge+=delta*d_Hi_i1i2*d_Hi_j1j2;
-		  }
-		} else {
-		  tHiDHiD_gg+=delta*delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*
-					   d_Hi_i1i2);
-		  tHiDHiD_ee+=(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
-		  tHiDHiD_ge+=delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
-		  
-		  if (i2!=j2) {
-		    tHiDHiD_gg+=delta*delta*(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*
-					     d_Hi_i1j2);
-		    tHiDHiD_ee+=(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*d_Hi_i1j2);
-		    tHiDHiD_ge+=delta*(d_Hi_i1i2*d_Hi_j1j2 +
-				       d_Hi_j1i2*d_Hi_i1j2);
-		  }
-		}
-	}
-
-	return;
+void Calc_traceHiDHiD(const gsl_vector *eval, const gsl_matrix *Hi,
+                      const size_t i1, const size_t j1, const size_t i2,
+                      const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee,
+                      double &tHiDHiD_ge) {
+  tHiDHiD_gg = 0.0;
+  tHiDHiD_ee = 0.0;
+  tHiDHiD_ge = 0.0;
+
+  size_t n_size = eval->size, d_size = Hi->size1;
+  double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      tHiDHiD_gg += delta * delta * d_Hi_i1j2 * d_Hi_j1i2;
+      tHiDHiD_ee += d_Hi_i1j2 * d_Hi_j1i2;
+      tHiDHiD_ge += delta * d_Hi_i1j2 * d_Hi_j1i2;
+
+      if (i2 != j2) {
+        tHiDHiD_gg += delta * delta * d_Hi_i1i2 * d_Hi_j1j2;
+        tHiDHiD_ee += d_Hi_i1i2 * d_Hi_j1j2;
+        tHiDHiD_ge += delta * d_Hi_i1i2 * d_Hi_j1j2;
+      }
+    } else {
+      tHiDHiD_gg +=
+          delta * delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+      tHiDHiD_ee += (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+      tHiDHiD_ge += delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+
+      if (i2 != j2) {
+        tHiDHiD_gg +=
+            delta * delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+        tHiDHiD_ee += (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+        tHiDHiD_ge += delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+      }
+    }
+  }
+
+  return;
 }
 
 // trace(PD) = trace((Hi-HixQixHi)D)=trace(HiD) - trace(HixQixHiD)
-void Calc_tracePD (const gsl_vector *eval, const gsl_matrix *Qi,
-		   const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g,
-		   const gsl_matrix *xHiDHix_all_e, const size_t i,
-		   const size_t j, double &tPD_g, double &tPD_e) {
-	size_t dc_size=Qi->size1, d_size=Hi->size1;
-	size_t v=GetIndex(i, j, d_size);
-
-	double d;
-
-	// Calculate the first part: trace(HiD).
-	Calc_traceHiD (eval, Hi, i, j, tPD_g, tPD_e);
-
-	// Calculate the second part: -trace(HixQixHiD).
-	for (size_t k=0; k<dc_size; k++) {
-		gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, k);
-		gsl_vector_const_view xHiDHix_g_col =
-		  gsl_matrix_const_column (xHiDHix_all_g, v*dc_size+k);
-		gsl_vector_const_view xHiDHix_e_col =
-		  gsl_matrix_const_column (xHiDHix_all_e, v*dc_size+k);
-
-		gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
-		tPD_g-=d;
-		gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
-		tPD_e-=d;
-	}
-
-	return;
+void Calc_tracePD(const gsl_vector *eval, const gsl_matrix *Qi,
+                  const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g,
+                  const gsl_matrix *xHiDHix_all_e, const size_t i,
+                  const size_t j, double &tPD_g, double &tPD_e) {
+  size_t dc_size = Qi->size1, d_size = Hi->size1;
+  size_t v = GetIndex(i, j, d_size);
+
+  double d;
+
+  // Calculate the first part: trace(HiD).
+  Calc_traceHiD(eval, Hi, i, j, tPD_g, tPD_e);
+
+  // Calculate the second part: -trace(HixQixHiD).
+  for (size_t k = 0; k < dc_size; k++) {
+    gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, k);
+    gsl_vector_const_view xHiDHix_g_col =
+        gsl_matrix_const_column(xHiDHix_all_g, v * dc_size + k);
+    gsl_vector_const_view xHiDHix_e_col =
+        gsl_matrix_const_column(xHiDHix_all_e, v * dc_size + k);
+
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
+    tPD_g -= d;
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
+    tPD_e -= d;
+  }
+
+  return;
 }
 
 // trace(PDPD) = trace((Hi-HixQixHi)D(Hi-HixQixHi)D)
 //             = trace(HiDHiD) - trace(HixQixHiDHiD)
 //               - trace(HiDHixQixHiD) + trace(HixQixHiDHixQixHiD)
-void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi,
-		     const gsl_matrix *Hi, const gsl_matrix *xHi,
-		     const gsl_matrix *QixHiDHix_all_g,
-		     const gsl_matrix *QixHiDHix_all_e,
-		     const gsl_matrix *xHiDHiDHix_all_gg,
-		     const gsl_matrix *xHiDHiDHix_all_ee,
-		     const gsl_matrix *xHiDHiDHix_all_ge,
-		     const size_t i1, const size_t j1,
-		     const size_t i2, const size_t j2,
-		     double &tPDPD_gg, double &tPDPD_ee,
-		     double &tPDPD_ge) {
-	size_t dc_size=Qi->size1, d_size=Hi->size1;
-	size_t v_size=d_size*(d_size+1)/2;
-	size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
-
-	double d;
-
-	// Calculate the first part: trace(HiDHiD).
-	Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee,
-			  tPDPD_ge);
-
-	// Calculate the second and third parts:
-	// -trace(HixQixHiDHiD) - trace(HiDHixQixHiD)
-	for (size_t i=0; i<dc_size; i++) {
-	  gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, i);
-	  gsl_vector_const_view xHiDHiDHix_gg_col=
-	    gsl_matrix_const_column(xHiDHiDHix_all_gg,
-				    (v1*v_size+v2)*dc_size+i);
-	  gsl_vector_const_view xHiDHiDHix_ee_col =
-	    gsl_matrix_const_column(xHiDHiDHix_all_ee,
-				    (v1*v_size+v2)*dc_size+i);
-	  gsl_vector_const_view xHiDHiDHix_ge_col =
-	    gsl_matrix_const_column(xHiDHiDHix_all_ge,
-				    (v1*v_size+v2)*dc_size+i);
-	  
-	  gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d);
-	  tPDPD_gg-=d*2.0;
-	  gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d);
-	  tPDPD_ee-=d*2.0;
-	  gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d);
-	  tPDPD_ge-=d*2.0;
-	}
-
-	// Calculate the fourth part: trace(HixQixHiDHixQixHiD).
-	for (size_t i=0; i<dc_size; i++) {
-
-	  gsl_vector_const_view QixHiDHix_g_fullrow1 =
-	    gsl_matrix_const_row (QixHiDHix_all_g, i);
-	  gsl_vector_const_view QixHiDHix_e_fullrow1 =
-	    gsl_matrix_const_row (QixHiDHix_all_e, i);
-	  gsl_vector_const_view QixHiDHix_g_row1 =
-	    gsl_vector_const_subvector (&QixHiDHix_g_fullrow1.vector,
-					v1*dc_size, dc_size);
-	  gsl_vector_const_view QixHiDHix_e_row1 =
-	    gsl_vector_const_subvector (&QixHiDHix_e_fullrow1.vector,
-					v1*dc_size, dc_size);
-	  
-	  gsl_vector_const_view QixHiDHix_g_col2 =
-	    gsl_matrix_const_column (QixHiDHix_all_g, v2*dc_size+i);
-	  gsl_vector_const_view QixHiDHix_e_col2 =
-	    gsl_matrix_const_column (QixHiDHix_all_e, v2*dc_size+i);
-	  
-	  gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_g_col2.vector,&d);
-	  tPDPD_gg+=d;
-	  gsl_blas_ddot(&QixHiDHix_e_row1.vector,&QixHiDHix_e_col2.vector,&d);
-	  tPDPD_ee+=d;
-	  gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_e_col2.vector,&d);
-	  tPDPD_ge+=d;
-	}
-
-	return;
+void Calc_tracePDPD(const gsl_vector *eval, const gsl_matrix *Qi,
+                    const gsl_matrix *Hi, const gsl_matrix *xHi,
+                    const gsl_matrix *QixHiDHix_all_g,
+                    const gsl_matrix *QixHiDHix_all_e,
+                    const gsl_matrix *xHiDHiDHix_all_gg,
+                    const gsl_matrix *xHiDHiDHix_all_ee,
+                    const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1,
+                    const size_t j1, const size_t i2, const size_t j2,
+                    double &tPDPD_gg, double &tPDPD_ee, double &tPDPD_ge) { // Results go to tPDPD_gg/_ee/_ge; xHi is not read in this body.
+  size_t dc_size = Qi->size1, d_size = Hi->size1;
+  size_t v_size = d_size * (d_size + 1) / 2; // Count of index pairs (i,j) with i <= j.
+  size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
+  // v1 and v2 select the stored blocks for the pairs (i1,j1) and (i2,j2).
+  double d; // Receives each dot product below.
+
+  // Calculate the first part: trace(HiDHiD); the helper is assumed to overwrite the three outputs -- TODO confirm.
+  Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
+
+  // Calculate the second and third parts:
+  // -trace(HixQixHiDHiD) - trace(HiDHixQixHiD)
+  for (size_t i = 0; i < dc_size; i++) { // Sums Qi(i,:) . block(:,i), i.e. trace(Qi * block), for block (v1,v2).
+    gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, i);
+    gsl_vector_const_view xHiDHiDHix_gg_col = gsl_matrix_const_column(
+        xHiDHiDHix_all_gg, (v1 * v_size + v2) * dc_size + i);
+    gsl_vector_const_view xHiDHiDHix_ee_col = gsl_matrix_const_column(
+        xHiDHiDHix_all_ee, (v1 * v_size + v2) * dc_size + i);
+    gsl_vector_const_view xHiDHiDHix_ge_col = gsl_matrix_const_column(
+        xHiDHiDHix_all_ge, (v1 * v_size + v2) * dc_size + i);
+
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d);
+    tPDPD_gg -= d * 2.0; // Doubled so that one product covers both terms named above.
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d);
+    tPDPD_ee -= d * 2.0;
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d);
+    tPDPD_ge -= d * 2.0;
+  }
+
+  // Calculate the fourth part: trace(HixQixHiDHixQixHiD).
+  for (size_t i = 0; i < dc_size; i++) { // Sums (row i of block v1) . (column i of block v2).
+    // Row i of each wide matrix, then the dc_size entries that belong to block v1.
+    gsl_vector_const_view QixHiDHix_g_fullrow1 =
+        gsl_matrix_const_row(QixHiDHix_all_g, i);
+    gsl_vector_const_view QixHiDHix_e_fullrow1 =
+        gsl_matrix_const_row(QixHiDHix_all_e, i);
+    gsl_vector_const_view QixHiDHix_g_row1 = gsl_vector_const_subvector(
+        &QixHiDHix_g_fullrow1.vector, v1 * dc_size, dc_size);
+    gsl_vector_const_view QixHiDHix_e_row1 = gsl_vector_const_subvector(
+        &QixHiDHix_e_fullrow1.vector, v1 * dc_size, dc_size);
+    // Column i of block v2.
+    gsl_vector_const_view QixHiDHix_g_col2 =
+        gsl_matrix_const_column(QixHiDHix_all_g, v2 * dc_size + i);
+    gsl_vector_const_view QixHiDHix_e_col2 =
+        gsl_matrix_const_column(QixHiDHix_all_e, v2 * dc_size + i);
+
+    gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_g_col2.vector, &d);
+    tPDPD_gg += d;
+    gsl_blas_ddot(&QixHiDHix_e_row1.vector, &QixHiDHix_e_col2.vector, &d);
+    tPDPD_ee += d;
+    gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_e_col2.vector, &d);
+    tPDPD_ge += d; // Cross term: g block for v1 paired with e block for v2.
+  }
+
+  return;
 }
 
 // Calculate (xHiDHiy) for every pair (i,j).
-void Calc_xHiDHiy_all (const gsl_vector *eval, const gsl_matrix *xHi,
-		       const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g,
-		       gsl_matrix *xHiDHiy_all_e) {
-	gsl_matrix_set_zero(xHiDHiy_all_g);
-	gsl_matrix_set_zero(xHiDHiy_all_e);
-
-	size_t d_size=Hiy->size1;
-	size_t v;
-
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<d_size; j++) {
-	    if (j<i) {continue;}
-	    v=GetIndex(i, j, d_size);
-	    
-	    gsl_vector_view xHiDHiy_g=gsl_matrix_column (xHiDHiy_all_g, v);
-	    gsl_vector_view xHiDHiy_e=gsl_matrix_column (xHiDHiy_all_e, v);
-	    
-	    Calc_xHiDHiy (eval, xHi, Hiy, i, j, &xHiDHiy_g.vector,
-			  &xHiDHiy_e.vector);
-	  }
-	}
-	return;
+void Calc_xHiDHiy_all(const gsl_vector *eval, const gsl_matrix *xHi,
+                      const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g,
+                      gsl_matrix *xHiDHiy_all_e) {
+  gsl_matrix_set_zero(xHiDHiy_all_g); // Both outputs are cleared, then filled one column per (i,j) pair.
+  gsl_matrix_set_zero(xHiDHiy_all_e);
+
+  size_t d_size = Hiy->size1;
+  size_t v;
+
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) { // Only pairs with i <= j are processed.
+        continue;
+      }
+      v = GetIndex(i, j, d_size); // Output column used for pair (i,j).
+
+      gsl_vector_view xHiDHiy_g = gsl_matrix_column(xHiDHiy_all_g, v);
+      gsl_vector_view xHiDHiy_e = gsl_matrix_column(xHiDHiy_all_e, v);
+
+      Calc_xHiDHiy(eval, xHi, Hiy, i, j, &xHiDHiy_g.vector, &xHiDHiy_e.vector); // Helper writes straight into the two column views.
+    }
+  }
+  return;
 }
 
 // Calculate (xHiDHix) for every pair (i,j).
-void Calc_xHiDHix_all (const gsl_vector *eval, const gsl_matrix *xHi,
-		       gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) {
+void Calc_xHiDHix_all(const gsl_vector *eval, const gsl_matrix *xHi,
+                      gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) { // Fills one dc_size x dc_size block per (i,j) pair, i <= j.
   gsl_matrix_set_zero(xHiDHix_all_g);
   gsl_matrix_set_zero(xHiDHix_all_e);
-  
-  size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
+
+  size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1; // Integer division; presumably xHi->size2 == d_size * eval->size -- TODO confirm.
   size_t v;
-  
-  for (size_t i=0; i<d_size; i++) {
-    for (size_t j=0; j<d_size; j++) {
-      if (j<i) {continue;}
-      v=GetIndex(i, j, d_size);
-      
+
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) { // Only pairs with i <= j are processed.
+        continue;
+      }
+      v = GetIndex(i, j, d_size); // Block number for pair (i,j).
+
       gsl_matrix_view xHiDHix_g =
-	gsl_matrix_submatrix (xHiDHix_all_g, 0, v*dc_size, dc_size, dc_size);
+          gsl_matrix_submatrix(xHiDHix_all_g, 0, v * dc_size, dc_size, dc_size); // Block v starts at column v * dc_size.
       gsl_matrix_view xHiDHix_e =
-	gsl_matrix_submatrix (xHiDHix_all_e, 0, v*dc_size, dc_size, dc_size);
-      
-      Calc_xHiDHix (eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix);
+          gsl_matrix_submatrix(xHiDHix_all_e, 0, v * dc_size, dc_size, dc_size);
+
+      Calc_xHiDHix(eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix); // Helper writes straight into the two block views.
     }
   }
   return;
 }
 
 // Calculate (xHiDHiy) for every pair (i,j).
-void Calc_xHiDHiDHiy_all (const size_t v_size, const gsl_vector *eval,
-			  const gsl_matrix *Hi, const gsl_matrix *xHi,
-			  const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg,
-			  gsl_matrix *xHiDHiDHiy_all_ee,
-			  gsl_matrix *xHiDHiDHiy_all_ge) {
-	gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
-	gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
-	gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
-
-	size_t d_size=Hiy->size1;
-	size_t v1, v2;
-
-	for (size_t i1=0; i1<d_size; i1++) {
-	  for (size_t j1=0; j1<d_size; j1++) {
-	    if (j1<i1) {continue;}
-	    v1=GetIndex(i1, j1, d_size);
-	    
-	    for (size_t i2=0; i2<d_size; i2++) {
-	      for (size_t j2=0; j2<d_size; j2++) {
-		if (j2<i2) {continue;}
-		v2=GetIndex(i2, j2, d_size);
-		
-		gsl_vector_view xHiDHiDHiy_gg =
-		  gsl_matrix_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
-		gsl_vector_view xHiDHiDHiy_ee =
-		  gsl_matrix_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
-		gsl_vector_view xHiDHiDHiy_ge =
-		  gsl_matrix_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-		
-		Calc_xHiDHiDHiy (eval, Hi, xHi, Hiy, i1, j1, i2, j2, &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector, &xHiDHiDHiy_ge.vector);
-	      }
-	    }
-	  }
-	}
-	return;
+void Calc_xHiDHiDHiy_all(const size_t v_size, const gsl_vector *eval,
+                         const gsl_matrix *Hi, const gsl_matrix *xHi,
+                         const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg,
+                         gsl_matrix *xHiDHiDHiy_all_ee,
+                         gsl_matrix *xHiDHiDHiy_all_ge) { // Fills one output column per ordered pair of index pairs ((i1,j1),(i2,j2)).
+  gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
+  gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
+  gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
+
+  size_t d_size = Hiy->size1;
+  size_t v1, v2;
+
+  for (size_t i1 = 0; i1 < d_size; i1++) {
+    for (size_t j1 = 0; j1 < d_size; j1++) {
+      if (j1 < i1) { // First pair: only i1 <= j1.
+        continue;
+      }
+      v1 = GetIndex(i1, j1, d_size);
+
+      for (size_t i2 = 0; i2 < d_size; i2++) {
+        for (size_t j2 = 0; j2 < d_size; j2++) {
+          if (j2 < i2) { // Second pair: only i2 <= j2.
+            continue;
+          }
+          v2 = GetIndex(i2, j2, d_size);
+          // Every (v1, v2) combination is computed; results land in column v1 * v_size + v2.
+          gsl_vector_view xHiDHiDHiy_gg =
+              gsl_matrix_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
+          gsl_vector_view xHiDHiDHiy_ee =
+              gsl_matrix_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
+          gsl_vector_view xHiDHiDHiy_ge =
+              gsl_matrix_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
+
+          Calc_xHiDHiDHiy(eval, Hi, xHi, Hiy, i1, j1, i2, j2,
+                          &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector,
+                          &xHiDHiDHiy_ge.vector);
+        }
+      }
+    }
+  }
+  return;
 }
 
 // Calculate (xHiDHix) for every pair (i,j).
-void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval,
-			  const gsl_matrix *Hi, const gsl_matrix *xHi,
-			  gsl_matrix *xHiDHiDHix_all_gg,
-			  gsl_matrix *xHiDHiDHix_all_ee,
-			  gsl_matrix *xHiDHiDHix_all_ge) {
-	gsl_matrix_set_zero(xHiDHiDHix_all_gg);
-	gsl_matrix_set_zero(xHiDHiDHix_all_ee);
-	gsl_matrix_set_zero(xHiDHiDHix_all_ge);
-
-	size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
-	size_t v1, v2;
-
-	for (size_t i1=0; i1<d_size; i1++) {
-	  for (size_t j1=0; j1<d_size; j1++) {
-	    if (j1<i1) {continue;}
-	    v1=GetIndex(i1, j1, d_size);
-	    
-	    for (size_t i2=0; i2<d_size; i2++) {
-	      for (size_t j2=0; j2<d_size; j2++) {
-		if (j2<i2) {continue;}
-		v2=GetIndex(i2, j2, d_size);
-		
-		if (v2<v1) {continue;}
-		
-		gsl_matrix_view xHiDHiDHix_gg1 =
-		  gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0,
-					(v1*v_size+v2)*dc_size,
-					dc_size, dc_size);
-		gsl_matrix_view xHiDHiDHix_ee1 =
-		  gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0,
-					(v1*v_size+v2)*dc_size,
-					dc_size, dc_size);
-		gsl_matrix_view xHiDHiDHix_ge1 =
-		  gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0,
-					(v1*v_size+v2)*dc_size,
-					dc_size, dc_size);
-		
-		Calc_xHiDHiDHix (eval, Hi, xHi, i1, j1, i2, j2,
-				 &xHiDHiDHix_gg1.matrix,
-				 &xHiDHiDHix_ee1.matrix,
-				 &xHiDHiDHix_ge1.matrix);
-		
-		if (v2!=v1) {
-		  gsl_matrix_view xHiDHiDHix_gg2 =
-		    gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0,
-					  (v2*v_size+v1)*dc_size,
-					  dc_size, dc_size);
-		  gsl_matrix_view xHiDHiDHix_ee2 =
-		    gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0,
-					  (v2*v_size+v1)*dc_size,
-					  dc_size, dc_size);
-		  gsl_matrix_view xHiDHiDHix_ge2 =
-		    gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0,
-					  (v2*v_size+v1)*dc_size,
-					  dc_size, dc_size);
-
-		  gsl_matrix_memcpy (&xHiDHiDHix_gg2.matrix,
-				     &xHiDHiDHix_gg1.matrix);
-		  gsl_matrix_memcpy (&xHiDHiDHix_ee2.matrix,
-				     &xHiDHiDHix_ee1.matrix);
-		  gsl_matrix_memcpy (&xHiDHiDHix_ge2.matrix,
-				     &xHiDHiDHix_ge1.matrix);
-		}
-	      }
-	    }
-	  }
-	}
-
-	return;
+void Calc_xHiDHiDHix_all(const size_t v_size, const gsl_vector *eval,
+                         const gsl_matrix *Hi, const gsl_matrix *xHi,
+                         gsl_matrix *xHiDHiDHix_all_gg,
+                         gsl_matrix *xHiDHiDHix_all_ee,
+                         gsl_matrix *xHiDHiDHix_all_ge) { // Fills one dc_size x dc_size block per (v1, v2) combination.
+  gsl_matrix_set_zero(xHiDHiDHix_all_gg);
+  gsl_matrix_set_zero(xHiDHiDHix_all_ee);
+  gsl_matrix_set_zero(xHiDHiDHix_all_ge);
+
+  size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1; // Integer division; presumably xHi->size2 == d_size * eval->size -- TODO confirm.
+  size_t v1, v2;
+
+  for (size_t i1 = 0; i1 < d_size; i1++) {
+    for (size_t j1 = 0; j1 < d_size; j1++) {
+      if (j1 < i1) { // First pair: only i1 <= j1.
+        continue;
+      }
+      v1 = GetIndex(i1, j1, d_size);
+
+      for (size_t i2 = 0; i2 < d_size; i2++) {
+        for (size_t j2 = 0; j2 < d_size; j2++) {
+          if (j2 < i2) { // Second pair: only i2 <= j2.
+            continue;
+          }
+          v2 = GetIndex(i2, j2, d_size);
+
+          if (v2 < v1) { // Skipped here; this slot is filled by the copy made when the roles are swapped.
+            continue;
+          }
+          // Block (v1, v2) starts at column (v1 * v_size + v2) * dc_size.
+          gsl_matrix_view xHiDHiDHix_gg1 = gsl_matrix_submatrix(
+              xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size,
+              dc_size);
+          gsl_matrix_view xHiDHiDHix_ee1 = gsl_matrix_submatrix(
+              xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size,
+              dc_size);
+          gsl_matrix_view xHiDHiDHix_ge1 = gsl_matrix_submatrix(
+              xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size,
+              dc_size);
+
+          Calc_xHiDHiDHix(eval, Hi, xHi, i1, j1, i2, j2, &xHiDHiDHix_gg1.matrix,
+                          &xHiDHiDHix_ee1.matrix, &xHiDHiDHix_ge1.matrix);
+
+          if (v2 != v1) { // Mirror slot (v2, v1) receives an element-for-element copy (no transpose).
+            gsl_matrix_view xHiDHiDHix_gg2 = gsl_matrix_submatrix(
+                xHiDHiDHix_all_gg, 0, (v2 * v_size + v1) * dc_size, dc_size,
+                dc_size);
+            gsl_matrix_view xHiDHiDHix_ee2 = gsl_matrix_submatrix(
+                xHiDHiDHix_all_ee, 0, (v2 * v_size + v1) * dc_size, dc_size,
+                dc_size);
+            gsl_matrix_view xHiDHiDHix_ge2 = gsl_matrix_submatrix(
+                xHiDHiDHix_all_ge, 0, (v2 * v_size + v1) * dc_size, dc_size,
+                dc_size);
+
+            gsl_matrix_memcpy(&xHiDHiDHix_gg2.matrix, &xHiDHiDHix_gg1.matrix);
+            gsl_matrix_memcpy(&xHiDHiDHix_ee2.matrix, &xHiDHiDHix_ee1.matrix);
+            gsl_matrix_memcpy(&xHiDHiDHix_ge2.matrix, &xHiDHiDHix_ge1.matrix); // NOTE(review): the ge block is mirrored unchanged too -- confirm that is intended.
+          }
+        }
+      }
+    }
+  }
+
+  return;
 }
 
 // Calculate (xHiDHix)Qi(xHiy) for every pair (i,j).
-void Calc_xHiDHixQixHiy_all (const gsl_matrix *xHiDHix_all_g,
-			     const gsl_matrix *xHiDHix_all_e,
-			     const gsl_vector *QixHiy,
-			     gsl_matrix *xHiDHixQixHiy_all_g,
-			     gsl_matrix *xHiDHixQixHiy_all_e) {
-	size_t dc_size=xHiDHix_all_g->size1;
-	size_t v_size=xHiDHix_all_g->size2/dc_size;
-
-	for (size_t i=0; i<v_size; i++) {
-		gsl_matrix_const_view xHiDHix_g =
-		  gsl_matrix_const_submatrix (xHiDHix_all_g, 0, i*dc_size,
-					      dc_size, dc_size);
-		gsl_matrix_const_view xHiDHix_e =
-		  gsl_matrix_const_submatrix (xHiDHix_all_e, 0, i*dc_size,
-					      dc_size, dc_size);
-
-		gsl_vector_view xHiDHixQixHiy_g =
-		  gsl_matrix_column (xHiDHixQixHiy_all_g, i);
-		gsl_vector_view xHiDHixQixHiy_e =
-		  gsl_matrix_column (xHiDHixQixHiy_all_e, i);
-
-		gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_g.matrix,
-				QixHiy, 0.0, &xHiDHixQixHiy_g.vector);
-		gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_e.matrix,
-				QixHiy, 0.0, &xHiDHixQixHiy_e.vector);
-	}
-
-	return;
+void Calc_xHiDHixQixHiy_all(const gsl_matrix *xHiDHix_all_g,
+                            const gsl_matrix *xHiDHix_all_e,
+                            const gsl_vector *QixHiy,
+                            gsl_matrix *xHiDHixQixHiy_all_g,
+                            gsl_matrix *xHiDHixQixHiy_all_e) { // Output column i = (block i of xHiDHix_all_*) * QixHiy.
+  size_t dc_size = xHiDHix_all_g->size1;
+  size_t v_size = xHiDHix_all_g->size2 / dc_size; // Number of dc_size-wide blocks in the input.
+
+  for (size_t i = 0; i < v_size; i++) {
+    gsl_matrix_const_view xHiDHix_g = gsl_matrix_const_submatrix(
+        xHiDHix_all_g, 0, i * dc_size, dc_size, dc_size);
+    gsl_matrix_const_view xHiDHix_e = gsl_matrix_const_submatrix(
+        xHiDHix_all_e, 0, i * dc_size, dc_size, dc_size);
+
+    gsl_vector_view xHiDHixQixHiy_g = gsl_matrix_column(xHiDHixQixHiy_all_g, i);
+    gsl_vector_view xHiDHixQixHiy_e = gsl_matrix_column(xHiDHixQixHiy_all_e, i);
+    // alpha = 1.0 and beta = 0.0: each output column is overwritten with block * QixHiy.
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_g.matrix, QixHiy, 0.0,
+                   &xHiDHixQixHiy_g.vector);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_e.matrix, QixHiy, 0.0,
+                   &xHiDHixQixHiy_e.vector);
+  }
+
+  return;
 }
 
 // Calculate Qi(xHiDHiy) and Qi(xHiDHix)Qi(xHiy) for each pair of i,j (i<=j).
-void Calc_QiVec_all (const gsl_matrix *Qi, const gsl_matrix *vec_all_g,
-		     const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g,
-		     gsl_matrix *Qivec_all_e) {
-	for (size_t i=0; i<vec_all_g->size2; i++) {
-	  gsl_vector_const_view vec_g=gsl_matrix_const_column (vec_all_g, i);
-	  gsl_vector_const_view vec_e=gsl_matrix_const_column (vec_all_e, i);
-	  
-	  gsl_vector_view Qivec_g=gsl_matrix_column (Qivec_all_g, i);
-	  gsl_vector_view Qivec_e=gsl_matrix_column (Qivec_all_e, i);
-	  
-	  gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_g.vector,0.0,
-			 &Qivec_g.vector);
-	  gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_e.vector,0.0,
-			 &Qivec_e.vector);
-	}
-
-	return;
+void Calc_QiVec_all(const gsl_matrix *Qi, const gsl_matrix *vec_all_g,
+                    const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g,
+                    gsl_matrix *Qivec_all_e) { // Output column i = Qi * (input column i), for the g and e inputs alike.
+  for (size_t i = 0; i < vec_all_g->size2; i++) { // Column count is taken from vec_all_g only.
+    gsl_vector_const_view vec_g = gsl_matrix_const_column(vec_all_g, i);
+    gsl_vector_const_view vec_e = gsl_matrix_const_column(vec_all_e, i);
+
+    gsl_vector_view Qivec_g = gsl_matrix_column(Qivec_all_g, i);
+    gsl_vector_view Qivec_e = gsl_matrix_column(Qivec_all_e, i);
+    // beta = 0.0: the output columns are overwritten, not accumulated into.
+    gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_g.vector, 0.0, &Qivec_g.vector);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_e.vector, 0.0, &Qivec_e.vector);
+  }
+
+  return;
 }
 
 // Calculate Qi(xHiDHix) for each pair of i,j (i<=j).
-void Calc_QiMat_all (const gsl_matrix *Qi, const gsl_matrix *mat_all_g,
-		     const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g,
-		     gsl_matrix *Qimat_all_e) {
-	size_t dc_size=Qi->size1;
-	size_t v_size=mat_all_g->size2/mat_all_g->size1;
-
-	for (size_t i=0; i<v_size; i++) {
-		gsl_matrix_const_view mat_g =
-		  gsl_matrix_const_submatrix (mat_all_g, 0, i*dc_size,
-					      dc_size, dc_size);
-		gsl_matrix_const_view mat_e =
-		  gsl_matrix_const_submatrix (mat_all_e, 0, i*dc_size,
-					      dc_size, dc_size);
-
-		gsl_matrix_view Qimat_g =
-		  gsl_matrix_submatrix (Qimat_all_g, 0, i*dc_size, dc_size,
-					dc_size);
-		gsl_matrix_view Qimat_e =
-		  gsl_matrix_submatrix (Qimat_all_e, 0, i*dc_size, dc_size,
-					dc_size);
-
-		gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi,
-				&mat_g.matrix, 0.0, &Qimat_g.matrix);
-		gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi,
-				&mat_e.matrix, 0.0, &Qimat_e.matrix);
-	}
-
-	return;
+void Calc_QiMat_all(const gsl_matrix *Qi, const gsl_matrix *mat_all_g,
+                    const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g,
+                    gsl_matrix *Qimat_all_e) { // Output block i = Qi * (input block i), for the g and e inputs alike.
+  size_t dc_size = Qi->size1;
+  size_t v_size = mat_all_g->size2 / mat_all_g->size1; // NOTE(review): block count uses mat_all_g->size1 while offsets use Qi->size1 -- confirm they are equal.
+
+  for (size_t i = 0; i < v_size; i++) {
+    gsl_matrix_const_view mat_g =
+        gsl_matrix_const_submatrix(mat_all_g, 0, i * dc_size, dc_size, dc_size);
+    gsl_matrix_const_view mat_e =
+        gsl_matrix_const_submatrix(mat_all_e, 0, i * dc_size, dc_size, dc_size);
+
+    gsl_matrix_view Qimat_g =
+        gsl_matrix_submatrix(Qimat_all_g, 0, i * dc_size, dc_size, dc_size);
+    gsl_matrix_view Qimat_e =
+        gsl_matrix_submatrix(Qimat_all_e, 0, i * dc_size, dc_size, dc_size);
+    // beta = 0.0: the output blocks are overwritten, not accumulated into.
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_g.matrix, 0.0,
+                   &Qimat_g.matrix);
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_e.matrix, 0.0,
+                   &Qimat_e.matrix);
+  }
+
+  return;
 }
 
 // Calculate yPDPy
 // yPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)y
 //       = ytHiDHiy - (yHix)Qi(xHiDHiy) - (yHiDHix)Qi(xHiy)
 //         + (yHix)Qi(xHiDHix)Qi(xtHiy)
-void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy,
-		 const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
-		 const gsl_matrix *xHiDHiy_all_e,
-		 const gsl_matrix *xHiDHixQixHiy_all_g,
-		 const gsl_matrix *xHiDHixQixHiy_all_e,
-		 const size_t i, const size_t j,
-		 double &yPDPy_g, double &yPDPy_e) {
-	size_t d_size=Hiy->size1;
-	size_t v=GetIndex(i, j, d_size);
-
-	double d;
-
-	// First part: ytHiDHiy.
-	Calc_yHiDHiy (eval, Hiy, i, j, yPDPy_g, yPDPy_e);
-
-	// Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
-	gsl_vector_const_view xHiDHiy_g =
-	  gsl_matrix_const_column (xHiDHiy_all_g, v);
-	gsl_vector_const_view xHiDHiy_e =
-	  gsl_matrix_const_column (xHiDHiy_all_e, v);
-
-	gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
-	yPDPy_g-=d*2.0;
-	gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
-	yPDPy_e-=d*2.0;
-
-	// Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy).
-	gsl_vector_const_view xHiDHixQixHiy_g =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_g, v);
-	gsl_vector_const_view xHiDHixQixHiy_e =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_e, v);
-
-	gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
-	yPDPy_g+=d;
-	gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
-	yPDPy_e+=d;
-
-	return;
+void Calc_yPDPy(const gsl_vector *eval, const gsl_matrix *Hiy,
+                const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
+                const gsl_matrix *xHiDHiy_all_e,
+                const gsl_matrix *xHiDHixQixHiy_all_g,
+                const gsl_matrix *xHiDHixQixHiy_all_e, const size_t i,
+                const size_t j, double &yPDPy_g, double &yPDPy_e) { // Results go to yPDPy_g and yPDPy_e.
+  size_t d_size = Hiy->size1;
+  size_t v = GetIndex(i, j, d_size); // Column of the *_all_* inputs that is read for pair (i,j).
+
+  double d; // Receives each dot product below.
+
+  // First part: ytHiDHiy; the helper is assumed to overwrite both outputs -- TODO confirm.
+  Calc_yHiDHiy(eval, Hiy, i, j, yPDPy_g, yPDPy_e);
+
+  // Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
+  gsl_vector_const_view xHiDHiy_g = gsl_matrix_const_column(xHiDHiy_all_g, v);
+  gsl_vector_const_view xHiDHiy_e = gsl_matrix_const_column(xHiDHiy_all_e, v);
+
+  gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
+  yPDPy_g -= d * 2.0; // Doubled so that one product covers both terms named above.
+  gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
+  yPDPy_e -= d * 2.0;
+
+  // Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy).
+  gsl_vector_const_view xHiDHixQixHiy_g =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_g, v);
+  gsl_vector_const_view xHiDHixQixHiy_e =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_e, v);
+
+  gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
+  yPDPy_g += d;
+  gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
+  yPDPy_e += d;
+
+  return;
 }
 
 // calculate yPDPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)D(Hi-HixQixHi)y
@@ -1912,3445 +1897,3503 @@ void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy,
 //                     + (yHiDHix)Qi(xHiDHix)Qi(xHiy)
 //                     + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
 //                     - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy)
-void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi,
-		   const gsl_matrix *xHi, const gsl_matrix *Hiy,
-		   const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
-		   const gsl_matrix *xHiDHiy_all_e,
-		   const gsl_matrix *QixHiDHiy_all_g,
-		   const gsl_matrix *QixHiDHiy_all_e,
-		   const gsl_matrix *xHiDHixQixHiy_all_g,
-		   const gsl_matrix *xHiDHixQixHiy_all_e,
-		   const gsl_matrix *QixHiDHixQixHiy_all_g,
-		   const gsl_matrix *QixHiDHixQixHiy_all_e,
-		   const gsl_matrix *xHiDHiDHiy_all_gg,
-		   const gsl_matrix *xHiDHiDHiy_all_ee,
-		   const gsl_matrix *xHiDHiDHiy_all_ge,
-		   const gsl_matrix *xHiDHiDHix_all_gg,
-		   const gsl_matrix *xHiDHiDHix_all_ee,
-		   const gsl_matrix *xHiDHiDHix_all_ge,
-		   const size_t i1, const size_t j1, const size_t i2,
-		   const size_t j2, double &yPDPDPy_gg, double &yPDPDPy_ee,
-		   double &yPDPDPy_ge) {
-	size_t d_size=Hi->size1, dc_size=xHi->size1;
-	size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
-	size_t v_size=d_size*(d_size+1)/2;
-
-	double d;
-
-	gsl_vector *xHiDHiDHixQixHiy=gsl_vector_alloc (dc_size);
-
-	// First part: yHiDHiDHiy.
-	Calc_yHiDHiDHiy (eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg,
-			 yPDPDPy_ee, yPDPDPy_ge);
-
-	// Second and third parts:
-	// -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy).
-	gsl_vector_const_view xHiDHiDHiy_gg1 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
-	gsl_vector_const_view xHiDHiDHiy_ee1 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
-	gsl_vector_const_view xHiDHiDHiy_ge1 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-
-	gsl_vector_const_view xHiDHiDHiy_gg2 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_gg, v2*v_size+v1);
-	gsl_vector_const_view xHiDHiDHiy_ee2 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ee, v2*v_size+v1);
-	gsl_vector_const_view xHiDHiDHiy_ge2 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ge, v2*v_size+v1);
-
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
-	yPDPDPy_ge-=d;
-
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
-	yPDPDPy_ge-=d;
-
-	// Fourth part: - (yHiDHix)Qi(xHiDHiy).
-	gsl_vector_const_view xHiDHiy_g1 =
-	  gsl_matrix_const_column (xHiDHiy_all_g, v1);
-	gsl_vector_const_view xHiDHiy_e1 =
-	  gsl_matrix_const_column (xHiDHiy_all_e, v1);
-	gsl_vector_const_view QixHiDHiy_g2 =
-	  gsl_matrix_const_column (QixHiDHiy_all_g, v2);
-	gsl_vector_const_view QixHiDHiy_e2 =
-	  gsl_matrix_const_column (QixHiDHiy_all_e, v2);
-
-	gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ge-=d;
-
-	// Fifth and sixth parts:
-	//   + (yHix)Qi(xHiDHix)Qi(xHiDHiy) +
-	//   (yHiDHix)Qi(xHiDHix)Qi(xHiy)
-	gsl_vector_const_view QixHiDHiy_g1 =
-	  gsl_matrix_const_column (QixHiDHiy_all_g, v1);
-	gsl_vector_const_view QixHiDHiy_e1 =
-	  gsl_matrix_const_column (QixHiDHiy_all_e, v1);
-
-	gsl_vector_const_view xHiDHixQixHiy_g1 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_g, v1);
-	gsl_vector_const_view xHiDHixQixHiy_e1 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_e, v1);
-	gsl_vector_const_view xHiDHixQixHiy_g2 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_g, v2);
-	gsl_vector_const_view xHiDHixQixHiy_e2 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_e, v2);
-
-	gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
-	yPDPDPy_gg+=d;
-	gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
-	yPDPDPy_gg+=d;
-
-	gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ee+=d;
-	gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
-	yPDPDPy_ee+=d;
-
-	gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ge+=d;
-	gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
-	yPDPDPy_ge+=d;
-
-	// Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
-	gsl_matrix_const_view xHiDHiDHix_gg =
-	  gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0,
-				      (v1*v_size+v2)*dc_size,
-				      dc_size, dc_size);
-	gsl_matrix_const_view xHiDHiDHix_ee =
-	  gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0,
-				      (v1*v_size+v2)*dc_size,
-				      dc_size, dc_size);
-	gsl_matrix_const_view xHiDHiDHix_ge =
-	  gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0,
-				      (v1*v_size+v2)*dc_size,
-				      dc_size, dc_size);
-
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix,
-			QixHiy, 0.0, xHiDHiDHixQixHiy);
-	gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
-	yPDPDPy_gg+=d;
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix,
-			QixHiy, 0.0, xHiDHiDHixQixHiy);
-	gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
-	yPDPDPy_ee+=d;
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix,
-			QixHiy, 0.0, xHiDHiDHixQixHiy);
-	gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
-	yPDPDPy_ge+=d;
-
-	// Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy).
-	gsl_vector_const_view QixHiDHixQixHiy_g1 =
-	  gsl_matrix_const_column (QixHiDHixQixHiy_all_g, v1);
-	gsl_vector_const_view QixHiDHixQixHiy_e1 =
-	  gsl_matrix_const_column (QixHiDHixQixHiy_all_e, v1);
-
-	gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_g2.vector,&d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector,&xHiDHixQixHiy_e2.vector,&d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_e2.vector,&d);
-	yPDPDPy_ge-=d;
-
-	// Free memory.
-	gsl_vector_free(xHiDHiDHixQixHiy);
-
-	return;
+void Calc_yPDPDPy(
+    const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matrix *xHi,
+    const gsl_matrix *Hiy, const gsl_vector *QixHiy,
+    const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e,
+    const gsl_matrix *QixHiDHiy_all_g, const gsl_matrix *QixHiDHiy_all_e,
+    const gsl_matrix *xHiDHixQixHiy_all_g,
+    const gsl_matrix *xHiDHixQixHiy_all_e,
+    const gsl_matrix *QixHiDHixQixHiy_all_g,
+    const gsl_matrix *QixHiDHixQixHiy_all_e,
+    const gsl_matrix *xHiDHiDHiy_all_gg, const gsl_matrix *xHiDHiDHiy_all_ee,
+    const gsl_matrix *xHiDHiDHiy_all_ge, const gsl_matrix *xHiDHiDHix_all_gg,
+    const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge,
+    const size_t i1, const size_t j1, const size_t i2, const size_t j2,
+    double &yPDPDPy_gg, double &yPDPDPy_ee, double &yPDPDPy_ge) { // Results go to yPDPDPy_gg/_ee/_ge.
+  size_t d_size = Hi->size1, dc_size = xHi->size1;
+  size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
+  size_t v_size = d_size * (d_size + 1) / 2; // Count of index pairs (i,j) with i <= j.
+
+  double d; // Receives each dot product below.
+
+  gsl_vector *xHiDHiDHixQixHiy = gsl_vector_alloc(dc_size); // Scratch for the seventh part; freed before returning.
+
+  // First part: yHiDHiDHiy; the helper is assumed to overwrite the three outputs -- TODO confirm.
+  Calc_yHiDHiDHiy(eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee,
+                  yPDPDPy_ge);
+
+  // Second and third parts:
+  // -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy).
+  gsl_vector_const_view xHiDHiDHiy_gg1 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
+  gsl_vector_const_view xHiDHiDHiy_ee1 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
+  gsl_vector_const_view xHiDHiDHiy_ge1 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
+  // The "2" views read the same matrices with v1 and v2 swapped.
+  gsl_vector_const_view xHiDHiDHiy_gg2 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_gg, v2 * v_size + v1);
+  gsl_vector_const_view xHiDHiDHiy_ee2 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ee, v2 * v_size + v1);
+  gsl_vector_const_view xHiDHiDHiy_ge2 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ge, v2 * v_size + v1);
+
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
+  yPDPDPy_ge -= d;
+
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
+  yPDPDPy_ge -= d;
+
+  // Fourth part: - (yHiDHix)Qi(xHiDHiy).
+  gsl_vector_const_view xHiDHiy_g1 = gsl_matrix_const_column(xHiDHiy_all_g, v1);
+  gsl_vector_const_view xHiDHiy_e1 = gsl_matrix_const_column(xHiDHiy_all_e, v1);
+  gsl_vector_const_view QixHiDHiy_g2 =
+      gsl_matrix_const_column(QixHiDHiy_all_g, v2);
+  gsl_vector_const_view QixHiDHiy_e2 =
+      gsl_matrix_const_column(QixHiDHiy_all_e, v2);
+
+  gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ge -= d; // Cross term: g column for v1 with e column for v2.
+
+  // Fifth and sixth parts:
+  //   + (yHix)Qi(xHiDHix)Qi(xHiDHiy) +
+  //   (yHiDHix)Qi(xHiDHix)Qi(xHiy)
+  gsl_vector_const_view QixHiDHiy_g1 =
+      gsl_matrix_const_column(QixHiDHiy_all_g, v1);
+  gsl_vector_const_view QixHiDHiy_e1 =
+      gsl_matrix_const_column(QixHiDHiy_all_e, v1);
+
+  gsl_vector_const_view xHiDHixQixHiy_g1 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_g, v1);
+  gsl_vector_const_view xHiDHixQixHiy_e1 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_e, v1);
+  gsl_vector_const_view xHiDHixQixHiy_g2 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_g, v2);
+  gsl_vector_const_view xHiDHixQixHiy_e2 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_e, v2);
+  // Each output gets two products: (v1 column . v2 column) and the same with v1 and v2 swapped.
+  gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
+  yPDPDPy_gg += d;
+  gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
+  yPDPDPy_gg += d;
+
+  gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ee += d;
+  gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
+  yPDPDPy_ee += d;
+  // For ge, both products keep g on the v1 side and e on the v2 side.
+  gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ge += d;
+  gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
+  yPDPDPy_ge += d;
+
+  // Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
+  gsl_matrix_const_view xHiDHiDHix_gg = gsl_matrix_const_submatrix(
+      xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+  gsl_matrix_const_view xHiDHiDHix_ee = gsl_matrix_const_submatrix(
+      xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+  gsl_matrix_const_view xHiDHiDHix_ge = gsl_matrix_const_submatrix(
+      xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+  // The scratch vector is overwritten (beta = 0.0) by block * QixHiy, then dotted with QixHiy.
+  gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, QixHiy, 0.0,
+                 xHiDHiDHixQixHiy);
+  gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+  yPDPDPy_gg += d;
+  gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix, QixHiy, 0.0,
+                 xHiDHiDHixQixHiy);
+  gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+  yPDPDPy_ee += d;
+  gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, QixHiy, 0.0,
+                 xHiDHiDHixQixHiy);
+  gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+  yPDPDPy_ge += d;
+
+  // Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy).
+  gsl_vector_const_view QixHiDHixQixHiy_g1 =
+      gsl_matrix_const_column(QixHiDHixQixHiy_all_g, v1);
+  gsl_vector_const_view QixHiDHixQixHiy_e1 =
+      gsl_matrix_const_column(QixHiDHixQixHiy_all_e, v1);
+
+  gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_g2.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector, &xHiDHixQixHiy_e2.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_e2.vector, &d);
+  yPDPDPy_ge -= d;
+
+  // Free memory.
+  gsl_vector_free(xHiDHiDHixQixHiy);
+
+  return;
 }
 
 // Calculate Edgeworth correctation factors for small samples notation
 // and method follows Thomas J. Rothenberg, Econometirca 1984; 52 (4)
 // M=xHiDHix
-void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi,
-	      const gsl_matrix *QixHiDHix_all_g,
-	      const gsl_matrix *QixHiDHix_all_e,
-	      const gsl_matrix *xHiDHiDHix_all_gg,
-	      const gsl_matrix *xHiDHiDHix_all_ee,
-	      const gsl_matrix *xHiDHiDHix_all_ge,
-	      const size_t d_size, double &crt_a,
-	      double &crt_b, double &crt_c) {
-	crt_a=0.0; crt_b=0.0; crt_c=0.0;
-
-	size_t dc_size=Qi->size1, v_size=Hessian_inv->size1/2;
-	size_t c_size=dc_size/d_size;
-	double h_gg, h_ge, h_ee, d, B=0.0, C=0.0, D=0.0;
-	double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee;
-	double trCC_gg, trCC_ge, trCC_ee, trD_gg=0.0, trD_ge=0.0, trD_ee=0.0;
-
-	gsl_matrix *QiMQi_g1=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQi_e1=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQi_g2=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQi_e2=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_matrix *QiMQisQisi_g1=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *QiMQisQisi_e1=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *QiMQisQisi_g2=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *QiMQisQisi_e2=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix *QiMQiMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQiMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQiMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_matrix *QiMMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_matrix *Qi_si=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix *M_dd=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *M_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
-	// Invert Qi_sub to Qi_si.
-	gsl_matrix *Qi_sub=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix_const_view Qi_s =
-	  gsl_matrix_const_submatrix (Qi, (c_size-1)*d_size,
-				      (c_size-1)*d_size, d_size, d_size);
-
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (d_size);
-
-	gsl_matrix_memcpy (Qi_sub, &Qi_s.matrix);
-	LUDecomp (Qi_sub, pmt, &sig);
-	LUInvert (Qi_sub, pmt, Qi_si);
-
-	gsl_permutation_free(pmt);
-	gsl_matrix_free(Qi_sub);
-
-	// Calculate correction factors.
-	for (size_t v1=0; v1<v_size; v1++) {
-	  
-	  // Calculate Qi(xHiDHix)Qi, and subpart of it.
-	  gsl_matrix_const_view QiM_g1 =
-	    gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v1*dc_size,
-					dc_size, dc_size);
-	  gsl_matrix_const_view QiM_e1 =
-	    gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v1*dc_size,
-					dc_size, dc_size);
-	  
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
-			 Qi, 0.0, QiMQi_g1);
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
-			 Qi, 0.0, QiMQi_e1);
-	  
-	  gsl_matrix_view QiMQi_g1_s =
-	    gsl_matrix_submatrix (QiMQi_g1, (c_size-1)*d_size,
-				  (c_size-1)*d_size, d_size, d_size);
-	  gsl_matrix_view QiMQi_e1_s =
-	    gsl_matrix_submatrix (QiMQi_e1, (c_size-1)*d_size,
-				  (c_size-1)*d_size, d_size, d_size);
-
-	  // Calculate trCg1 and trCe1.
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix,
-			 Qi_si, 0.0, QiMQisQisi_g1);
-	  trCg1=0.0;
-	  for (size_t k=0; k<d_size; k++) {
-	    trCg1-=gsl_matrix_get (QiMQisQisi_g1, k, k);
-	  }
-
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix,
-			 Qi_si, 0.0, QiMQisQisi_e1);
-	  trCe1=0.0;
-	  for (size_t k=0; k<d_size; k++) {
-	    trCe1-=gsl_matrix_get (QiMQisQisi_e1, k, k);
-	  }
-
-	  for (size_t v2=0; v2<v_size; v2++) {
-	    if (v2<v1) {continue;}
-	    
-	    // Calculate Qi(xHiDHix)Qi, and subpart of it.
-	    gsl_matrix_const_view QiM_g2 =
-	      gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v2*dc_size,
-					  dc_size, dc_size);
-	    gsl_matrix_const_view QiM_e2 =
-	      gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v2*dc_size,
-					  dc_size, dc_size);
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix,
-			   Qi, 0.0, QiMQi_g2);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix,
-			   Qi, 0.0, QiMQi_e2);
-	    
-	    gsl_matrix_view QiMQi_g2_s =
-	      gsl_matrix_submatrix (QiMQi_g2, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMQi_e2_s =
-	      gsl_matrix_submatrix (QiMQi_e2, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    
-	    // Calculate trCg2 and trCe2.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQi_g2_s.matrix, Qi_si, 0.0, QiMQisQisi_g2);
-	    trCg2=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCg2-=gsl_matrix_get (QiMQisQisi_g2, k, k);
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQi_e2_s.matrix, Qi_si, 0.0, QiMQisQisi_e2);
-	    trCe2=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCe2-=gsl_matrix_get (QiMQisQisi_e2, k, k);
-	    }
-	    
-	    // Calculate trCC_gg, trCC_ge, trCC_ee.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   QiMQisQisi_g1, QiMQisQisi_g2, 0.0, M_dd);
-	    trCC_gg=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCC_gg+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
-			   QiMQisQisi_e2, 0.0, M_dd);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
-			   QiMQisQisi_g2, 1.0, M_dd);
-	    trCC_ge=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCC_ge+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
-			   QiMQisQisi_e2, 0.0, M_dd);
-	    trCC_ee=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCC_ee+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
-			   QiMQi_g2, 0.0, QiMQiMQi_gg);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
-			   QiMQi_e2, 0.0, QiMQiMQi_ge);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
-			   QiMQi_g2, 1.0, QiMQiMQi_ge);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
-			   QiMQi_e2, 0.0, QiMQiMQi_ee);
-	    
-	    gsl_matrix_view QiMQiMQi_gg_s =
-	      gsl_matrix_submatrix (QiMQiMQi_gg, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMQiMQi_ge_s =
-	      gsl_matrix_submatrix (QiMQiMQi_ge, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMQiMQi_ee_s =
-	      gsl_matrix_submatrix (QiMQiMQi_ee, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    
-	    // and part of trB_gg, trB_ge, trB_ee.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQiMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
-	    trB_gg=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      d=gsl_matrix_get (M_dd, k, k);
-	      trB_gg-=d;
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQiMQi_ge_s.matrix, Qi_si, 0.0, M_dd);
-	    trB_ge=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      d=gsl_matrix_get (M_dd, k, k);
-	      trB_ge-=d;
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQiMQi_ee_s.matrix, Qi_si, 0.0, M_dd);
-	    trB_ee=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      d=gsl_matrix_get (M_dd, k, k);
-	      trB_ee-=d;
-	    }
-	    
-	    // Calculate Qi(xHiDHiDHix)Qi, and subpart of it.
-	    gsl_matrix_const_view MM_gg =
-	      gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0,
-					  (v1*v_size+v2)*dc_size, dc_size,
-					  dc_size);
-	    gsl_matrix_const_view MM_ge =
-	      gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0,
-					  (v1*v_size+v2)*dc_size, dc_size,
-					  dc_size);
-	    gsl_matrix_const_view MM_ee =
-	      gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0,
-					  (v1*v_size+v2)*dc_size, dc_size,
-					  dc_size);
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
-			   &MM_gg.matrix, 0.0, M_dcdc);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
-			   QiMMQi_gg);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
-			   &MM_ge.matrix, 0.0, M_dcdc);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc,
-			   Qi, 0.0, QiMMQi_ge);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
-			   &MM_ee.matrix, 0.0, M_dcdc);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi,
-			   0.0, QiMMQi_ee);
-	    
-	    gsl_matrix_view QiMMQi_gg_s =
-	      gsl_matrix_submatrix (QiMMQi_gg, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMMQi_ge_s =
-	      gsl_matrix_submatrix (QiMMQi_ge, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMMQi_ee_s =
-	      gsl_matrix_submatrix (QiMMQi_ee, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    
-	    // Calculate the other part of trB_gg, trB_ge, trB_ee.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
-	    for (size_t k=0; k<d_size; k++) {
-	      trB_gg+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMMQi_ge_s.matrix, Qi_si, 0.0, M_dd);
-	    for (size_t k=0; k<d_size; k++) {
-	      trB_ge+=2.0*gsl_matrix_get (M_dd, k, k);
-	    }
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMMQi_ee_s.matrix, Qi_si, 0.0, M_dd);
-	    for (size_t k=0; k<d_size; k++) {
-	      trB_ee+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    // Calculate trD_gg, trD_ge, trD_ee.
-	    trD_gg=2.0*trB_gg;
-	    trD_ge=2.0*trB_ge;
-	    trD_ee=2.0*trB_ee;
-	    
-	    //calculate B, C and D
-	    h_gg=-1.0*gsl_matrix_get (Hessian_inv, v1, v2);
-	    h_ge=-1.0*gsl_matrix_get (Hessian_inv, v1, v2+v_size);
-	    h_ee=-1.0*gsl_matrix_get (Hessian_inv, v1+v_size, v2+v_size);
-	    
-	    B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
-	    C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) +
-	      h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) +
-	      h_ee*(trCC_ee+0.5*trCe1*trCe2);
-	    D+=h_gg*(trCC_gg+0.5*trD_gg) +
-	      h_ge*(trCC_ge+0.5*trD_ge) + h_ee*(trCC_ee+0.5*trD_ee);
-	    
-	    if (v1!=v2) {
-	      B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
-	      C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) +
-		h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) +
-		h_ee*(trCC_ee+0.5*trCe1*trCe2);
-	      D+=h_gg*(trCC_gg+0.5*trD_gg) +
-		h_ge*(trCC_ge+0.5*trD_ge) +
-		h_ee*(trCC_ee+0.5*trD_ee);
-	    }
-	  }
-	}
-
-	// Calculate a, b, c from B C D.
-	crt_a=2.0*D-C;
-	crt_b=2.0*B;
-	crt_c=C;
-
-	// Free matrix memory.
-	gsl_matrix_free(QiMQi_g1);
-	gsl_matrix_free(QiMQi_e1);
-	gsl_matrix_free(QiMQi_g2);
-	gsl_matrix_free(QiMQi_e2);
-
-	gsl_matrix_free(QiMQisQisi_g1);
-	gsl_matrix_free(QiMQisQisi_e1);
-	gsl_matrix_free(QiMQisQisi_g2);
-	gsl_matrix_free(QiMQisQisi_e2);
-
-	gsl_matrix_free(QiMQiMQi_gg);
-	gsl_matrix_free(QiMQiMQi_ge);
-	gsl_matrix_free(QiMQiMQi_ee);
-
-	gsl_matrix_free(QiMMQi_gg);
-	gsl_matrix_free(QiMMQi_ge);
-	gsl_matrix_free(QiMMQi_ee);
-
-	gsl_matrix_free(Qi_si);
-
-	gsl_matrix_free(M_dd);
-	gsl_matrix_free(M_dcdc);
-
-	return;
+void CalcCRT(const gsl_matrix *Hessian_inv, const gsl_matrix *Qi,
+             const gsl_matrix *QixHiDHix_all_g,
+             const gsl_matrix *QixHiDHix_all_e,
+             const gsl_matrix *xHiDHiDHix_all_gg,
+             const gsl_matrix *xHiDHiDHix_all_ee,
+             const gsl_matrix *xHiDHiDHix_all_ge, const size_t d_size,
+             double &crt_a, double &crt_b, double &crt_c) {
+  crt_a = 0.0;
+  crt_b = 0.0;
+  crt_c = 0.0;
+
+  size_t dc_size = Qi->size1, v_size = Hessian_inv->size1 / 2;
+  size_t c_size = dc_size / d_size; // d_size-sized blocks along Qi's rows.
+  double h_gg, h_ge, h_ee, d, B = 0.0, C = 0.0, D = 0.0;
+  double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee;
+  double trCC_gg, trCC_ge, trCC_ee, trD_gg = 0.0, trD_ge = 0.0, trD_ee = 0.0;
+
+  gsl_matrix *QiMQi_g1 = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQi_e1 = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQi_g2 = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQi_e2 = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_matrix *QiMQisQisi_g1 = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *QiMQisQisi_e1 = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *QiMQisQisi_g2 = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *QiMQisQisi_e2 = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix *QiMQiMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQiMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQiMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_matrix *QiMMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_matrix *Qi_si = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix *M_dd = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *M_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+
+  // Invert the d_size x d_size block of Qi at (c_size - 1) * d_size into Qi_si.
+  gsl_matrix *Qi_sub = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix_const_view Qi_s = gsl_matrix_const_submatrix(
+      Qi, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(d_size);
+
+  gsl_matrix_memcpy(Qi_sub, &Qi_s.matrix);
+  LUDecomp(Qi_sub, pmt, &sig);
+  LUInvert(Qi_sub, pmt, Qi_si);
+
+  gsl_permutation_free(pmt);
+  gsl_matrix_free(Qi_sub);
+
+  // Calculate correction factors over index pairs (v1, v2) with v2 >= v1.
+  for (size_t v1 = 0; v1 < v_size; v1++) {
+
+    // Calculate Qi(xHiDHix)Qi for column block v1, and subpart of it.
+    gsl_matrix_const_view QiM_g1 = gsl_matrix_const_submatrix(
+        QixHiDHix_all_g, 0, v1 * dc_size, dc_size, dc_size);
+    gsl_matrix_const_view QiM_e1 = gsl_matrix_const_submatrix(
+        QixHiDHix_all_e, 0, v1 * dc_size, dc_size, dc_size);
+
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, Qi, 0.0,
+                   QiMQi_g1);
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, Qi, 0.0,
+                   QiMQi_e1);
+
+    gsl_matrix_view QiMQi_g1_s = gsl_matrix_submatrix(
+        QiMQi_g1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+    gsl_matrix_view QiMQi_e1_s = gsl_matrix_submatrix(
+        QiMQi_e1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+
+    // Calculate trCg1 and trCe1: minus the trace of subpart times Qi_si.
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix, Qi_si,
+                   0.0, QiMQisQisi_g1);
+    trCg1 = 0.0;
+    for (size_t k = 0; k < d_size; k++) {
+      trCg1 -= gsl_matrix_get(QiMQisQisi_g1, k, k);
+    }
+
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix, Qi_si,
+                   0.0, QiMQisQisi_e1);
+    trCe1 = 0.0;
+    for (size_t k = 0; k < d_size; k++) {
+      trCe1 -= gsl_matrix_get(QiMQisQisi_e1, k, k);
+    }
+
+    for (size_t v2 = 0; v2 < v_size; v2++) {
+      if (v2 < v1) {
+        continue;
+      }
+
+      // Calculate Qi(xHiDHix)Qi for column block v2, and subpart of it.
+      gsl_matrix_const_view QiM_g2 = gsl_matrix_const_submatrix(
+          QixHiDHix_all_g, 0, v2 * dc_size, dc_size, dc_size);
+      gsl_matrix_const_view QiM_e2 = gsl_matrix_const_submatrix(
+          QixHiDHix_all_e, 0, v2 * dc_size, dc_size, dc_size);
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix, Qi, 0.0,
+                     QiMQi_g2);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix, Qi, 0.0,
+                     QiMQi_e2);
+
+      gsl_matrix_view QiMQi_g2_s =
+          gsl_matrix_submatrix(QiMQi_g2, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMQi_e2_s =
+          gsl_matrix_submatrix(QiMQi_e2, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+
+      // Calculate trCg2 and trCe2 the same way as trCg1 and trCe1.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g2_s.matrix, Qi_si,
+                     0.0, QiMQisQisi_g2);
+      trCg2 = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCg2 -= gsl_matrix_get(QiMQisQisi_g2, k, k);
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e2_s.matrix, Qi_si,
+                     0.0, QiMQisQisi_e2);
+      trCe2 = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCe2 -= gsl_matrix_get(QiMQisQisi_e2, k, k);
+      }
+
+      // Calculate trCC_gg, trCC_ge (g1*e2 plus e1*g2 terms), trCC_ee.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
+                     QiMQisQisi_g2, 0.0, M_dd);
+      trCC_gg = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCC_gg += gsl_matrix_get(M_dd, k, k);
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
+                     QiMQisQisi_e2, 0.0, M_dd);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
+                     QiMQisQisi_g2, 1.0, M_dd);
+      trCC_ge = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCC_ge += gsl_matrix_get(M_dd, k, k);
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
+                     QiMQisQisi_e2, 0.0, M_dd);
+      trCC_ee = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCC_ee += gsl_matrix_get(M_dd, k, k);
+      }
+
+      // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_g2,
+                     0.0, QiMQiMQi_gg);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_e2,
+                     0.0, QiMQiMQi_ge);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_g2,
+                     1.0, QiMQiMQi_ge);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_e2,
+                     0.0, QiMQiMQi_ee);
+
+      gsl_matrix_view QiMQiMQi_gg_s =
+          gsl_matrix_submatrix(QiMQiMQi_gg, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMQiMQi_ge_s =
+          gsl_matrix_submatrix(QiMQiMQi_ge, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMQiMQi_ee_s =
+          gsl_matrix_submatrix(QiMQiMQi_ee, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+
+      // Calculate the first (subtracted) part of trB_gg, trB_ge, trB_ee.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_gg_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      trB_gg = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        d = gsl_matrix_get(M_dd, k, k);
+        trB_gg -= d;
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ge_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      trB_ge = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        d = gsl_matrix_get(M_dd, k, k);
+        trB_ge -= d;
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ee_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      trB_ee = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        d = gsl_matrix_get(M_dd, k, k);
+        trB_ee -= d;
+      }
+
+      // Calculate Qi(xHiDHiDHix)Qi, and subpart of it.
+      gsl_matrix_const_view MM_gg = gsl_matrix_const_submatrix(
+          xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+      gsl_matrix_const_view MM_ge = gsl_matrix_const_submatrix(
+          xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+      gsl_matrix_const_view MM_ee = gsl_matrix_const_submatrix(
+          xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_gg.matrix, 0.0,
+                     M_dcdc);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+                     QiMMQi_gg);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ge.matrix, 0.0,
+                     M_dcdc);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+                     QiMMQi_ge);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ee.matrix, 0.0,
+                     M_dcdc);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+                     QiMMQi_ee);
+
+      gsl_matrix_view QiMMQi_gg_s =
+          gsl_matrix_submatrix(QiMMQi_gg, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMMQi_ge_s =
+          gsl_matrix_submatrix(QiMMQi_ge, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMMQi_ee_s =
+          gsl_matrix_submatrix(QiMMQi_ee, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+
+      // Add the other part of trB_gg, trB_ge, trB_ee (ge term is doubled).
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_gg_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      for (size_t k = 0; k < d_size; k++) {
+        trB_gg += gsl_matrix_get(M_dd, k, k);
+      }
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ge_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      for (size_t k = 0; k < d_size; k++) {
+        trB_ge += 2.0 * gsl_matrix_get(M_dd, k, k);
+      }
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ee_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      for (size_t k = 0; k < d_size; k++) {
+        trB_ee += gsl_matrix_get(M_dd, k, k);
+      }
+
+      // Calculate trD_gg, trD_ge, trD_ee as twice the matching trB values.
+      trD_gg = 2.0 * trB_gg;
+      trD_ge = 2.0 * trB_ge;
+      trD_ee = 2.0 * trB_ee;
+
+      // Calculate B, C and D; h_* are negated entries of Hessian_inv.
+      h_gg = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2);
+      h_ge = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2 + v_size);
+      h_ee = -1.0 * gsl_matrix_get(Hessian_inv, v1 + v_size, v2 + v_size);
+
+      B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
+      C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
+           h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
+           h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
+      D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
+           h_ee * (trCC_ee + 0.5 * trD_ee);
+
+      if (v1 != v2) { // v2 < v1 was skipped, so add the same terms again.
+        B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
+        C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
+             h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
+             h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
+        D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
+             h_ee * (trCC_ee + 0.5 * trD_ee);
+      }
+    }
+  }
+
+  // Calculate a, b, c from B, C, D.
+  crt_a = 2.0 * D - C;
+  crt_b = 2.0 * B;
+  crt_c = C;
+
+  // Free matrix memory.
+  gsl_matrix_free(QiMQi_g1);
+  gsl_matrix_free(QiMQi_e1);
+  gsl_matrix_free(QiMQi_g2);
+  gsl_matrix_free(QiMQi_e2);
+
+  gsl_matrix_free(QiMQisQisi_g1);
+  gsl_matrix_free(QiMQisQisi_e1);
+  gsl_matrix_free(QiMQisQisi_g2);
+  gsl_matrix_free(QiMQisQisi_e2);
+
+  gsl_matrix_free(QiMQiMQi_gg);
+  gsl_matrix_free(QiMQiMQi_ge);
+  gsl_matrix_free(QiMQiMQi_ee);
+
+  gsl_matrix_free(QiMMQi_gg);
+  gsl_matrix_free(QiMMQi_ge);
+  gsl_matrix_free(QiMMQi_ee);
+
+  gsl_matrix_free(Qi_si);
+
+  gsl_matrix_free(M_dd);
+  gsl_matrix_free(M_dcdc);
+
+  return;
 }
 
 // Calculate first-order and second-order derivatives.
-void CalcDev (const char func_name, const gsl_vector *eval,
-	      const gsl_matrix *Qi, const gsl_matrix *Hi,
-	      const gsl_matrix *xHi, const gsl_matrix *Hiy,
-	      const gsl_vector *QixHiy, gsl_vector *gradient,
-	      gsl_matrix *Hessian_inv, double &crt_a, double &crt_b,
-	      double &crt_c) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' &&
-	    func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for " <<
-	    "log-restricted likelihood, 'L' for log-likelihood."<<endl;
-	  return;
-	}
-
-	size_t dc_size=Qi->size1, d_size=Hi->size1;
-	size_t c_size=dc_size/d_size;
-	size_t v_size=d_size*(d_size+1)/2;
-	size_t v1, v2;
-	double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
-
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_matrix *xHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *xHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *xHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *xHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *xHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *xHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
-	gsl_matrix *QixHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *QixHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *QixHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *QixHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *QixHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *QixHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
-	gsl_matrix *xHiDHiDHiy_all_gg =
-	  gsl_matrix_alloc (dc_size, v_size*v_size);
-	gsl_matrix *xHiDHiDHiy_all_ee =
-	  gsl_matrix_alloc (dc_size, v_size*v_size);
-	gsl_matrix *xHiDHiDHiy_all_ge =
-	  gsl_matrix_alloc (dc_size, v_size*v_size);
-	gsl_matrix *xHiDHiDHix_all_gg =
-	  gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-	gsl_matrix *xHiDHiDHix_all_ee =
-	  gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-	gsl_matrix *xHiDHiDHix_all_ge =
-	  gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-
-	// Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all.
-	Calc_xHiDHiy_all (eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
-	Calc_xHiDHix_all (eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
-	Calc_xHiDHixQixHiy_all (xHiDHix_all_g, xHiDHix_all_e, QixHiy,
-				xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
-
-	Calc_xHiDHiDHiy_all (v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg,
-			     xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
-	Calc_xHiDHiDHix_all (v_size, eval, Hi, xHi, xHiDHiDHix_all_gg,
-			     xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
-
-	// Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all.
-	Calc_QiVec_all (Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g,
-			QixHiDHiy_all_e);
-	Calc_QiVec_all (Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
-			QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
-	Calc_QiMat_all (Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g,
-			QixHiDHix_all_e);
-
-	double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee;
-	double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
-	double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
-
-	// Calculate gradient and Hessian for Vg.
-	for (size_t i1=0; i1<d_size; i1++) {
-	  for (size_t j1=0; j1<d_size; j1++) {
-	    if (j1<i1) {continue;}
-	    v1=GetIndex (i1, j1, d_size);
-	    
-	    Calc_yPDPy (eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
-			xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1,
-			yPDPy_g, yPDPy_e);
-	    
-	    if (func_name=='R' || func_name=='r') {
-	      Calc_tracePD (eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e,
-			    i1, j1, tPD_g, tPD_e);
-
-	      dev1_g=-0.5*tPD_g+0.5*yPDPy_g;
-	      dev1_e=-0.5*tPD_e+0.5*yPDPy_e;
-	    } else {
-	      Calc_traceHiD (eval, Hi, i1, j1, tHiD_g, tHiD_e);
-	      
-	      dev1_g=-0.5*tHiD_g+0.5*yPDPy_g;
-	      dev1_e=-0.5*tHiD_e+0.5*yPDPy_e;
-	    }
-	    
-	    gsl_vector_set (gradient, v1, dev1_g);
-	    gsl_vector_set (gradient, v1+v_size, dev1_e);
-	    
-	    for (size_t i2=0; i2<d_size; i2++) {
-	      for (size_t j2=0; j2<d_size; j2++) {
-		if (j2<i2) {continue;}
-		v2=GetIndex (i2, j2, d_size);
-		
-		if (v2<v1) {continue;}
-
-		Calc_yPDPDPy (eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g,
-			      xHiDHiy_all_e, QixHiDHiy_all_g, QixHiDHiy_all_e,
-			      xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
-			      QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e,
-			      xHiDHiDHiy_all_gg, xHiDHiDHiy_all_ee,
-			      xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg,
-			      xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1,
-			      i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
-		
-		// AI for REML.
-		if (func_name=='R' || func_name=='r') {
-		  Calc_tracePDPD (eval, Qi, Hi, xHi, QixHiDHix_all_g,
-				  QixHiDHix_all_e, xHiDHiDHix_all_gg,
-				  xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1,
-				  i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
-		  
-		  dev2_gg=0.5*tPDPD_gg-yPDPDPy_gg;
-		  dev2_ee=0.5*tPDPD_ee-yPDPDPy_ee;
-		  dev2_ge=0.5*tPDPD_ge-yPDPDPy_ge;
-		} else {
-		  Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tHiDHiD_gg,
-				    tHiDHiD_ee, tHiDHiD_ge);
-		  
-		  dev2_gg=0.5*tHiDHiD_gg-yPDPDPy_gg;
-		  dev2_ee=0.5*tHiDHiD_ee-yPDPDPy_ee;
-		  dev2_ge=0.5*tHiDHiD_ge-yPDPDPy_ge;
-		}
-		
-		// Set up Hessian.
-		gsl_matrix_set (Hessian, v1, v2, dev2_gg);
-		gsl_matrix_set (Hessian, v1+v_size, v2+v_size, dev2_ee);
-		gsl_matrix_set (Hessian, v1, v2+v_size, dev2_ge);
-		gsl_matrix_set (Hessian, v2+v_size, v1, dev2_ge);
-		
-		if (v1!=v2) {
-		  gsl_matrix_set (Hessian, v2, v1, dev2_gg);
-		  gsl_matrix_set (Hessian, v2+v_size, v1+v_size, dev2_ee);
-		  gsl_matrix_set (Hessian, v2, v1+v_size, dev2_ge);
-		  gsl_matrix_set (Hessian, v1+v_size, v2, dev2_ge);
-		}
-	      }
-	    }
-	  }
-	}
-	
-	// Invert Hessian.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (v_size*2);
-
-	LUDecomp (Hessian, pmt, &sig);
-	LUInvert (Hessian, pmt, Hessian_inv);
-	
-	gsl_permutation_free(pmt);
-	gsl_matrix_free(Hessian);
-
-	// Calculate Edgeworth correction factors after inverting
-	// Hessian.
-	if (c_size>1) {
-	  CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e,
-		  xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge,
-		  d_size, crt_a, crt_b, crt_c);
-	} else {
-	  crt_a=0.0; crt_b=0.0; crt_c=0.0;
-	}
-	
-	gsl_matrix_free(xHiDHiy_all_g);
-	gsl_matrix_free(xHiDHiy_all_e);
-	gsl_matrix_free(xHiDHix_all_g);
-	gsl_matrix_free(xHiDHix_all_e);
-	gsl_matrix_free(xHiDHixQixHiy_all_g);
-	gsl_matrix_free(xHiDHixQixHiy_all_e);
-
-	gsl_matrix_free(QixHiDHiy_all_g);
-	gsl_matrix_free(QixHiDHiy_all_e);
-	gsl_matrix_free(QixHiDHix_all_g);
-	gsl_matrix_free(QixHiDHix_all_e);
-	gsl_matrix_free(QixHiDHixQixHiy_all_g);
-	gsl_matrix_free(QixHiDHixQixHiy_all_e);
-
-	gsl_matrix_free(xHiDHiDHiy_all_gg);
-	gsl_matrix_free(xHiDHiDHiy_all_ee);
-	gsl_matrix_free(xHiDHiDHiy_all_ge);
-	gsl_matrix_free(xHiDHiDHix_all_gg);
-	gsl_matrix_free(xHiDHiDHix_all_ee);
-	gsl_matrix_free(xHiDHiDHix_all_ge);
-
-	return;
+void CalcDev(const char func_name, const gsl_vector *eval, const gsl_matrix *Qi,
+             const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy,
+             const gsl_vector *QixHiy, gsl_vector *gradient,
+             gsl_matrix *Hessian_inv, double &crt_a, double &crt_b,
+             double &crt_c) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for "
+         << "log-restricted likelihood, 'L' for log-likelihood." << endl;
+    return;
+  }
+
+  size_t dc_size = Qi->size1, d_size = Hi->size1;
+  size_t c_size = dc_size / d_size;
+  size_t v_size = d_size * (d_size + 1) / 2;
+  size_t v1, v2;
+  double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
+
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_matrix *xHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *xHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *xHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *xHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *xHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *xHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+
+  gsl_matrix *QixHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *QixHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *QixHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *QixHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *QixHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *QixHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+
+  gsl_matrix *xHiDHiDHiy_all_gg = gsl_matrix_alloc(dc_size, v_size * v_size);
+  gsl_matrix *xHiDHiDHiy_all_ee = gsl_matrix_alloc(dc_size, v_size * v_size);
+  gsl_matrix *xHiDHiDHiy_all_ge = gsl_matrix_alloc(dc_size, v_size * v_size);
+  gsl_matrix *xHiDHiDHix_all_gg =
+      gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+  gsl_matrix *xHiDHiDHix_all_ee =
+      gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+  gsl_matrix *xHiDHiDHix_all_ge =
+      gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+
+  // Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all.
+  Calc_xHiDHiy_all(eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
+  Calc_xHiDHix_all(eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
+  Calc_xHiDHixQixHiy_all(xHiDHix_all_g, xHiDHix_all_e, QixHiy,
+                         xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
+
+  Calc_xHiDHiDHiy_all(v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg,
+                      xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
+  Calc_xHiDHiDHix_all(v_size, eval, Hi, xHi, xHiDHiDHix_all_gg,
+                      xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
+
+  // Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all.
+  Calc_QiVec_all(Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g,
+                 QixHiDHiy_all_e);
+  Calc_QiVec_all(Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
+                 QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
+  Calc_QiMat_all(Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g,
+                 QixHiDHix_all_e);
+
+  double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee;
+  double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
+  double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
+
+  // Calculate gradient and Hessian for Vg and Ve.
+  for (size_t i1 = 0; i1 < d_size; i1++) {
+    for (size_t j1 = 0; j1 < d_size; j1++) {
+      if (j1 < i1) {
+        continue;
+      }
+      v1 = GetIndex(i1, j1, d_size);
+
+      Calc_yPDPy(eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
+                 xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1, yPDPy_g,
+                 yPDPy_e);
+
+      if (func_name == 'R' || func_name == 'r') {
+        Calc_tracePD(eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, i1, j1, tPD_g,
+                     tPD_e);
+
+        dev1_g = -0.5 * tPD_g + 0.5 * yPDPy_g;
+        dev1_e = -0.5 * tPD_e + 0.5 * yPDPy_e;
+      } else {
+        Calc_traceHiD(eval, Hi, i1, j1, tHiD_g, tHiD_e);
+
+        dev1_g = -0.5 * tHiD_g + 0.5 * yPDPy_g;
+        dev1_e = -0.5 * tHiD_e + 0.5 * yPDPy_e;
+      }
+
+      gsl_vector_set(gradient, v1, dev1_g);
+      gsl_vector_set(gradient, v1 + v_size, dev1_e);
+
+      for (size_t i2 = 0; i2 < d_size; i2++) {
+        for (size_t j2 = 0; j2 < d_size; j2++) {
+          if (j2 < i2) {
+            continue;
+          }
+          v2 = GetIndex(i2, j2, d_size);
+
+          if (v2 < v1) {
+            continue;
+          }
+
+          Calc_yPDPDPy(eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
+                       QixHiDHiy_all_g, QixHiDHiy_all_e, xHiDHixQixHiy_all_g,
+                       xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g,
+                       QixHiDHixQixHiy_all_e, xHiDHiDHiy_all_gg,
+                       xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg,
+                       xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, i2, j2,
+                       yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
+
+          // AI for REML.
+          if (func_name == 'R' || func_name == 'r') {
+            Calc_tracePDPD(eval, Qi, Hi, xHi, QixHiDHix_all_g, QixHiDHix_all_e,
+                           xHiDHiDHix_all_gg, xHiDHiDHix_all_ee,
+                           xHiDHiDHix_all_ge, i1, j1, i2, j2, tPDPD_gg,
+                           tPDPD_ee, tPDPD_ge);
+
+            dev2_gg = 0.5 * tPDPD_gg - yPDPDPy_gg;
+            dev2_ee = 0.5 * tPDPD_ee - yPDPDPy_ee;
+            dev2_ge = 0.5 * tPDPD_ge - yPDPDPy_ge;
+          } else {
+            Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tHiDHiD_gg, tHiDHiD_ee,
+                             tHiDHiD_ge);
+
+            dev2_gg = 0.5 * tHiDHiD_gg - yPDPDPy_gg;
+            dev2_ee = 0.5 * tHiDHiD_ee - yPDPDPy_ee;
+            dev2_ge = 0.5 * tHiDHiD_ge - yPDPDPy_ge;
+          }
+
+          // Set up Hessian.
+          gsl_matrix_set(Hessian, v1, v2, dev2_gg);
+          gsl_matrix_set(Hessian, v1 + v_size, v2 + v_size, dev2_ee);
+          gsl_matrix_set(Hessian, v1, v2 + v_size, dev2_ge);
+          gsl_matrix_set(Hessian, v2 + v_size, v1, dev2_ge);
+
+          if (v1 != v2) {
+            gsl_matrix_set(Hessian, v2, v1, dev2_gg);
+            gsl_matrix_set(Hessian, v2 + v_size, v1 + v_size, dev2_ee);
+            gsl_matrix_set(Hessian, v2, v1 + v_size, dev2_ge);
+            gsl_matrix_set(Hessian, v1 + v_size, v2, dev2_ge);
+          }
+        }
+      }
+    }
+  }
+
+  // Invert Hessian.
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(v_size * 2);
+
+  LUDecomp(Hessian, pmt, &sig);
+  LUInvert(Hessian, pmt, Hessian_inv);
+
+  gsl_permutation_free(pmt);
+  gsl_matrix_free(Hessian);
+
+  // Calculate Edgeworth correction factors after inverting
+  // Hessian.
+  if (c_size > 1) {
+    CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e,
+            xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, d_size,
+            crt_a, crt_b, crt_c);
+  } else {
+    crt_a = 0.0;
+    crt_b = 0.0;
+    crt_c = 0.0;
+  }
+
+  gsl_matrix_free(xHiDHiy_all_g);
+  gsl_matrix_free(xHiDHiy_all_e);
+  gsl_matrix_free(xHiDHix_all_g);
+  gsl_matrix_free(xHiDHix_all_e);
+  gsl_matrix_free(xHiDHixQixHiy_all_g);
+  gsl_matrix_free(xHiDHixQixHiy_all_e);
+
+  gsl_matrix_free(QixHiDHiy_all_g);
+  gsl_matrix_free(QixHiDHiy_all_e);
+  gsl_matrix_free(QixHiDHix_all_g);
+  gsl_matrix_free(QixHiDHix_all_e);
+  gsl_matrix_free(QixHiDHixQixHiy_all_g);
+  gsl_matrix_free(QixHiDHixQixHiy_all_e);
+
+  gsl_matrix_free(xHiDHiDHiy_all_gg);
+  gsl_matrix_free(xHiDHiDHiy_all_ee);
+  gsl_matrix_free(xHiDHiDHiy_all_ge);
+  gsl_matrix_free(xHiDHiDHix_all_gg);
+  gsl_matrix_free(xHiDHiDHix_all_ee);
+  gsl_matrix_free(xHiDHiDHix_all_ge);
+
+  return;
 }
 
 // Update Vg, Ve.
-void UpdateVgVe (const gsl_matrix *Hessian_inv, const gsl_vector *gradient,
-		 const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) {
-	size_t v_size=gradient->size/2, d_size=V_g->size1;
-	size_t v;
+void UpdateVgVe(const gsl_matrix *Hessian_inv, const gsl_vector *gradient,
+                const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) {
+  size_t v_size = gradient->size / 2, d_size = V_g->size1;
+  size_t v;
 
-	gsl_vector *vec_v=gsl_vector_alloc (v_size*2);
+  gsl_vector *vec_v = gsl_vector_alloc(v_size * 2);
 
-	double d;
+  double d;
 
-	// Vectorize Vg and Ve.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<d_size; j++) {
-			if (j<i) {continue;}
-			v=GetIndex(i, j, d_size);
+  // Vectorize Vg and Ve.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) {
+        continue;
+      }
+      v = GetIndex(i, j, d_size);
 
-			d=gsl_matrix_get (V_g, i, j);
-			gsl_vector_set (vec_v, v, d);
+      d = gsl_matrix_get(V_g, i, j);
+      gsl_vector_set(vec_v, v, d);
 
-			d=gsl_matrix_get (V_e, i, j);
-			gsl_vector_set (vec_v, v+v_size, d);
-		}
-	}
+      d = gsl_matrix_get(V_e, i, j);
+      gsl_vector_set(vec_v, v + v_size, d);
+    }
+  }
 
-	gsl_blas_dgemv (CblasNoTrans, -1.0*step_scale, Hessian_inv,
-			gradient, 1.0, vec_v);
+  gsl_blas_dgemv(CblasNoTrans, -1.0 * step_scale, Hessian_inv, gradient, 1.0,
+                 vec_v);
 
-	// Save Vg and Ve.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<d_size; j++) {
-			if (j<i) {continue;}
-			v=GetIndex(i, j, d_size);
+  // Save Vg and Ve.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) {
+        continue;
+      }
+      v = GetIndex(i, j, d_size);
 
-			d=gsl_vector_get (vec_v, v);
-			gsl_matrix_set (V_g, i, j, d);
-			gsl_matrix_set (V_g, j, i, d);
+      d = gsl_vector_get(vec_v, v);
+      gsl_matrix_set(V_g, i, j, d);
+      gsl_matrix_set(V_g, j, i, d);
 
-			d=gsl_vector_get (vec_v, v+v_size);
-			gsl_matrix_set (V_e, i, j, d);
-			gsl_matrix_set (V_e, j, i, d);
-		}
-	}
+      d = gsl_vector_get(vec_v, v + v_size);
+      gsl_matrix_set(V_e, i, j, d);
+      gsl_matrix_set(V_e, j, i, d);
+    }
+  }
 
-	gsl_vector_free(vec_v);
+  gsl_vector_free(vec_v);
 
-	return;
+  return;
 }
 
-double MphNR (const char func_name, const size_t max_iter,
-	      const double max_prec, const gsl_vector *eval,
-	      const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *Hi_all,
-	      gsl_matrix *xHi_all, gsl_matrix *Hiy_all, gsl_matrix *V_g,
-	      gsl_matrix *V_e, gsl_matrix *Hessian_inv, double &crt_a,
-	      double &crt_b, double &crt_c) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' &&
-	    func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
-	    "likelihood, 'L' for log-likelihood."<<endl;
-	  return 0.0;
-	}
-	size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
-	size_t dc_size=d_size*c_size;
-	size_t v_size=d_size*(d_size+1)/2;
-
-	double logdet_H, logdet_Q, yPy, logl_const;
-	double logl_old=0.0, logl_new=0.0, step_scale;
-	int sig;
-	size_t step_iter, flag_pd;
-
-	gsl_matrix *Vg_save=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Ve_save=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_temp=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *U_temp=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *D_temp=gsl_vector_alloc (d_size);
-	gsl_vector *xHiy=gsl_vector_alloc (dc_size);
-	gsl_vector *QixHiy=gsl_vector_alloc (dc_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
-
-	gsl_vector *gradient=gsl_vector_alloc (v_size*2);
-
-	// Calculate |XXt| and (XXt)^{-1}.
-	gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
-	for (size_t i=0; i<c_size; ++i) {
-	  for (size_t j=0; j<i; ++j) {
-	    gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i));
-	  }
-	}
-
-	gsl_permutation * pmt=gsl_permutation_alloc (c_size);
-	LUDecomp (XXt, pmt, &sig);
-	gsl_permutation_free (pmt);
-
-	// Calculate the constant for logl.
-	if (func_name=='R' || func_name=='r') {
-	  logl_const=-0.5*(double)(n_size-c_size) *
-	    (double)d_size*log(2.0*M_PI) +
-	    0.5*(double)d_size*LULndet (XXt);
-	} else {
-	  logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
-	}
-	
-	// Optimization iterations.
-	for (size_t t=0; t<max_iter; t++) {
-		gsl_matrix_memcpy (Vg_save, V_g);
-		gsl_matrix_memcpy (Ve_save, V_e);
-
-		step_scale=1.0; step_iter=0;
-		do {
-		  gsl_matrix_memcpy (V_g, Vg_save);
-		  gsl_matrix_memcpy (V_e, Ve_save);
-		  
-		  // Update Vg, Ve, and invert Hessian.
-		  if (t!=0) {
-		    UpdateVgVe (Hessian_inv, gradient, step_scale, V_g, V_e);
-		  }
-		  
-		  // Check if both Vg and Ve are positive definite.
-		  flag_pd=1;
-		  gsl_matrix_memcpy (V_temp, V_e);
-		  EigenDecomp(V_temp, U_temp, D_temp, 0);
-		  for (size_t i=0; i<d_size; i++) {
-		    if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
-		  }
-		  gsl_matrix_memcpy (V_temp, V_g);
-		  EigenDecomp(V_temp, U_temp, D_temp, 0);
-		  for (size_t i=0; i<d_size; i++) {
-		    if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
-		  }
-		  
-		  // If flag_pd==1, continue to calculate quantities
-		  // and logl.
-		  if (flag_pd==1) {
-		    CalcHiQi(eval,X,V_g,V_e,Hi_all,Qi,logdet_H,logdet_Q);
-		    Calc_Hiy_all (Y, Hi_all, Hiy_all);
-		    Calc_xHi_all (X, Hi_all, xHi_all);
-		    
-		    // Calculate QixHiy and yPy.
-		    Calc_xHiy (Y, xHi_all, xHiy);
-		    gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
-		    
-		    gsl_blas_ddot (QixHiy, xHiy, &yPy);
-		    yPy=Calc_yHiy (Y, Hiy_all)-yPy;
-		    
-		    // Calculate log likelihood/restricted likelihood value.
-		    if (func_name=='R' || func_name=='r') {
-		      logl_new=logl_const-0.5*logdet_H-0.5*logdet_Q-0.5*yPy;
-		    } else {
-		      logl_new=logl_const-0.5*logdet_H-0.5*yPy;
-		    }
-		  }
-		  
-		  step_scale/=2.0;
-		  step_iter++;
-		  
-		} while ( (flag_pd==0 || logl_new<logl_old ||
-			   logl_new-logl_old>10 ) && step_iter<10 && t!=0);
-
-		// Terminate if change is small.
-		if (t!=0) {
-			if (logl_new<logl_old || flag_pd==0) {
-				gsl_matrix_memcpy (V_g, Vg_save);
-				gsl_matrix_memcpy (V_e, Ve_save);
-				break;
-			}
-
-			if (logl_new-logl_old<max_prec) {
-				break;
-			}
-		}
-
-		logl_old=logl_new;
-
-		CalcDev (func_name, eval, Qi, Hi_all, xHi_all, Hiy_all,
-			 QixHiy, gradient, Hessian_inv, crt_a, crt_b, crt_c);
-	}
-
-	// Mutiply Hessian_inv with -1.0.
-	// Now Hessian_inv is the variance matrix.
-	gsl_matrix_scale (Hessian_inv, -1.0);
-
-	gsl_matrix_free(Vg_save);
-	gsl_matrix_free(Ve_save);
-	gsl_matrix_free(V_temp);
-	gsl_matrix_free(U_temp);
-	gsl_vector_free(D_temp);
-	gsl_vector_free(xHiy);
-	gsl_vector_free(QixHiy);
-
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(XXt);
-
-	gsl_vector_free(gradient);
-
-	return logl_new;
+double MphNR(const char func_name, const size_t max_iter, const double max_prec,
+             const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
+             gsl_matrix *Hi_all, gsl_matrix *xHi_all, gsl_matrix *Hiy_all,
+             gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *Hessian_inv,
+             double &crt_a, double &crt_b, double &crt_c) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+         << "likelihood, 'L' for log-likelihood." << endl;
+    return 0.0;
+  }
+  size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+  size_t dc_size = d_size * c_size;
+  size_t v_size = d_size * (d_size + 1) / 2;
+
+  double logdet_H, logdet_Q, yPy, logl_const;
+  double logl_old = 0.0, logl_new = 0.0, step_scale;
+  int sig;
+  size_t step_iter, flag_pd;
+
+  gsl_matrix *Vg_save = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Ve_save = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_temp = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *U_temp = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *D_temp = gsl_vector_alloc(d_size);
+  gsl_vector *xHiy = gsl_vector_alloc(dc_size);
+  gsl_vector *QixHiy = gsl_vector_alloc(dc_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
+
+  gsl_vector *gradient = gsl_vector_alloc(v_size * 2);
+
+  // Form XXt and LU-decompose it; no inverse is formed, only LULndet(XXt).
+  gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
+  for (size_t i = 0; i < c_size; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
+    }
+  }
+
+  gsl_permutation *pmt = gsl_permutation_alloc(c_size);
+  LUDecomp(XXt, pmt, &sig);
+  gsl_permutation_free(pmt);
+
+  // Calculate the constant for logl.
+  if (func_name == 'R' || func_name == 'r') {
+    logl_const =
+        -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) +
+        0.5 * (double)d_size * LULndet(XXt);
+  } else {
+    logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI);
+  }
+
+  // Optimization iterations.
+  for (size_t t = 0; t < max_iter; t++) {
+    gsl_matrix_memcpy(Vg_save, V_g);
+    gsl_matrix_memcpy(Ve_save, V_e);
+
+    step_scale = 1.0;
+    step_iter = 0;
+    do {
+      gsl_matrix_memcpy(V_g, Vg_save);
+      gsl_matrix_memcpy(V_e, Ve_save);
+
+      // Apply the scaled Newton step to Vg and Ve (skipped when t == 0).
+      if (t != 0) {
+        UpdateVgVe(Hessian_inv, gradient, step_scale, V_g, V_e);
+      }
+
+      // Check if both Vg and Ve are positive definite.
+      flag_pd = 1;
+      gsl_matrix_memcpy(V_temp, V_e);
+      EigenDecomp(V_temp, U_temp, D_temp, 0);
+      for (size_t i = 0; i < d_size; i++) {
+        if (gsl_vector_get(D_temp, i) <= 0) {
+          flag_pd = 0;
+        }
+      }
+      gsl_matrix_memcpy(V_temp, V_g);
+      EigenDecomp(V_temp, U_temp, D_temp, 0);
+      for (size_t i = 0; i < d_size; i++) {
+        if (gsl_vector_get(D_temp, i) <= 0) {
+          flag_pd = 0;
+        }
+      }
+
+      // If flag_pd==1, continue to calculate quantities
+      // and logl.
+      if (flag_pd == 1) {
+        CalcHiQi(eval, X, V_g, V_e, Hi_all, Qi, logdet_H, logdet_Q);
+        Calc_Hiy_all(Y, Hi_all, Hiy_all);
+        Calc_xHi_all(X, Hi_all, xHi_all);
+
+        // Calculate QixHiy and yPy.
+        Calc_xHiy(Y, xHi_all, xHiy);
+        gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
+
+        gsl_blas_ddot(QixHiy, xHiy, &yPy);
+        yPy = Calc_yHiy(Y, Hiy_all) - yPy;
+
+        // Calculate log likelihood/restricted likelihood value.
+        if (func_name == 'R' || func_name == 'r') {
+          logl_new = logl_const - 0.5 * logdet_H - 0.5 * logdet_Q - 0.5 * yPy;
+        } else {
+          logl_new = logl_const - 0.5 * logdet_H - 0.5 * yPy;
+        }
+      }
+
+      step_scale /= 2.0;
+      step_iter++;
+
+    } while (
+        (flag_pd == 0 || logl_new < logl_old || logl_new - logl_old > 10) &&
+        step_iter < 10 && t != 0);
+
+    // Stop if the step failed (restoring Vg, Ve) or the gain is small.
+    if (t != 0) {
+      if (logl_new < logl_old || flag_pd == 0) {
+        gsl_matrix_memcpy(V_g, Vg_save);
+        gsl_matrix_memcpy(V_e, Ve_save);
+        break;
+      }
+
+      if (logl_new - logl_old < max_prec) {
+        break;
+      }
+    }
+
+    logl_old = logl_new;
+
+    CalcDev(func_name, eval, Qi, Hi_all, xHi_all, Hiy_all, QixHiy, gradient,
+            Hessian_inv, crt_a, crt_b, crt_c);
+  }
+
+  // Multiply Hessian_inv by -1.0.
+  // Now Hessian_inv is the variance matrix.
+  gsl_matrix_scale(Hessian_inv, -1.0);
+
+  gsl_matrix_free(Vg_save);
+  gsl_matrix_free(Ve_save);
+  gsl_matrix_free(V_temp);
+  gsl_matrix_free(U_temp);
+  gsl_vector_free(D_temp);
+  gsl_vector_free(xHiy);
+  gsl_vector_free(QixHiy);
+
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(XXt);
+
+  gsl_vector_free(gradient);
+
+  return logl_new;
 }
 
 // Initialize Vg, Ve and B.
 void MphInitial(const size_t em_iter, const double em_prec,
-		const size_t nr_iter, const double nr_prec,
-		const gsl_vector *eval, const gsl_matrix *X,
-		const gsl_matrix *Y, const double l_min, const double l_max,
-		const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e,
-		gsl_matrix *B) {
-  
-	gsl_matrix_set_zero (V_g);
-	gsl_matrix_set_zero (V_e);
-	gsl_matrix_set_zero (B);
-
-	size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
-	double a, b, c;
-	double lambda, logl, vg, ve;
-
-	// Initialize the diagonal elements of Vg and Ve using univariate
-	// LMM and REML estimates.
-	gsl_matrix *Xt=gsl_matrix_alloc (n_size, c_size);
-	gsl_vector *beta_temp=gsl_vector_alloc(c_size);
-	gsl_vector *se_beta_temp=gsl_vector_alloc(c_size);
-
-	gsl_matrix_transpose_memcpy (Xt, X);
-
-	for (size_t i=0; i<d_size; i++) {
-	  gsl_vector_const_view Y_row=gsl_matrix_const_row (Y, i);
-	  CalcLambda ('R', eval, Xt, &Y_row.vector, l_min, l_max,
-		      n_region, lambda, logl);
-	  CalcLmmVgVeBeta (eval, Xt, &Y_row.vector, lambda, vg, ve,
-			   beta_temp, se_beta_temp);
-	  
-	  gsl_matrix_set(V_g, i, i, vg);
-	  gsl_matrix_set(V_e, i, i, ve);
-	}
-	
-	gsl_matrix_free (Xt);
-	gsl_vector_free (beta_temp);
-	gsl_vector_free (se_beta_temp);
-
-	// If number of phenotypes is above four, then obtain the off
-	// diagonal elements with two trait models.
-	if (d_size>4) {
-	  
-	  // First obtain good initial values.
-	  // Large matrices for EM.
-	  gsl_matrix *U_hat=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *E_hat=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *OmegaU=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *OmegaE=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiY=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiBX=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiU=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiE=gsl_matrix_alloc (2, n_size);
-	  
-	  // Large matrices for NR. Each dxd block is H_k^{-1}.
-	  gsl_matrix *Hi_all=gsl_matrix_alloc (2, 2*n_size);
-
-	  // Each column is H_k^{-1}y_k.
-	  gsl_matrix *Hiy_all=gsl_matrix_alloc (2, n_size);
-
-	  // Each dcxdc block is x_k\otimes H_k^{-1}.
-	  gsl_matrix *xHi_all=gsl_matrix_alloc (2*c_size, 2*n_size);
-	  gsl_matrix *Hessian=gsl_matrix_alloc (6, 6);
-	  
-	  // 2 by n matrix of Y.
-	  gsl_matrix *Y_sub=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *Vg_sub=gsl_matrix_alloc (2, 2);
-	  gsl_matrix *Ve_sub=gsl_matrix_alloc (2, 2);
-	  gsl_matrix *B_sub=gsl_matrix_alloc (2, c_size);
-	  
-	  for (size_t i=0; i<d_size; i++) {
-	    gsl_vector_view Y_sub1=gsl_matrix_row (Y_sub, 0);
-	    gsl_vector_const_view Y_1=gsl_matrix_const_row (Y, i);
-	    gsl_vector_memcpy (&Y_sub1.vector, &Y_1.vector);
-	    
-	    for (size_t j=i+1; j<d_size; j++) {
-	      gsl_vector_view Y_sub2=gsl_matrix_row (Y_sub, 1);
-	      gsl_vector_const_view Y_2=gsl_matrix_const_row (Y, j);
-	      gsl_vector_memcpy (&Y_sub2.vector, &Y_2.vector);
-	      
-	      gsl_matrix_set_zero (Vg_sub);
-	      gsl_matrix_set_zero (Ve_sub);
-	      gsl_matrix_set (Vg_sub, 0, 0, gsl_matrix_get (V_g, i, i));
-	      gsl_matrix_set (Ve_sub, 0, 0, gsl_matrix_get (V_e, i, i));
-	      gsl_matrix_set (Vg_sub, 1, 1, gsl_matrix_get (V_g, j, j));
-	      gsl_matrix_set (Ve_sub, 1, 1, gsl_matrix_get (V_e, j, j));
-	      
-	      logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat,
-			  E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
-			  UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub);
-	      logl=MphNR ('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all,
-			  xHi_all, Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
-	      
-	      gsl_matrix_set(V_g, i, j, gsl_matrix_get (Vg_sub, 0, 1));
-	      gsl_matrix_set(V_g, j, i, gsl_matrix_get (Vg_sub, 0, 1));
-	      
-	      gsl_matrix_set(V_e, i, j, ve=gsl_matrix_get (Ve_sub, 0, 1));
-	      gsl_matrix_set(V_e, j, i, ve=gsl_matrix_get (Ve_sub, 0, 1));
-	    }
-	  }
-	  
-	  // Free matrices.
-	  gsl_matrix_free(U_hat);
-	  gsl_matrix_free(E_hat);
-	  gsl_matrix_free(OmegaU);
-	  gsl_matrix_free(OmegaE);
-	  gsl_matrix_free(UltVehiY);
-	  gsl_matrix_free(UltVehiBX);
-	  gsl_matrix_free(UltVehiU);
-	  gsl_matrix_free(UltVehiE);
-	  
-	  gsl_matrix_free(Hi_all);
-	  gsl_matrix_free(Hiy_all);
-	  gsl_matrix_free(xHi_all);
-	  gsl_matrix_free(Hessian);
-	  
-	  gsl_matrix_free(Y_sub);
-	  gsl_matrix_free(Vg_sub);
-	  gsl_matrix_free(Ve_sub);
-	  gsl_matrix_free(B_sub);
-	}
-
-	// Calculate B hat using GSL estimate.
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (d_size*c_size, d_size*c_size);
-	gsl_vector *XHiy=gsl_vector_alloc (d_size*c_size);
-	gsl_vector *beta=gsl_vector_alloc (d_size*c_size);
-
-	gsl_vector_set_zero (XHiy);
-
-	double logdet_Ve, logdet_Q, dl, d, delta, dx, dy;
-
-	// Eigen decomposition and calculate log|Ve|.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate Qi and log|Q|.
-	logdet_Q=CalcQi (eval, D_l, X, Qi);
-
-	// Calculate UltVehiY.
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,UltVehi,Y,0.0,UltVehiY);
-
-	//calculate XHiy
-	for (size_t i=0; i<d_size; i++) {
-	  dl=gsl_vector_get(D_l, i);
-	  
-	  for (size_t j=0; j<c_size; j++) {
-	    d=0.0;
-	    for (size_t k=0; k<n_size; k++) {
-	      delta=gsl_vector_get(eval, k);
-	      dx=gsl_matrix_get(X, j, k);
-	      dy=gsl_matrix_get(UltVehiY, i, k);
-	      d+=dy*dx/(delta*dl+1.0);
-	    }
-	    gsl_vector_set(XHiy, j*d_size+i, d);
-	  }
-	}
-
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta);
-
-	// Multiply beta by UltVeh and save to B.
-	for (size_t i=0; i<c_size; i++) {
-	  gsl_vector_view B_col=gsl_matrix_column (B, i);
-	  gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size);
-	  gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0,
-			 &B_col.vector);
-	}
-
-	// Free memory.
-	gsl_matrix_free(UltVehiY);
-
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(Qi);
-	gsl_vector_free(XHiy);
-	gsl_vector_free(beta);
-
-	return;
+                const size_t nr_iter, const double nr_prec,
+                const gsl_vector *eval, const gsl_matrix *X,
+                const gsl_matrix *Y, const double l_min, const double l_max,
+                const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e,
+                gsl_matrix *B) {
+
+  gsl_matrix_set_zero(V_g);
+  gsl_matrix_set_zero(V_e);
+  gsl_matrix_set_zero(B);
+
+  size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+  double a, b, c;
+  double lambda, logl, vg, ve;
+
+  // Initialize the diagonal elements of Vg and Ve using univariate
+  // LMM and REML estimates.
+  gsl_matrix *Xt = gsl_matrix_alloc(n_size, c_size);
+  gsl_vector *beta_temp = gsl_vector_alloc(c_size);
+  gsl_vector *se_beta_temp = gsl_vector_alloc(c_size);
+
+  gsl_matrix_transpose_memcpy(Xt, X);
+
+  for (size_t i = 0; i < d_size; i++) {
+    gsl_vector_const_view Y_row = gsl_matrix_const_row(Y, i);
+    CalcLambda('R', eval, Xt, &Y_row.vector, l_min, l_max, n_region, lambda,
+               logl);
+    CalcLmmVgVeBeta(eval, Xt, &Y_row.vector, lambda, vg, ve, beta_temp,
+                    se_beta_temp);
+
+    gsl_matrix_set(V_g, i, i, vg);
+    gsl_matrix_set(V_e, i, i, ve);
+  }
+
+  gsl_matrix_free(Xt);
+  gsl_vector_free(beta_temp);
+  gsl_vector_free(se_beta_temp);
+
+  // If the number of phenotypes is above four, then obtain the
+  // off-diagonal elements with two-trait models.
+  if (d_size > 4) {
+
+    // First obtain good initial values.
+    // Large matrices for EM.
+    gsl_matrix *U_hat = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *E_hat = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *OmegaU = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *OmegaE = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiY = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiBX = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiU = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiE = gsl_matrix_alloc(2, n_size);
+
+    // Large matrices for NR. Each dxd block is H_k^{-1}.
+    gsl_matrix *Hi_all = gsl_matrix_alloc(2, 2 * n_size);
+
+    // Each column is H_k^{-1}y_k.
+    gsl_matrix *Hiy_all = gsl_matrix_alloc(2, n_size);
+
+    // Each dcxdc block is x_k\otimes H_k^{-1}.
+    gsl_matrix *xHi_all = gsl_matrix_alloc(2 * c_size, 2 * n_size);
+    gsl_matrix *Hessian = gsl_matrix_alloc(6, 6);
+
+    // 2 by n matrix of Y.
+    gsl_matrix *Y_sub = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *Vg_sub = gsl_matrix_alloc(2, 2);
+    gsl_matrix *Ve_sub = gsl_matrix_alloc(2, 2);
+    gsl_matrix *B_sub = gsl_matrix_alloc(2, c_size);
+
+    for (size_t i = 0; i < d_size; i++) {
+      gsl_vector_view Y_sub1 = gsl_matrix_row(Y_sub, 0);
+      gsl_vector_const_view Y_1 = gsl_matrix_const_row(Y, i);
+      gsl_vector_memcpy(&Y_sub1.vector, &Y_1.vector);
+
+      for (size_t j = i + 1; j < d_size; j++) {
+        gsl_vector_view Y_sub2 = gsl_matrix_row(Y_sub, 1);
+        gsl_vector_const_view Y_2 = gsl_matrix_const_row(Y, j);
+        gsl_vector_memcpy(&Y_sub2.vector, &Y_2.vector);
+
+        gsl_matrix_set_zero(Vg_sub);
+        gsl_matrix_set_zero(Ve_sub);
+        gsl_matrix_set(Vg_sub, 0, 0, gsl_matrix_get(V_g, i, i));
+        gsl_matrix_set(Ve_sub, 0, 0, gsl_matrix_get(V_e, i, i));
+        gsl_matrix_set(Vg_sub, 1, 1, gsl_matrix_get(V_g, j, j));
+        gsl_matrix_set(Ve_sub, 1, 1, gsl_matrix_get(V_e, j, j));
+
+        logl = MphEM('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat,
+                     OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                     Vg_sub, Ve_sub, B_sub);
+        logl = MphNR('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, xHi_all,
+                     Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
+
+        gsl_matrix_set(V_g, i, j, gsl_matrix_get(Vg_sub, 0, 1));
+        gsl_matrix_set(V_g, j, i, gsl_matrix_get(Vg_sub, 0, 1));
+
+        gsl_matrix_set(V_e, i, j, ve = gsl_matrix_get(Ve_sub, 0, 1));
+        gsl_matrix_set(V_e, j, i, ve = gsl_matrix_get(Ve_sub, 0, 1));
+      }
+    }
+
+    // Free matrices.
+    gsl_matrix_free(U_hat);
+    gsl_matrix_free(E_hat);
+    gsl_matrix_free(OmegaU);
+    gsl_matrix_free(OmegaE);
+    gsl_matrix_free(UltVehiY);
+    gsl_matrix_free(UltVehiBX);
+    gsl_matrix_free(UltVehiU);
+    gsl_matrix_free(UltVehiE);
+
+    gsl_matrix_free(Hi_all);
+    gsl_matrix_free(Hiy_all);
+    gsl_matrix_free(xHi_all);
+    gsl_matrix_free(Hessian);
+
+    gsl_matrix_free(Y_sub);
+    gsl_matrix_free(Vg_sub);
+    gsl_matrix_free(Ve_sub);
+    gsl_matrix_free(B_sub);
+  }
+
+  // Calculate B hat using the GLS (generalized least squares) estimate.
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(d_size * c_size, d_size * c_size);
+  gsl_vector *XHiy = gsl_vector_alloc(d_size * c_size);
+  gsl_vector *beta = gsl_vector_alloc(d_size * c_size);
+
+  gsl_vector_set_zero(XHiy);
+
+  double logdet_Ve, logdet_Q, dl, d, delta, dx, dy;
+
+  // Eigen decomposition and calculate log|Ve|.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate Qi and log|Q|.
+  logdet_Q = CalcQi(eval, D_l, X, Qi);
+
+  // Calculate UltVehiY.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+  // Calculate XHiy.
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+
+    for (size_t j = 0; j < c_size; j++) {
+      d = 0.0;
+      for (size_t k = 0; k < n_size; k++) {
+        delta = gsl_vector_get(eval, k);
+        dx = gsl_matrix_get(X, j, k);
+        dy = gsl_matrix_get(UltVehiY, i, k);
+        d += dy * dx / (delta * dl + 1.0);
+      }
+      gsl_vector_set(XHiy, j * d_size + i, d);
+    }
+  }
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta);
+
+  // Multiply beta by UltVeh and save to B.
+  for (size_t i = 0; i < c_size; i++) {
+    gsl_vector_view B_col = gsl_matrix_column(B, i);
+    gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
+    gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0,
+                   &B_col.vector);
+  }
+
+  // Free memory.
+  gsl_matrix_free(UltVehiY);
+
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(Qi);
+  gsl_vector_free(XHiy);
+  gsl_vector_free(beta);
+
+  return;
 }
 
 // p-value correction
 // mode=1 Wald; mode=2 LRT; mode=3 SCORE;
-double PCRT (const size_t mode, const size_t d_size, const double p_value,
-	     const double crt_a, const double crt_b, const double crt_c) {
-	double p_crt=0.0, chisq_crt=0.0, q=(double)d_size;
-	double chisq=gsl_cdf_chisq_Qinv(p_value, (double)d_size );
-
-	if (mode==1) {
-		double a=crt_c/(2.0*q*(q+2.0));
-		double b=1.0+(crt_a+crt_b)/(2.0*q);
-		chisq_crt=(-1.0*b+sqrt(b*b+4.0*a*chisq))/(2.0*a);
-	} else if (mode==2) {
-		chisq_crt=chisq/(1.0+crt_a/(2.0*q) );
-	} else {
-		chisq_crt=chisq;
-	}
-
-	p_crt=gsl_cdf_chisq_Q (chisq_crt, (double)d_size );
-
-	return p_crt;
+double PCRT(const size_t mode, const size_t d_size, const double p_value,
+            const double crt_a, const double crt_b, const double crt_c) {
+  // Turn p_value into a chi-square(d_size) statistic, rescale that statistic
+  // according to mode using crt_a/crt_b/crt_c, and return its upper-tail p.
+  double chisq_crt = 0.0, q = (double)d_size;
+  double chisq = gsl_cdf_chisq_Qinv(p_value, (double)d_size);
+  if (mode == 1) {
+    double a = crt_c / (2.0 * q * (q + 2.0));
+    double b = 1.0 + (crt_a + crt_b) / (2.0 * q);
+    // Root of a*x^2 + b*x = chisq; a == 0 would divide by 0, so use b*x = chisq.
+    chisq_crt = (a == 0.0) ? chisq / b
+                           : (-b + sqrt(b * b + 4.0 * a * chisq)) / (2.0 * a);
+  } else if (mode == 2) {
+    chisq_crt = chisq / (1.0 + crt_a / (2.0 * q));
+  } else {
+    chisq_crt = chisq;
+  }
+  return gsl_cdf_chisq_Q(chisq_crt, (double)d_size);
 }
 
 // WJA added.
-void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
-			 const gsl_matrix *UtW, const gsl_matrix *UtY) {
-	string file_bgen=file_oxford+".bgen";
-	ifstream infile (file_bgen.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bgen file:"<<file_bgen<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	string line;
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_miss, c_phen;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR. Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-	gsl_vector *x=gsl_vector_alloc (n_size);
-	gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub =
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
-	gsl_vector_view X_row=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row.vector);
-	gsl_vector_view B_col=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size,
-						     c+v_size) );
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<=i; j++) {
-			cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-		}
-		cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<=i; j++) {
-			c=GetIndex(i, j, d_size);
-			cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-		}
-		cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<=i; j++) {
-			cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-		}
-		cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Read in header.
-	uint32_t bgen_snp_block_offset;
-	uint32_t bgen_header_length;
-	uint32_t bgen_nsamples;
-	uint32_t bgen_nsnps;
-	uint32_t bgen_flags;
-	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
-	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
-	bgen_snp_block_offset-=4;
-	infile.ignore(4+bgen_header_length-20);
-	bgen_snp_block_offset-=4+bgen_header_length-20;
-	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
-	bgen_snp_block_offset-=4;
-	bool CompressedSNPBlocks=bgen_flags&0x1;
-
-	infile.ignore(bgen_snp_block_offset);
-
-	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
-	double bgen_geno_prob_non_miss;
-
-	uint32_t bgen_N;
-	uint16_t bgen_LS;
-	uint16_t bgen_LR;
-	uint16_t bgen_LC;
-	uint32_t bgen_SNP_pos;
-	uint32_t bgen_LA;
-	std::string bgen_A_allele;
-	uint32_t bgen_LB;
-	std::string bgen_B_allele;
-	uint32_t bgen_P;
-	size_t unzipped_data_size;
-	string id;
-	string rs;
-	string chr;
-	std::cout<<"Warning: WJA hard coded SNP missingness threshold "<<
-	  "of 10%"<<std::endl;
-
-	// Start reading genotypes and analyze.
-	size_t csnp=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (t%d_pace==0 || t==(ns_total-1)) {
-	    ProgressBar ("Reading SNPs  ", t, ns_total-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  // Read SNP header.
-	  id.clear();
-	  rs.clear();
-	  chr.clear();
-	  bgen_A_allele.clear();
-	  bgen_B_allele.clear();
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_N),4);
-	  infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-	  
-	  id.resize(bgen_LS);
-	  infile.read(&id[0], bgen_LS);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_LR),2);
-	  rs.resize(bgen_LR);
-	  infile.read(&rs[0], bgen_LR);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_LC),2);
-	  chr.resize(bgen_LC);
-	  infile.read(&chr[0], bgen_LC);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_LA),4);
-	  bgen_A_allele.resize(bgen_LA);
-	  infile.read(&bgen_A_allele[0], bgen_LA);
-		
-	  infile.read(reinterpret_cast<char*>(&bgen_LB),4);
-	  bgen_B_allele.resize(bgen_LB);
-	  infile.read(&bgen_B_allele[0], bgen_LB);
-	  
-	  uint16_t unzipped_data[3*bgen_N];
-	  
-	  if (indicator_snp[t]==0) {
-	    if(CompressedSNPBlocks)
-	      infile.read(reinterpret_cast<char*>(&bgen_P),4);
-	    else
-	      bgen_P=6*bgen_N;
-	    
-	    infile.ignore(static_cast<size_t>(bgen_P));
-	    
-	    continue;
-	  }
-
-	  if(CompressedSNPBlocks) {
-
-	    infile.read(reinterpret_cast<char*>(&bgen_P),4);
-	    uint8_t zipped_data[bgen_P];
-	    
-	    unzipped_data_size=6*bgen_N;
-	    
-	    infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
-	    
-	    int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data),
-	      reinterpret_cast<uLongf*>(&unzipped_data_size),
-	      reinterpret_cast<Bytef*>(zipped_data),
-              static_cast<uLong> (bgen_P));
-	    assert(result == Z_OK);
-	    
-	  } else {
-	    
-	    bgen_P=6*bgen_N;
-	    infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
-	  }
-	  
-	  x_mean=0.0; c_phen=0; n_miss=0;
-	  gsl_vector_set_zero(x_miss);
-	  for (size_t i=0; i<bgen_N; ++i) {
-	    if (indicator_idv[i]==0) {continue;}
-	    
-	    bgen_geno_prob_AA =
-	      static_cast<double>(unzipped_data[i*3])/32768.0;
-	    bgen_geno_prob_AB =
-	      static_cast<double>(unzipped_data[i*3+1])/32768.0;
-	    bgen_geno_prob_BB =
-	      static_cast<double>(unzipped_data[i*3+2])/32768.0;
-	    
-	    // WJA.
-	    bgen_geno_prob_non_miss=bgen_geno_prob_AA +
-	      bgen_geno_prob_AB+bgen_geno_prob_BB;
-	    if (bgen_geno_prob_non_miss<0.9) {
-	      gsl_vector_set(x_miss, c_phen, 0.0);
-	      n_miss++;
-	    }
-	    else {
-	      
-	      bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
-	      bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
-	      bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-	      
-	      geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-	      
-	      gsl_vector_set(x, c_phen, geno);
-	      gsl_vector_set(x_miss, c_phen, 1.0);
-	      x_mean+=geno;
-	    }
-	    c_phen++;
-	  }
-	  
-	  x_mean/=static_cast<double>(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
-	  }
-
-	  gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
-	  gsl_vector_memcpy (&Xlarge_col.vector, x);
-	  csnp++;
-
-	  if (csnp%msize==0 || csnp==t_last ) {
-	    size_t l=0;
-	    if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-	    
-	    gsl_matrix_view Xlarge_sub =
-	      gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-	    gsl_matrix_view UtXlarge_sub =
-	      gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-	    
-	    time_start=clock();
-	    eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-			    &UtXlarge_sub.matrix);
-	    time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    
-	    gsl_matrix_set_zero (Xlarge);
-	    
-	    for (size_t i=0; i<l; i++) {
-	      gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
-	      gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-	      
-	      // Initial values.
-	      gsl_matrix_memcpy (V_g, V_g_null);
-	      gsl_matrix_memcpy (V_e, V_e_null);
-	      gsl_matrix_memcpy (B, B_null);
-	      
-	      time_start=clock();
-		    
-	      // 3 is before 1.
-	      if (a_mode==3 || a_mode==4) {
-		p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		if (p_score<p_nr && crt==1) {
-		  logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
-				 xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
-				 crt_b, crt_c);
-		  p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		}
-	      }
-	      
-	      if (a_mode==2 || a_mode==4) {
-		logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		
-		// Calculate beta and Vbeta.
-		p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				V_e, UltVehiY, beta, Vbeta);
-		p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-		
-		if (p_lrt<p_nr) {
-		  logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				  V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
-					 (double)d_size );
-		  
-		  if (crt==1) {
-		    p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      if (a_mode==1 || a_mode==4) {
-		logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat,
-			       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
-			       UltVehiU, UltVehiE, V_g, V_e, B);
-		p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				 V_e, UltVehiY, beta, Vbeta);
-		
-		if (p_wald<p_nr) {
-		  logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (crt==1) {
-		    p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-
-	      time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      // Store summary data.
-	      for (size_t i=0; i<d_size; i++) {
-		v_beta[i]=gsl_vector_get (beta, i);
-	      }
-	      
-	      c=0;
-	      for (size_t i=0; i<d_size; i++) {
-		for (size_t j=i; j<d_size; j++) {
-		  v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		  v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		  v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		  c++;
-		}
-	      }
-	      
-	      MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve,
-			       v_Vbeta};
-	      sumStat.push_back(SNPs);
-	    }
-	  }
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	return;
+void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+                        const gsl_matrix *UtW, const gsl_matrix *UtY) {
+  string file_bgen = file_oxford + ".bgen";
+  ifstream infile(file_bgen.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bgen file:" << file_bgen << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  string line;
+
+  // Create a large matrix.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_miss, c_phen;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR. Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+  gsl_vector *x = gsl_vector_alloc(n_size);
+  gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+
+  gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+  gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row.vector);
+  gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub.matrix);
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Read in header.
+  uint32_t bgen_snp_block_offset;
+  uint32_t bgen_header_length;
+  uint32_t bgen_nsamples;
+  uint32_t bgen_nsnps;
+  uint32_t bgen_flags;
+  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+  bgen_snp_block_offset -= 4;
+  infile.ignore(4 + bgen_header_length - 20);
+  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+  bgen_snp_block_offset -= 4;
+  bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+  infile.ignore(bgen_snp_block_offset);
+
+  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
+  double bgen_geno_prob_non_miss;
+
+  uint32_t bgen_N;
+  uint16_t bgen_LS;
+  uint16_t bgen_LR;
+  uint16_t bgen_LC;
+  uint32_t bgen_SNP_pos;
+  uint32_t bgen_LA;
+  std::string bgen_A_allele;
+  uint32_t bgen_LB;
+  std::string bgen_B_allele;
+  uint32_t bgen_P;
+  size_t unzipped_data_size;
+  string id;
+  string rs;
+  string chr;
+  std::cout << "Warning: WJA hard coded SNP missingness threshold "
+            << "of 10%" << std::endl;
+
+  // Start reading genotypes and analyze.
+  size_t csnp = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // Read SNP header.
+    id.clear();
+    rs.clear();
+    chr.clear();
+    bgen_A_allele.clear();
+    bgen_B_allele.clear();
+
+    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+    id.resize(bgen_LS);
+    infile.read(&id[0], bgen_LS);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+    rs.resize(bgen_LR);
+    infile.read(&rs[0], bgen_LR);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+    chr.resize(bgen_LC);
+    infile.read(&chr[0], bgen_LC);
+
+    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+    bgen_A_allele.resize(bgen_LA);
+    infile.read(&bgen_A_allele[0], bgen_LA);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+    bgen_B_allele.resize(bgen_LB);
+    infile.read(&bgen_B_allele[0], bgen_LB);
+
+    uint16_t unzipped_data[3 * bgen_N];
+
+    if (indicator_snp[t] == 0) {
+      if (CompressedSNPBlocks)
+        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      else
+        bgen_P = 6 * bgen_N;
+
+      infile.ignore(static_cast<size_t>(bgen_P));
+
+      continue;
+    }
+
+    if (CompressedSNPBlocks) {
+
+      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      uint8_t zipped_data[bgen_P];
+
+      unzipped_data_size = 6 * bgen_N;
+
+      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+
+      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+                              reinterpret_cast<uLongf *>(&unzipped_data_size),
+                              reinterpret_cast<Bytef *>(zipped_data),
+                              static_cast<uLong>(bgen_P));
+      assert(result == Z_OK);
+
+    } else {
+
+      bgen_P = 6 * bgen_N;
+      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+    }
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < bgen_N; ++i) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+      bgen_geno_prob_AB =
+          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+      bgen_geno_prob_BB =
+          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+
+      // WJA.
+      bgen_geno_prob_non_miss =
+          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+      if (bgen_geno_prob_non_miss < 0.9) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+
+        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= static_cast<double>(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    csnp++;
+
+    if (csnp % msize == 0 || csnp == t_last) {
+      size_t l = 0;
+      if (csnp % msize == 0) {
+        l = msize;
+      } else {
+        l = csnp % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+        // Initial values.
+        gsl_matrix_memcpy(V_g, V_g_null);
+        gsl_matrix_memcpy(V_e, V_e_null);
+        gsl_matrix_memcpy(B, B_null);
+
+        time_start = clock();
+
+        // 3 is before 1.
+        if (a_mode == 3 || a_mode == 4) {
+          p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+                             V_e_null, UltVehiY, beta, Vbeta);
+          if (p_score < p_nr && crt == 1) {
+            logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                            Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+          }
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+
+          // Calculate beta and Vbeta.
+          p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                           UltVehiY, beta, Vbeta);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+          if (p_lrt < p_nr) {
+            logl_H1 =
+                MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+            // Calculate beta and Vbeta.
+            p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                             UltVehiY, beta, Vbeta);
+            p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+            if (crt == 1) {
+              p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+          p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                            UltVehiY, beta, Vbeta);
+
+          if (p_wald < p_nr) {
+            logl_H1 =
+                MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                              UltVehiY, beta, Vbeta);
+
+            if (crt == 1) {
+              p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        for (size_t i = 0; i < d_size; i++) {
+          v_beta[i] = gsl_vector_get(beta, i);
+        }
+
+        c = 0;
+        for (size_t i = 0; i < d_size; i++) {
+          for (size_t j = i; j < d_size; j++) {
+            v_Vg[c] = gsl_matrix_get(V_g, i, j);
+            v_Ve[c] = gsl_matrix_get(V_e, i, j);
+            v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+            c++;
+          }
+        }
+
+        MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  return;
 }
 
-void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
-			   const gsl_matrix *UtW, const gsl_matrix *UtY) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	string line;
-	char *ch_ptr;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_miss, c_phen;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k \otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-	gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub =
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
-	gsl_vector_view X_row=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row.vector);
-	gsl_vector_view B_col=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
-		       &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
-		       crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size,
-						     c+v_size) );
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Start reading genotypes and analyze.
-	size_t csnp=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  !safeGetline(infile, line).eof();
-	  if (t%d_pace==0 || t==(ns_total-1)) {
-	    ProgressBar ("Reading SNPs  ", t, ns_total-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  ch_ptr=strtok ((char *)line.c_str(), " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  
-	  x_mean=0.0; c_phen=0; n_miss=0;
-	  gsl_vector_set_zero(x_miss);
-	  for (size_t i=0; i<ni_total; ++i) {
-	    ch_ptr=strtok (NULL, " , \t");
-	    if (indicator_idv[i]==0) {continue;}
-	    
-	    if (strcmp(ch_ptr, "NA")==0) {
-	      gsl_vector_set(x_miss, c_phen, 0.0);
-	      n_miss++;
-	    }
-	    else {
-	      geno=atof(ch_ptr);
-	      
-	      gsl_vector_set(x, c_phen, geno);
-	      gsl_vector_set(x_miss, c_phen, 1.0);
-	      x_mean+=geno;
-	    }
-	    c_phen++;
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
-	    geno=gsl_vector_get(x, i);
-	  }
-
-	  gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
-	  gsl_vector_memcpy (&Xlarge_col.vector, x);
-	  csnp++;
-	  
-	  if (csnp%msize==0 || csnp==t_last ) {
-	    size_t l=0;
-	    if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-	    
-	    gsl_matrix_view Xlarge_sub =
-	      gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-	    gsl_matrix_view UtXlarge_sub =
-	      gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-	    
-	    time_start=clock();
-	    eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-			    &UtXlarge_sub.matrix);
-	    time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    
-	    gsl_matrix_set_zero (Xlarge);
-	    
-	    for (size_t i=0; i<l; i++) {
-	      gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
-	      gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-	      
-	      // Initial values.
-	      gsl_matrix_memcpy (V_g, V_g_null);
-	      gsl_matrix_memcpy (V_e, V_e_null);
-	      gsl_matrix_memcpy (B, B_null);
-	      
-	      time_start=clock();
-	      
-	      // 3 is before 1.
-	      if (a_mode==3 || a_mode==4) {
-		p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		if (p_score<p_nr && crt==1) {
-		  logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
-				 xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
-				 crt_b, crt_c);
-		  p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		}
-	      }
-
-	      if (a_mode==2 || a_mode==4) {
-		logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		
-		// Calculate beta and Vbeta.
-		p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				V_g, V_e, UltVehiY, beta, Vbeta);
-		p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-		
-		if (p_lrt<p_nr) {
-		  logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g, V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
-					 (double)d_size );
-
-		  if (crt==1) {
-		    p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      if (a_mode==1 || a_mode==4) {
-		logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				 V_e, UltVehiY, beta, Vbeta);
-		
-		if (p_wald<p_nr) {
-		  logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e,
-				 Hessian, crt_a, crt_b, crt_c);
-		  p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (crt==1) {
-		    p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      // Store summary data.
-	      for (size_t i=0; i<d_size; i++) {
-		v_beta[i]=gsl_vector_get (beta, i);
-	      }
-	      
-	      c=0;
-	      for (size_t i=0; i<d_size; i++) {
-		for (size_t j=i; j<d_size; j++) {
-		  v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		  v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		  v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		  c++;
-		}
-	      }
-	      
-	      MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
-			       v_Ve, v_Vbeta};
-	      sumStat.push_back(SNPs);
-	    }
-	  }
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	return;
+void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+                          const gsl_matrix *UtW, const gsl_matrix *UtY) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+  // Fit the null model first, then read file_geno one SNP line at a time.
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  string line;
+  char *ch_ptr;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_miss, c_phen;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+  // dc_size: rows of xHi_all; v_size: entries in a d_size upper triangle.
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Batch buffers: up to msize SNP columns go through one eigenlib_dgemm call.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  // Work matrices passed to MphEM (each d_size x n_size).
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Work matrices passed to MphNR.
+  // Hi_all: each d_size x d_size block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Hiy_all: each column is H_k^{-1} y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // xHi_all: each dc_size x d_size block is x_k \otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+  gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null-model estimates, reused as starting values for every SNP.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+  // Views on the first c_size rows/columns, i.e. everything except the SNP.
+  gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+
+  gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+  // Last row of X / last column of B are the SNP slot; zero them for now.
+  gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row.vector);
+  gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col.vector);
+  // Null model, 'R' pass: MphInitial, then MphEM, then MphNR (stored as REMLE).
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub.matrix);
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+  // Record the 'R' null estimates (upper triangles) and Hessian diagonals.
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear(); // NOTE(review): VVg_/VVe_remle_null not cleared here.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+  // Print the lower triangle of each estimate; se = sqrt(Hessian diagonal).
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+  // Null model, 'L' pass, continuing from the V_g/V_e above (stored as MLE).
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+  // Record the 'L' null estimates the same way.
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear(); // NOTE(review): VVg_/VVe_mle_null not cleared here.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+  // Per-SNP output buffers, pre-sized to d_size and v_size entries.
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+  // Snapshot the 'L' null fit (the last one computed above).
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Count the SNPs kept for analysis (t_last), then read and test in batches.
+  size_t csnp = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof(); // NOTE(review): eof test is discarded.
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    // Discard three leading tokens (strtok edits line's buffer in place).
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+      // NOTE(review): ch_ptr is NULL if the line has too few tokens.
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+    // Mean of observed values; NOTE(review): 0/0 when n_miss == ni_test.
+    x_mean /= (double)(ni_test - n_miss);
+    // Impute missing entries with the mean.
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i); // NOTE(review): value is never read.
+    }
+    // Stage this SNP as the next column of the current batch.
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    csnp++;
+    // Flush when the batch is full or the last retained SNP has been read.
+    if (csnp % msize == 0 || csnp == t_last) {
+      size_t l = 0;
+      if (csnp % msize == 0) {
+        l = msize;
+      } else {
+        l = csnp % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+      // Test each batch SNP; column i of UtXlarge becomes the last row of X.
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+        // Reset to the null-model estimates before fitting this SNP.
+        gsl_matrix_memcpy(V_g, V_g_null);
+        gsl_matrix_memcpy(V_e, V_e_null);
+        gsl_matrix_memcpy(B, B_null);
+
+        time_start = clock();
+
+        // a_mode 3 (p_score) is handled before modes 2 and 1.
+        if (a_mode == 3 || a_mode == 4) {
+          p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+                             V_e_null, UltVehiY, beta, Vbeta);
+          if (p_score < p_nr && crt == 1) {
+            logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                            Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+          }
+        }
+        // a_mode 2 or 4: p_lrt from the 'L' fit of the full model.
+        if (a_mode == 2 || a_mode == 4) {
+          logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+
+          // Called for beta and Vbeta; its p-value is overwritten below.
+          p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                           UltVehiY, beta, Vbeta);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+          // Refine with MphNR only when the EM-based p-value is below p_nr.
+          if (p_lrt < p_nr) {
+            logl_H1 =
+                MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+            // Called for beta and Vbeta; its p-value is overwritten below.
+            p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                             UltVehiY, beta, Vbeta);
+            p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+            if (crt == 1) {
+              p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+        // a_mode 1 or 4: p_wald from the 'R' fit of the full model.
+        if (a_mode == 1 || a_mode == 4) {
+          logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+          p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                            UltVehiY, beta, Vbeta);
+
+          if (p_wald < p_nr) {
+            logl_H1 =
+                MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                              UltVehiY, beta, Vbeta);
+
+            if (crt == 1) {
+              p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Flatten beta and the upper triangles of V_g, V_e and Vbeta.
+        for (size_t i = 0; i < d_size; i++) { // shadows the batch index i
+          v_beta[i] = gsl_vector_get(beta, i);
+        }
+
+        c = 0;
+        for (size_t i = 0; i < d_size; i++) {
+          for (size_t j = i; j < d_size; j++) {
+            v_Vg[c] = gsl_matrix_get(V_g, i, j);
+            v_Ve[c] = gsl_matrix_get(V_e, i, j);
+            v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+            c++;
+          }
+        }
+        // p-values of tests not selected by a_mode keep their earlier value.
+        MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  return;
 }
 
-void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
-			  const gsl_matrix *UtW, const gsl_matrix *UtY) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	char ch[1];
-	bitset<8> b;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_bit, n_miss, ci_total, ci_test;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub =
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-	gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
-	gsl_vector_view X_row=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row.vector);
-	gsl_vector_view B_col=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
-		       &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
-		       crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
-						    c+v_size));
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-		cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y,
-		       U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
-		       UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-	
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-	
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-	
-	// Start reading genotypes and analyze.
-	// Calculate n_bit and c, the number of bit for each snp.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-	  infile.read(ch,1);
-	  b=ch[0];
-	}
-
-	size_t csnp=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-	  if (t%d_pace==0 || t==snpInfo.size()-1) {
-	    ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-
-	  // n_bit, and 3 is the number of magic numbers.
-	  infile.seekg(t*n_bit+3);		
-
-	  //read genotypes
-	  x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0;
-	  for (int i=0; i<n_bit; ++i) {
-	    infile.read(ch,1);
-	    b=ch[0];
-
-	    // Minor allele homozygous: 2.0; major: 0.0;
-	    for (size_t j=0; j<4; ++j) {                
-	      if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
-	      if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-	      
-	      if (b[2*j]==0) {
-		if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
-		else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
-	      }
-	      else {
-		if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
-		else {gsl_vector_set(x, ci_test, -9); n_miss++; }
-	      }
-	      
-	      ci_total++;
-	      ci_test++;
-			}
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    geno=gsl_vector_get(x,i);
-	    if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
-	  }
-
-	  gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
-	  gsl_vector_memcpy (&Xlarge_col.vector, x);
-	  csnp++;
-	  
-	  if (csnp%msize==0 || csnp==t_last ) {
-	    size_t l=0;
-	    if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-	    
-	    gsl_matrix_view Xlarge_sub =
-	      gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-	    gsl_matrix_view UtXlarge_sub =
-	      gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-	    
-	    time_start=clock();
-	    eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-			    &UtXlarge_sub.matrix);
-	    time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    
-	    gsl_matrix_set_zero (Xlarge);
-	    
-	    for (size_t i=0; i<l; i++) {
-	      gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
-	      gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-	      
-	      // Initial values.
-	      gsl_matrix_memcpy (V_g, V_g_null);
-	      gsl_matrix_memcpy (V_e, V_e_null);
-	      gsl_matrix_memcpy (B, B_null);
-	      
-	      time_start=clock();
-	      
-	      // 3 is before 1.
-	      if (a_mode==3 || a_mode==4) {
-		p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		
-		if (p_score<p_nr && crt==1) {
-		  logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
-				 xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
-				 crt_b, crt_c);
-		  p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		}
-	      }
-	      
-	      if (a_mode==2 || a_mode==4) {
-		logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		
-		// Calculate beta and Vbeta.
-		p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				V_e, UltVehiY, beta, Vbeta);
-		p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-		
-		if (p_lrt<p_nr) {
-		  logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				  V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
-					 (double)d_size );
-		  if (crt==1) {
-		    p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      if (a_mode==1 || a_mode==4) {
-		logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				 V_e, UltVehiY, beta, Vbeta);
-		
-		if (p_wald<p_nr) {
-		  logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (crt==1) {
-		    p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      // Store summary data.
-	      for (size_t i=0; i<d_size; i++) {
-		v_beta[i]=gsl_vector_get (beta, i);
-	      }
-	      
-	      c=0;
-	      for (size_t i=0; i<d_size; i++) {
-		for (size_t j=i; j<d_size; j++) {
-		  v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		  v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		  v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		  c++;
-		}
-	      }
-	      
-	      MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
-			       v_Ve, v_Vbeta};
-	      sumStat.push_back(SNPs);
-	    }
-	  }
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	return;
+void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                         const gsl_matrix *UtW, const gsl_matrix *UtY) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  char ch[1];
+  bitset<8> b;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_bit, n_miss, ci_total, ci_test;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Batch buffers: up to msize SNP columns are staged here before the U^T X product.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dc x d block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+  gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+  gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row.vector);
+  gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub.matrix);
+
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Start reading genotypes and analyze.
+  // Calculate n_bit, the number of bytes per SNP (4 genotypes per byte, rounded up).
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Read (and discard) the first three magic-number bytes.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  size_t csnp = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // Seek to SNP t: n_bit bytes per SNP, offset by the 3 magic-number bytes.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes, accumulating the genotype sum (x_mean) and missing count.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0;
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(x, ci_test, 2);
+            x_mean += 2.0;
+          } else {
+            gsl_vector_set(x, ci_test, 1);
+            x_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(x, ci_test, 0);
+          } else {
+            gsl_vector_set(x, ci_test, -9);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+        ci_test++;
+      }
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    csnp++;
+
+    if (csnp % msize == 0 || csnp == t_last) {
+      size_t l = 0;
+      if (csnp % msize == 0) {
+        l = msize;
+      } else {
+        l = csnp % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+        // Initial values.
+        gsl_matrix_memcpy(V_g, V_g_null);
+        gsl_matrix_memcpy(V_e, V_e_null);
+        gsl_matrix_memcpy(B, B_null);
+
+        time_start = clock();
+
+        // Mode 3 (p_score) runs before mode 1 (p_wald): it uses the null V_g/V_e.
+        if (a_mode == 3 || a_mode == 4) {
+          p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+                             V_e_null, UltVehiY, beta, Vbeta);
+
+          if (p_score < p_nr && crt == 1) {
+            logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                            Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+          }
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+
+          // Calculate beta and Vbeta; the p value returned here is overwritten below.
+          p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                           UltVehiY, beta, Vbeta);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+          if (p_lrt < p_nr) {
+            logl_H1 =
+                MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+            // Calculate beta and Vbeta; the p value returned here is overwritten below.
+            p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                             UltVehiY, beta, Vbeta);
+            p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+            if (crt == 1) {
+              p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+          p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                            UltVehiY, beta, Vbeta);
+
+          if (p_wald < p_nr) {
+            logl_H1 =
+                MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                              UltVehiY, beta, Vbeta);
+
+            if (crt == 1) {
+              p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        for (size_t i = 0; i < d_size; i++) {
+          v_beta[i] = gsl_vector_get(beta, i);
+        }
+
+        c = 0;
+        for (size_t i = 0; i < d_size; i++) {
+          for (size_t j = i; j < d_size; j++) {
+            v_Vg[c] = gsl_matrix_get(V_g, i, j);
+            v_Ve[c] = gsl_matrix_get(V_e, i, j);
+            v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+            c++;
+          }
+        }
+
+        MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  return;
 }
 
 // Calculate Vg, Ve, B, se(B) in the null mvLMM model.
 // Both B and se_B are d by c matrices.
-void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
-			const gsl_matrix *UtY, const size_t em_iter,
-			const size_t nr_iter, const double em_prec,
-			const double nr_prec, const double l_min,
-			const double l_max, const size_t n_region,
-			gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
-			gsl_matrix *se_B) {
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-	size_t dc_size=d_size*c_size, v_size=d_size*(d_size+1)/2;
-
-	double logl, crt_a, crt_b, crt_c;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-	
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);		
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);	
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	// Transpose matrices.
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *W=gsl_matrix_alloc (c_size, n_size);
-	gsl_matrix_transpose_memcpy (Y, UtY);
-	gsl_matrix_transpose_memcpy (W, UtW);
-
-	// Initial, EM, NR, and calculate B.
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, 
-		   l_min, l_max, n_region, V_g, V_e, B);
-	logl=MphEM ('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, 
-		    OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-		    UltVehiE, V_g, V_e, B);
-	logl=MphNR ('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, 
-		    Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
-
-	// Free matrices.
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(W);
-
-	return;
+void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+                       const gsl_matrix *UtY, const size_t em_iter,
+                       const size_t nr_iter, const double em_prec,
+                       const double nr_prec, const double l_min,
+                       const double l_max, const size_t n_region,
+                       gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
+                       gsl_matrix *se_B) {
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+  size_t dc_size = d_size * c_size, v_size = d_size * (d_size + 1) / 2;
+
+  double logl, crt_a, crt_b, crt_c; // logl is assigned below but never read.
+
+  // d_size x n_size matrices passed to MphEM (UltVehiY also to MphCalcBeta).
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices passed to MphNR.
+  // Each d x d block is H_k^{-1}; n_size blocks are stored side by side.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Column k is H_k^{-1} y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dc x d block is x_k \otimes H_k^{-1} (dc x d*n allocated in total).
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  // Transposed copies: Y is d_size x n_size and W is c_size x n_size.
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *W = gsl_matrix_alloc(c_size, n_size);
+  gsl_matrix_transpose_memcpy(Y, UtY);
+  gsl_matrix_transpose_memcpy(W, UtW);
+
+  // Initial values, EM, then NR (both in 'R' mode); finally fill B and se_B.
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max,
+             n_region, V_g, V_e, B);
+  logl = MphEM('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE,
+               UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+  logl = MphNR('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g,
+               V_e, Hessian, crt_a, crt_b, crt_c);
+  MphCalcBeta(eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
+
+  // Release every matrix allocated above.
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(W);
+
+  return;
 }
 
-void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, 
-			      const gsl_matrix *UtW, const gsl_matrix *UtY, 
-			      const gsl_vector *env) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl; 
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	string line;
-	char *ch_ptr;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_miss, c_phen;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-	gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values; including env but not
-	// including x.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
-	gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
-	gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
-	gsl_matrix_view xHi_all_sub1=
-	  gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
-
-	gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub2=
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
-	gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
-	gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
-	gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
-
-	gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
-	gsl_vector_set_zero(&X_row1.vector);
-	gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row2.vector);
-
-	gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
-	gsl_vector_set_zero(&B_col1.vector);
-	gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col2.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, 
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, 
-		       U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-		       UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
-						    c+v_size));
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-	
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, 
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-		       UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-	
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Start reading genotypes and analyze.
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  !safeGetline(infile, line).eof();
-	  if (t%d_pace==0 || t==(ns_total-1)) {
-	    ProgressBar ("Reading SNPs  ", t, ns_total-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  ch_ptr=strtok ((char *)line.c_str(), " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  
-	  x_mean=0.0; c_phen=0; n_miss=0;
-	  gsl_vector_set_zero(x_miss);
-	  for (size_t i=0; i<ni_total; ++i) {
-	    ch_ptr=strtok (NULL, " , \t");
-	    if (indicator_idv[i]==0) {continue;}
-	    
-	    if (strcmp(ch_ptr, "NA")==0) {
-	      gsl_vector_set(x_miss, c_phen, 0.0); 
-	      n_miss++;
-	    }
-	    else {
-	      geno=atof(ch_ptr);
-	      
-	      gsl_vector_set(x, c_phen, geno);
-	      gsl_vector_set(x_miss, c_phen, 1.0);
-	      x_mean+=geno;
-	    }
-	    c_phen++;
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
-	    geno=gsl_vector_get(x, i);
-	    if (x_mean>1) {
-	      gsl_vector_set(x, i, 2-geno);
-	    }
-	  }
-	  
-	  // Calculate statistics.
-	  time_start=clock();
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
-	  gsl_vector_mul (x, env);
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
-	  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	  
-		//initial values
-		gsl_matrix_memcpy (V_g, V_g_null);
-		gsl_matrix_memcpy (V_e, V_e_null);
-		gsl_matrix_memcpy (B, B_null);
-
-		if (a_mode==2 || a_mode==3 || a_mode==4) {
-		  if (a_mode==3 || a_mode==4) {
-		    logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, 
-				   &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, 
-				   OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-				   UltVehiE, V_g, V_e, &B_sub2.matrix);
-		    logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, 
-				   &X_sub2.matrix, Y, Hi_all, 
-				   &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-				 &B_sub2.matrix, se_B_null2);
-		  }
-
-		  if (a_mode==2 || a_mode==4) {
-		    logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, 
-				   &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, 
-				   OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-				   UltVehiE, V_g, V_e, &B_sub2.matrix);
-		    logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, 
-				   &X_sub2.matrix, Y, Hi_all, 
-				   &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-				 &B_sub2.matrix, se_B_null2);
-		  }
-		}
-
-		time_start=clock();
-
-		// 3 is before 1.
-		if (a_mode==3 || a_mode==4) {
-		  p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				    V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		  if (p_score<p_nr && crt==1) {
-		    logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, 
-				   xHi_all, Hiy_all, V_g, V_e, Hessian, 
-				   crt_a, crt_b, crt_c);
-		    p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		  }
-		}
-		
-		if (a_mode==2 || a_mode==4) {
-		  logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, 
-				 U_hat, E_hat, OmegaU, OmegaE, UltVehiY, 
-				 UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				  V_g, V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),(double)d_size);
-		  
-		  if (p_lrt<p_nr) {
-		    logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, 
-				   Hi_all, xHi_all, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    
-		    // Calculate beta and Vbeta.
-		    p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				    V_g, V_e, UltVehiY, beta, Vbeta);
-		    p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),
-					  (double)d_size );
-		    
-		    if (crt==1) {
-		      p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		    }
-		  }
-		}
-		
-		if (a_mode==1 || a_mode==4) {
-		  logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, 
-				 U_hat, E_hat, OmegaU, OmegaE, UltVehiY, 
-				 UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		  p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (p_wald<p_nr) {
-		    logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, 
-				   Hi_all, xHi_all, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				     V_g, V_e, UltVehiY, beta, Vbeta);
-		    
-		    if (crt==1) {
-		      p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		    }
-		  }
-		}
-
-		if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Store summary data.
-		for (size_t i=0; i<d_size; i++) {
-		  v_beta[i]=gsl_vector_get (beta, i);
-		}
-
-		c=0;
-		for (size_t i=0; i<d_size; i++) {
-		  for (size_t j=i; j<d_size; j++) {
-		    v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		    v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		    v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		    c++;
-		  }
-		}
-
-		MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, 
-				 v_Ve, v_Vbeta};
-		sumStat.push_back(SNPs);
-	}
-	cout<<endl;
-
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null1);
-	gsl_matrix_free(se_B_null2);
-
-	return;
+void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+                             const gsl_matrix *UtW, const gsl_matrix *UtY,
+                             const gsl_vector *env) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  string line;
+  char *ch_ptr;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_miss, c_phen;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+  gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values; including env but not
+  // including x.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
+  gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
+  gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
+  gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
+      xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
+
+  gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub2 =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+
+  gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
+  gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
+  gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
+  gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+
+  gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
+  gsl_vector_set_zero(&X_row1.vector);
+  gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row2.vector);
+
+  gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
+  gsl_vector_set_zero(&B_col1.vector);
+  gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col2.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub1.matrix);
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Start reading genotypes and analyze.
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i);
+      if (x_mean > 1) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+    gsl_vector_mul(x, env);
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // initial values
+    gsl_matrix_memcpy(V_g, V_g_null);
+    gsl_matrix_memcpy(V_e, V_e_null);
+    gsl_matrix_memcpy(B, B_null);
+
+    if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
+      if (a_mode == 3 || a_mode == 4) {
+        logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+
+      if (a_mode == 2 || a_mode == 4) {
+        logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+    }
+
+    time_start = clock();
+
+    // 3 is before 1.
+    if (a_mode == 3 || a_mode == 4) {
+      p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
+                         V_e_null, UltVehiY, beta, Vbeta);
+      if (p_score < p_nr && crt == 1) {
+        logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                        Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+      }
+    }
+
+    if (a_mode == 2 || a_mode == 4) {
+      logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+
+      // Calculate beta and Vbeta.
+      p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                       UltVehiY, beta, Vbeta);
+      p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+      if (p_lrt < p_nr) {
+        logl_H1 =
+            MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+        // Calculate beta and Vbeta.
+        p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                         UltVehiY, beta, Vbeta);
+        p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+        if (crt == 1) {
+          p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (a_mode == 1 || a_mode == 4) {
+      logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+      p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                        UltVehiY, beta, Vbeta);
+
+      if (p_wald < p_nr) {
+        logl_H1 =
+            MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                          UltVehiY, beta, Vbeta);
+
+        if (crt == 1) {
+          p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (x_mean > 1) {
+      gsl_vector_scale(beta, -1.0);
+    }
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    for (size_t i = 0; i < d_size; i++) {
+      v_beta[i] = gsl_vector_get(beta, i);
+    }
+
+    c = 0;
+    for (size_t i = 0; i < d_size; i++) {
+      for (size_t j = i; j < d_size; j++) {
+        v_Vg[c] = gsl_matrix_get(V_g, i, j);
+        v_Ve[c] = gsl_matrix_get(V_e, i, j);
+        v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+        c++;
+      }
+    }
+
+    MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null1);
+  gsl_matrix_free(se_B_null2);
+
+  return;
 }
 
-void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, 
-			     const gsl_matrix *UtW, const gsl_matrix *UtY, 
-			     const gsl_vector *env) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl; 
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	char ch[1];
-	bitset<8> b;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_bit, n_miss, ci_total, ci_test;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-	
-	// Each column is H_k^{-1}y_k
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-	
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);	
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
-	gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
-	gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
-	gsl_matrix_view xHi_all_sub1=
-	  gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
-
-	gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub2=
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
-	gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
-	gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
-	gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
-
-	gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
-	gsl_vector_set_zero(&X_row1.vector);
-	gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row2.vector);
-
-	gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
-	gsl_vector_set_zero(&B_col1.vector);
-	gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col2.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, 
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
-
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, 
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-		       UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
-						    c+v_size));
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, 
-		       U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-		       UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Start reading genotypes and analyze.
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-	  infile.read(ch,1);
-	  b=ch[0];
-	}
-
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-	  if (t%d_pace==0 || t==snpInfo.size()-1) {
-	    ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  // n_bit, and 3 is the number of magic numbers.
-	  infile.seekg(t*n_bit+3);
-
-	  // Read genotypes.
-	  x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0;
-	  for (int i=0; i<n_bit; ++i) {
-	    infile.read(ch,1);
-	    b=ch[0];
-
-	    // Minor allele homozygous: 2.0; major: 0.0.
-	    for (size_t j=0; j<4; ++j) {                
-	      
-	      if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
-	      if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-	      
-	      if (b[2*j]==0) {
-		if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
-		else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
-	      }
-	      else {
-		if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
-		else {gsl_vector_set(x, ci_test, -9); n_miss++; }
-	      }
-	      
-	      ci_total++;
-	      ci_test++;
-	    }
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    geno=gsl_vector_get(x,i);
-	    if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
-	    if (x_mean>1) {
-	      gsl_vector_set(x, i, 2-geno);
-	    }
-	  }
-	  
-	  // Calculate statistics.
-	  time_start=clock();
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
-	  gsl_vector_mul (x, env);
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
-	  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	  
-	  // Initial values.
-	  gsl_matrix_memcpy (V_g, V_g_null);
-	  gsl_matrix_memcpy (V_e, V_e_null);
-	  gsl_matrix_memcpy (B, B_null);
-	  
-	  if (a_mode==2 || a_mode==3 || a_mode==4) {
-	    if (a_mode==3 || a_mode==4) {
-	      logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, 
-			     &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, 
-			     UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, 
-			     V_e, &B_sub2.matrix);
-	      logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, 
-			     &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, 
-			     Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	      MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-			   &B_sub2.matrix, se_B_null2);
-	    }
-	    
-	    if (a_mode==2 || a_mode==4) {
-	      logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, 
-			     &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, 
-			     UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, 
-			     V_e, &B_sub2.matrix);
-	      logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, 
-			     &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, 
-			     Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	      MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-			   &B_sub2.matrix, se_B_null2);
-	    }
-	  }
-	  
-	  time_start=clock();
-	  
-	  // 3 is before 1.
-	  if (a_mode==3 || a_mode==4) {
-	    p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-			      V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-	    
-	    if (p_score<p_nr && crt==1) {
-	      logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, 
-			     Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	      p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-	    }
-	  }
-	  
-	  if (a_mode==2 || a_mode==4) {
-	    logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, 
-			   E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-			   UltVehiU, UltVehiE, V_g, V_e, B);
-
-	    // Calculate beta and Vbeta.
-	    p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			    V_e, UltVehiY, beta, Vbeta);
-	    p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-	    
-	    if (p_lrt<p_nr) {
-	      logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, 
-			     xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, 
-			     crt_b, crt_c);
-	      
-	      // Calculate beta and Vbeta.
-	      p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			      V_e, UltVehiY, beta, Vbeta);
-	      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-	      if (crt==1) {
-		p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-	      }
-	    }
-	  }
-	  
-	  if (a_mode==1 || a_mode==4) {
-	    logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, 
-			   E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-			   UltVehiU, UltVehiE, V_g, V_e, B);
-	    p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			     V_e, UltVehiY, beta, Vbeta);
-	    
-	    if (p_wald<p_nr) {
-	      logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, 
-			     xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, 
-			     crt_b, crt_c);
-	      p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			       V_e, UltVehiY, beta, Vbeta);
-	      
-	      if (crt==1) {
-		p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-	      }
-	    }
-	  }
-	  
-	  if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
-	  time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	  // Store summary data.
-	  for (size_t i=0; i<d_size; i++) {
-	    v_beta[i]=gsl_vector_get (beta, i);
-	  }
-	  
-	  c=0;
-	  for (size_t i=0; i<d_size; i++) {
-	    for (size_t j=i; j<d_size; j++) {
-	      v_Vg[c]=gsl_matrix_get (V_g, i, j);
-	      v_Ve[c]=gsl_matrix_get (V_e, i, j);
-	      v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-	      c++;
-	    }
-	  }
-	  
-	  MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, 
-			   v_Vg, v_Ve, v_Vbeta};
-	  sumStat.push_back(SNPs);
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null1);
-	gsl_matrix_free(se_B_null2);
-
-	return;
+void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+                            const gsl_matrix *UtW, const gsl_matrix *UtY,
+                            const gsl_vector *env) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  char ch[1];
+  bitset<8> b;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_bit, n_miss, ci_total, ci_test;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Large d_size x n_size work matrices passed to MphEM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large work matrices passed to MphNR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null-model estimates; copied into V_g, V_e and B before each SNP.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
+  gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
+  gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
+  gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
+      xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
+
+  gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub2 =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+  // Rows 0..c_size-3 of X hold UtW^T; row c_size-2 holds U^T * env.
+  gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
+  gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
+  gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
+  gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+  // Rows c_size-1 and c_size are filled per SNP (U^T x and U^T (x .* env)).
+  gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
+  gsl_vector_set_zero(&X_row1.vector);
+  gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row2.vector);
+
+  gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
+  gsl_vector_set_zero(&B_col1.vector);
+  gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col2.vector);
+  // Null model: first c_size-1 rows of X only (the SNP rows are excluded).
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub1.matrix);
+
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+  // NOTE(review): VVg_remle_null/VVe_remle_null get no clear() in this block.
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+  // Refit the null model with 'L'; stored below in the *_mle_null members.
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+  // NOTE(review): VVg_mle_null/VVe_mle_null get no clear() in this block.
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Start reading genotypes and analyze.
+  // Calculate n_bit, the number of bytes per SNP (four genotypes per byte).
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Read and discard the first three magic-number bytes.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // Seek to SNP t: n_bit bytes per SNP after the 3 magic-number bytes.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        // In the last byte, stop once all ni_total individuals are read.
+        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(x, ci_test, 2);
+            x_mean += 2.0;
+          } else {
+            gsl_vector_set(x, ci_test, 1);
+            x_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(x, ci_test, 0);
+          } else {
+            gsl_vector_set(x, ci_test, -9);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+        ci_test++;
+      }
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+    // Impute missing genotypes with x_mean; flip to 2 - geno if x_mean > 1.
+    for (size_t i = 0; i < ni_test; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+      if (x_mean > 1) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+
+    // Calculate statistics: U^T x and U^T (x .* env) go into X_row1, X_row2.
+    time_start = clock();
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+    gsl_vector_mul(x, env);
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Initial values: restart from the null-model estimates.
+    gsl_matrix_memcpy(V_g, V_g_null);
+    gsl_matrix_memcpy(V_e, V_e_null);
+    gsl_matrix_memcpy(B, B_null);
+    // Refit H0 with the SNP main effect (first c_size rows of X) per SNP.
+    if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
+      if (a_mode == 3 || a_mode == 4) {
+        logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+
+      if (a_mode == 2 || a_mode == 4) {
+        logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+    }
+
+    time_start = clock();
+
+    // a_mode 3 (p_score) is handled before a_mode 1 (p_wald).
+    if (a_mode == 3 || a_mode == 4) {
+      p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
+                         V_e_null, UltVehiY, beta, Vbeta);
+
+      if (p_score < p_nr && crt == 1) {
+        logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                        Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+      }
+    }
+
+    if (a_mode == 2 || a_mode == 4) {
+      logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+
+      // Calculate beta and Vbeta; p_lrt itself is replaced just below.
+      p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                       UltVehiY, beta, Vbeta);
+      p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+      if (p_lrt < p_nr) {
+        logl_H1 =
+            MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+        // Calculate beta and Vbeta; p_lrt itself is replaced just below.
+        p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                         UltVehiY, beta, Vbeta);
+        p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+        if (crt == 1) {
+          p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (a_mode == 1 || a_mode == 4) {
+      logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+      p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                        UltVehiY, beta, Vbeta);
+
+      if (p_wald < p_nr) {
+        logl_H1 =
+            MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                          UltVehiY, beta, Vbeta);
+
+        if (crt == 1) {
+          p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (x_mean > 1) {
+      gsl_vector_scale(beta, -1.0);
+    }
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    for (size_t i = 0; i < d_size; i++) {
+      v_beta[i] = gsl_vector_get(beta, i);
+    }
+
+    c = 0;
+    for (size_t i = 0; i < d_size; i++) {
+      for (size_t j = i; j < d_size; j++) {
+        v_Vg[c] = gsl_matrix_get(V_g, i, j);
+        v_Ve[c] = gsl_matrix_get(V_e, i, j);
+        v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+        c++;
+      }
+    }
+
+    MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null1);
+  gsl_matrix_free(se_B_null2);
+
+  return;
 }
diff --git a/src/mvlmm.h b/src/mvlmm.h
index d495c26..4329ad1 100644
--- a/src/mvlmm.h
+++ b/src/mvlmm.h
@@ -19,89 +19,86 @@
 #ifndef __MVLMM_H__
 #define __MVLMM_H__
 
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
 #include "io.h"
+#include "param.h"
 
 using namespace std;
 
 class MVLMM {
 
 public:
-	// IO-related parameters.
-	int a_mode;	// Analysis mode: 1/2/3/4 for Frequentist tests.
-	size_t d_pace;	// Display pace.
-
-	string file_bfile;
-	string file_geno;
-	string file_oxford;
-	string file_out;
-	string path_out;
-
-	// MVLMM-related parameters.
-	double l_min;
-	double l_max;
-	size_t n_region;
-	double logl_remle_H0, logl_mle_H0;
-	vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
-        vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
-        vector<double> VVe_mle_null;
-        vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
-        vector<double> se_beta_mle_null;
-	double p_nr;
-	size_t em_iter, nr_iter;
-	double em_prec, nr_prec;
-	size_t crt;
-
-	// Summary statistics.
-	size_t ni_total, ni_test; // Number of individuals.
-	size_t ns_total, ns_test; // Number of SNPs.
-	size_t n_cvt;
-	size_t n_ph;
-	double time_UtX;	  // Time spent on optimization iterations.
-	double time_opt;	  // Time spent on optimization iterations.
-
-        // Indicator for individuals (phenotypes): 0 missing, 1
-        // available for analysis.
-	vector<int> indicator_idv;
-
-    	// Sequence indicator for SNPs: 0 ignored because of (a) maf,
-    	// (b) miss, (c) non-poly; 1 available for analysis.
-	vector<int> indicator_snp;
-
-	vector<SNPINFO> snpInfo;    // Record SNP information.
-
-	// Not included in PARAM.
-	vector<MPHSUMSTAT> sumStat; // Output SNPSummary Data.
-
-	// Main functions
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-	void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
-			    const gsl_matrix *UtW, const gsl_matrix *UtY);
-	void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
-			   const gsl_matrix *UtW, const gsl_matrix *UtY);
-	void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
-			  const gsl_matrix *UtW, const gsl_matrix *UtY);
-	void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
-			       const gsl_matrix *UtW, const gsl_matrix *UtY,
-			       const gsl_vector *env);
-        void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
-			      const gsl_matrix *UtW, const gsl_matrix *UtY,
-			      const gsl_vector *env);
-	void WriteFiles ();
-
+  // IO-related parameters.
+  int a_mode;    // Analysis mode: 1/2/3/4 for Frequentist tests.
+  size_t d_pace; // Display pace.
+
+  string file_bfile;
+  string file_geno;
+  string file_oxford;
+  string file_out;
+  string path_out;
+
+  // MVLMM-related parameters.
+  double l_min; // NOTE(review): presumably lower lambda bound, as in PARAM.
+  double l_max; // NOTE(review): presumably upper lambda bound, as in PARAM.
+  size_t n_region;
+  double logl_remle_H0, logl_mle_H0;
+  vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
+  vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
+  vector<double> VVe_mle_null;
+  vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
+  vector<double> se_beta_mle_null;
+  double p_nr; // The analysis runs MphNR when p_wald < p_nr.
+  size_t em_iter, nr_iter;
+  double em_prec, nr_prec;
+  size_t crt; // When 1, p_wald is passed through PCRT.
+
+  // Summary statistics.
+  size_t ni_total, ni_test; // Number of individuals.
+  size_t ns_total, ns_test; // Number of SNPs.
+  size_t n_cvt;
+  size_t n_ph;
+  double time_UtX; // NOTE(review): presumably U^T X time; comment was a copy.
+  double time_opt; // Time spent on optimization iterations (minutes).
+
+  // Indicator for individuals (phenotypes): 0 missing, 1
+  // available for analysis.
+  vector<int> indicator_idv;
+
+  // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+  // (b) miss, (c) non-poly; 1 available for analysis.
+  vector<int> indicator_snp;
+
+  vector<SNPINFO> snpInfo; // Record SNP information.
+
+  // Not included in PARAM.
+  vector<MPHSUMSTAT> sumStat; // Output SNPSummary Data.
+
+  // Main functions
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+  void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+                     const gsl_matrix *UtW, const gsl_matrix *UtY);
+  void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                    const gsl_matrix *UtW, const gsl_matrix *UtY);
+  void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+                   const gsl_matrix *UtW, const gsl_matrix *UtY);
+  void AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+                        const gsl_matrix *UtW, const gsl_matrix *UtY,
+                        const gsl_vector *env);
+  void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+                       const gsl_matrix *UtW, const gsl_matrix *UtY,
+                       const gsl_vector *env);
+  void WriteFiles();
 };
 
-void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
-			const gsl_matrix *UtY, const size_t em_iter,
-			const size_t nr_iter, const double em_prec,
-			const double nr_prec, const double l_min,
-			const double l_max, const size_t n_region,
-			gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
-			gsl_matrix *se_B);
+void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+                       const gsl_matrix *UtY, const size_t em_iter,
+                       const size_t nr_iter, const double em_prec,
+                       const double nr_prec, const double l_min,
+                       const double l_max, const size_t n_region,
+                       gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
+                       gsl_matrix *se_B);
 
 #endif
-
-
diff --git a/src/param.cpp b/src/param.cpp
index 413d517..2572bbb 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -16,1322 +16,1357 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include <algorithm>
+#include <cmath>
+#include <cstring>
 #include <fstream>
+#include <iostream>
 #include <string>
-#include <cstring>
 #include <sys/stat.h>
-#include <cmath>
-#include <algorithm>
 
-#include "gsl/gsl_randist.h"
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_matrix.h"
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
-#include "gsl/gsl_blas.h"
+#include "gsl/gsl_randist.h"
+#include "gsl/gsl_vector.h"
 
 #include "eigenlib.h"
+#include "io.h"
 #include "mathfunc.h"
 #include "param.h"
-#include "io.h"
 
 using namespace std;
 
-PARAM::PARAM(void):
-mode_silence (false), a_mode (0), k_mode(1), d_pace (100000),
-file_out("result"), path_out("./output/"),
-miss_level(0.05), maf_level(0.01), hwe_level(0), r2_level(0.9999),
-l_min(1e-5), l_max(1e5), n_region(10),p_nr(0.001),em_prec(0.0001),
-nr_prec(0.0001),em_iter(10000),nr_iter(100),crt(0),
-pheno_mean(0), noconstrain (false),
-h_min(-1), h_max(-1), h_scale(-1),
-rho_min(0.0), rho_max(1.0), rho_scale(-1),
-logp_min(0.0), logp_max(0.0), logp_scale(-1),
-h_ngrid(10), rho_ngrid(10),
-s_min(0), s_max(300),
-w_step(100000),	s_step(1000000),
-r_pace(10), w_pace(1000),
-n_accept(0),
-n_mh(10),
-geo_mean(2000.0),
-randseed(-1),
-window_cm(0), window_bp(0), window_ns(0), n_block(200),
-error(false),
-ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0),
-time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
-time_UtZ(0.0), time_opt(0.0), time_Omega(0.0)
-{}
+PARAM::PARAM(void) // Built-in option defaults; time_* counters start at 0.0.
+    : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000),
+      file_out("result"), path_out("./output/"), miss_level(0.05),
+      maf_level(0.01), hwe_level(0), r2_level(0.9999), l_min(1e-5), l_max(1e5),
+      n_region(10), p_nr(0.001), em_prec(0.0001), nr_prec(0.0001),
+      em_iter(10000), nr_iter(100), crt(0), pheno_mean(0), noconstrain(false),
+      h_min(-1), h_max(-1), h_scale(-1), rho_min(0.0), rho_max(1.0),
+      rho_scale(-1), logp_min(0.0), logp_max(0.0), logp_scale(-1), h_ngrid(10),
+      rho_ngrid(10), s_min(0), s_max(300), w_step(100000), s_step(1000000),
+      r_pace(10), w_pace(1000), n_accept(0), n_mh(10), geo_mean(2000.0),
+      randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200),
+      error(false), ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0),
+      time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
+      time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {}
 
 // Read files: obtain ns_total, ng_total, ns_test, ni_test.
-void PARAM::ReadFiles (void) {
-	string file_str;
-
-	// Read cat file.
-	if (!file_mcat.empty()) {
-	  if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;}
-	} else if (!file_cat.empty()) {
-	  if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;}
-	}
-
-	// Read snp weight files.
-	if (!file_wcat.empty()) {
-	  if (ReadFile_wsnp (file_wcat, n_vc, mapRS2wcat)==false) {error=true;}
-	}
-	if (!file_wsnp.empty()) {
-	  if (ReadFile_wsnp (file_wsnp, mapRS2wsnp)==false) {error=true;}
-	}
-
-	// Count number of kinship files.
-	if (!file_mk.empty()) {
-	  if (CountFileLines (file_mk, n_vc)==false) {error=true;}
-	}
-
-	// Read SNP set.
-	if (!file_snps.empty()) {
-		if (ReadFile_snps (file_snps, setSnps)==false) {error=true;}
-	} else {
-		setSnps.clear();
-	}
-
-	// For prediction.
-	if (!file_epm.empty()) {
-		if (ReadFile_est (file_epm, est_column, mapRS2est)==false) {
-		  error=true;
-		}
-		if (!file_bfile.empty()) {
-			file_str=file_bfile+".bim";
-			if (ReadFile_bim (file_str, snpInfo)==false) {
-			  error=true;
-			}
-			file_str=file_bfile+".fam";
-			if (ReadFile_fam (file_str, indicator_pheno, pheno,
-					  mapID2num, p_column)==false) {
-			  error=true;
-			}
-		}
-
-		if (!file_geno.empty()) {
-			if (ReadFile_pheno (file_pheno, indicator_pheno,
-					    pheno, p_column)==false) {
-			  error=true;
-			}
-
-			if (CountFileLines (file_geno, ns_total)==false) {
-			  error=true;
-			}
-		}
-
-		if (!file_ebv.empty() ) {
-			if (ReadFile_column (file_ebv, indicator_bv,
-					     vec_bv, 1)==false) {
-			  error=true;
-			}
-		}
-
-		if (!file_log.empty() ) {
-			if (ReadFile_log (file_log, pheno_mean)==false) {
-			  error=true;
-			}
-		}
-
-		// Convert indicator_pheno to indicator_idv.
-		int k=1;
-		for (size_t i=0; i<indicator_pheno.size(); i++) {
-			k=1;
-			for (size_t j=0; j<indicator_pheno[i].size(); j++) {
-				if (indicator_pheno[i][j]==0) {k=0;}
-			}
-			indicator_idv.push_back(k);
-		}
-
-		ns_test=0;
-
-		return;
-	}
-
-	// Read covariates before the genotype files.
-	if (!file_cvt.empty() ) {
-		if (ReadFile_cvt (file_cvt, indicator_cvt,
-				  cvt, n_cvt)==false) {
-		  error=true;
-		}
-		if ((indicator_cvt).size()==0) {
-			n_cvt=1;
-		}
-	} else {
-		n_cvt=1;
-	}
-
-	if (!file_gxe.empty() ) {
-	  if (ReadFile_column (file_gxe, indicator_gxe, gxe, 1)==false) {
-	    error=true;
-	  }
-	}
-	if (!file_weight.empty() ) {
-	  if (ReadFile_column (file_weight, indicator_weight,
-			       weight, 1)==false) {
-	    error=true;
-	  }
-	}
-
-	// WJA added.
-	// Read genotype and phenotype file for bgen format.
-	if (!file_oxford.empty()) {
-		file_str=file_oxford+".sample";
-		if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,
-				    indicator_cvt, cvt, n_cvt)==false) {
-		  error=true;
-		}
-		if ((indicator_cvt).size()==0) {
-			n_cvt=1;
-		}
-
-		// Post-process covariates and phenotypes, obtain
-		// ni_test, save all useful covariates.
-		ProcessCvtPhen();
-
-		// Obtain covariate matrix.
-		gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-		CopyCvt (W);
-
-		file_str=file_oxford+".bgen";
-		if (ReadFile_bgen (file_str, setSnps, W, indicator_idv,
-				   indicator_snp, snpInfo, maf_level,
-				   miss_level, hwe_level, r2_level,
-				   ns_test)==false) {
-		  error=true;
-		}
-		gsl_matrix_free(W);
-
-		ns_total=indicator_snp.size();
-	}
-
-	// Read genotype and phenotype file for PLINK format.
-	if (!file_bfile.empty()) {
-		file_str=file_bfile+".bim";
-		snpInfo.clear();
-		if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
-
-		// If both fam file and pheno files are used, use
-		// phenotypes inside the pheno file.
-		if (!file_pheno.empty()) {
-
-		  // Phenotype file before genotype file.
-		  if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
-				      p_column)==false) {error=true;}
-		} else {
-		  file_str=file_bfile+".fam";
-		  if (ReadFile_fam (file_str, indicator_pheno, pheno,
-				    mapID2num, p_column)==false) {error=true;}
-		}
-
-		// Post-process covariates and phenotypes, obtain
-		// ni_test, save all useful covariates.
-		ProcessCvtPhen();
-
-		// Obtain covariate matrix.
-		gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-		CopyCvt (W);
-
-		file_str=file_bfile+".bed";
-		if (ReadFile_bed (file_str, setSnps, W, indicator_idv,
-				  indicator_snp, snpInfo, maf_level,
-				  miss_level, hwe_level, r2_level,
-				  ns_test) == false) {
-		  error=true;
-		}
-		gsl_matrix_free(W);
-		ns_total=indicator_snp.size();
-	}
-
-	// Read genotype and phenotype file for BIMBAM format.
-	if (!file_geno.empty()) {
-
-	        // Annotation file before genotype file.
-		if (!file_anno.empty() ) {
-			if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
-					   mapRS2cM)==false) {
-			  error=true;
-			}
-		}
-
-		// Phenotype file before genotype file.
-		if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
-				    p_column) == false) {
-		  error=true;
-		}
-
-		// Post-process covariates and phenotypes, obtain
-		// ni_test, save all useful covariates.
-		ProcessCvtPhen();
-
-		// Obtain covariate matrix.
-		gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-		CopyCvt (W);
-
-		if (ReadFile_geno (file_geno, setSnps, W, indicator_idv,
-				   indicator_snp, maf_level, miss_level,
-				   hwe_level, r2_level, mapRS2chr, mapRS2bp,
-				   mapRS2cM, snpInfo, ns_test)==false) {
-		  error=true;
-		}
-		gsl_matrix_free(W);
-		ns_total=indicator_snp.size();
-	}
-
-	// Read genotype file for multiple PLINK files.
-	if (!file_mbfile.empty()) {
-	  igzstream infile (file_mbfile.c_str(), igzstream::in);
-	  if (!infile) {
-	    cout<<"error! fail to open mbfile file: " << file_mbfile<<endl;
-	    return;
-	  }
-
-	  string file_name;
-	  size_t t=0, ns_test_tmp=0;
-	  gsl_matrix *W;
-	  while (!safeGetline(infile, file_name).eof()) {
-		file_str=file_name+".bim";
-
-		if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
-
-		if (t==0) {
-
-		  // If both fam file and pheno files are used, use
-		  // phenotypes inside the pheno file.
-		  if (!file_pheno.empty()) {
-
-		    // Phenotype file before genotype file.
-		    if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
-					p_column)==false) {
-		      error=true;
-		    }
-		  } else {
-		    file_str=file_name+".fam";
-		    if (ReadFile_fam (file_str, indicator_pheno, pheno,
-				      mapID2num, p_column)==false) {
-		      error=true;
-		    }
-		  }
-
-		  // Post-process covariates and phenotypes, obtain
-		  // ni_test, save all useful covariates.
-		  ProcessCvtPhen();
-
-		  // Obtain covariate matrix.
-		  W=gsl_matrix_alloc (ni_test, n_cvt);
-		  CopyCvt (W);
-		}
-
-		file_str=file_name+".bed";
-		if (ReadFile_bed (file_str, setSnps, W, indicator_idv,
-				  indicator_snp, snpInfo, maf_level,
-				  miss_level, hwe_level, r2_level,
-				  ns_test_tmp)==false) {
-		  error=true;
-		}
-		mindicator_snp.push_back(indicator_snp);
-		msnpInfo.push_back(snpInfo);
-		ns_test+=ns_test_tmp;
-		ns_total+=indicator_snp.size();
-
-		t++;
-	  }
-
-	  gsl_matrix_free(W);
-
-	  infile.close();
-	  infile.clear();
-	}
-
-	// Read genotype and phenotype file for multiple BIMBAM files.
-	if (!file_mgeno.empty()) {
-
-	  // Annotation file before genotype file.
-	  if (!file_anno.empty() ) {
-	    if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
-			       mapRS2cM)==false) {
-	      error=true;
-	    }
-	  }
-
-	  // Phenotype file before genotype file.
-	  if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
-			      p_column)==false) {
-	    error=true;
-	  }
-
-	  // Post-process covariates and phenotypes, obtain ni_test,
-	  // save all useful covariates.
-	  ProcessCvtPhen();
-
-	  // Obtain covariate matrix.
-	  gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-	  CopyCvt (W);
-
-	  igzstream infile (file_mgeno.c_str(), igzstream::in);
-	  if (!infile) {
-	    cout<<"error! fail to open mgeno file: "<<file_mgeno<<endl;
-	    return;
-	  }
-
-	  string file_name;
-	  size_t ns_test_tmp;
-	  while (!safeGetline(infile, file_name).eof()) {
-	    if (ReadFile_geno (file_name, setSnps, W, indicator_idv,
-			       indicator_snp, maf_level, miss_level,
-			       hwe_level, r2_level, mapRS2chr, mapRS2bp,
-			       mapRS2cM, snpInfo, ns_test_tmp)==false) {
-	      error=true;
-	    }
-
-	    mindicator_snp.push_back(indicator_snp);
-	    msnpInfo.push_back(snpInfo);
-	    ns_test+=ns_test_tmp;
-	    ns_total+=indicator_snp.size();
-	  }
-
-	  gsl_matrix_free(W);
-
-	  infile.close();
-	  infile.clear();
-	}
-
-	if (!file_gene.empty()) {
-		if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
-				    p_column)==false) {error=true;}
-
-		// Convert indicator_pheno to indicator_idv.
-		int k=1;
-		for (size_t i=0; i<indicator_pheno.size(); i++) {
-			k=1;
-			for (size_t j=0; j<indicator_pheno[i].size(); j++) {
-				if (indicator_pheno[i][j]==0) {k=0;}
-			}
-			indicator_idv.push_back(k);
-		}
-
-		// Post-process covariates and phenotypes, obtain
-		// ni_test, save all useful covariates.
-		ProcessCvtPhen();
-
-		// Obtain covariate matrix.
-		gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-		CopyCvt (W);
-
-		if (ReadFile_gene (file_gene, vec_read, snpInfo,
-				   ng_total)==false) {
-		  error=true;
-		}
-	}
-
-	// Read is after gene file.
-	if (!file_read.empty() ) {
-		if (ReadFile_column (file_read, indicator_read,
-				     vec_read, 1)==false) {
-		  error=true;
-		}
-
-		ni_test=0;
-		for (vector<int>::size_type i=0;
-		     i<(indicator_idv).size();
-		     ++i) {
-			indicator_idv[i]*=indicator_read[i];
-			ni_test+=indicator_idv[i];
-		}
-
-		if (ni_test==0) {
-		  error=true;
-		  cout<<"error! number of analyzed individuals equals 0. "<<
-		    endl;
-		  return;
-		}
-	}
-
-	// For ridge prediction, read phenotype only.
-	if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) {
-		if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
-				    p_column)==false) {
-		  error=true;
-		}
-
-		// Post-process covariates and phenotypes, obtain
-		// ni_test, save all useful covariates.
-		ProcessCvtPhen();
-	}
-	return;
+void PARAM::ReadFiles(void) {
+  string file_str; // Scratch buffer for derived names (prefix + extension).
+
+  // Read cat file.
+  if (!file_mcat.empty()) {
+    if (ReadFile_mcat(file_mcat, mapRS2cat, n_vc) == false) {
+      error = true;
+    }
+  } else if (!file_cat.empty()) {
+    if (ReadFile_cat(file_cat, mapRS2cat, n_vc) == false) {
+      error = true;
+    }
+  }
+
+  // Read snp weight files.
+  if (!file_wcat.empty()) {
+    if (ReadFile_wsnp(file_wcat, n_vc, mapRS2wcat) == false) {
+      error = true;
+    }
+  }
+  if (!file_wsnp.empty()) {
+    if (ReadFile_wsnp(file_wsnp, mapRS2wsnp) == false) {
+      error = true;
+    }
+  }
+
+  // Count number of kinship files.
+  if (!file_mk.empty()) {
+    if (CountFileLines(file_mk, n_vc) == false) {
+      error = true;
+    }
+  }
+
+  // Read SNP set.
+  if (!file_snps.empty()) {
+    if (ReadFile_snps(file_snps, setSnps) == false) {
+      error = true;
+    }
+  } else {
+    setSnps.clear();
+  }
+
+  // For prediction: read only these inputs, then return early.
+  if (!file_epm.empty()) {
+    if (ReadFile_est(file_epm, est_column, mapRS2est) == false) {
+      error = true;
+    }
+    if (!file_bfile.empty()) {
+      file_str = file_bfile + ".bim";
+      if (ReadFile_bim(file_str, snpInfo) == false) {
+        error = true;
+      }
+      file_str = file_bfile + ".fam";
+      if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) ==
+          false) {
+        error = true;
+      }
+    }
+
+    if (!file_geno.empty()) {
+      if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
+          false) {
+        error = true;
+      }
+
+      if (CountFileLines(file_geno, ns_total) == false) {
+        error = true;
+      }
+    }
+
+    if (!file_ebv.empty()) {
+      if (ReadFile_column(file_ebv, indicator_bv, vec_bv, 1) == false) {
+        error = true;
+      }
+    }
+
+    if (!file_log.empty()) {
+      if (ReadFile_log(file_log, pheno_mean) == false) {
+        error = true;
+      }
+    }
+
+    // Convert indicator_pheno to indicator_idv.
+    int k = 1;
+    for (size_t i = 0; i < indicator_pheno.size(); i++) {
+      k = 1;
+      for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+        if (indicator_pheno[i][j] == 0) {
+          k = 0;
+        }
+      }
+      indicator_idv.push_back(k);
+    }
+
+    ns_test = 0;
+
+    return;
+  }
+
+  // Read covariates before the genotype files.
+  if (!file_cvt.empty()) {
+    if (ReadFile_cvt(file_cvt, indicator_cvt, cvt, n_cvt) == false) {
+      error = true;
+    }
+    if ((indicator_cvt).size() == 0) {
+      n_cvt = 1;
+    }
+  } else {
+    n_cvt = 1;
+  }
+
+  if (!file_gxe.empty()) {
+    if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) {
+      error = true;
+    }
+  }
+  if (!file_weight.empty()) {
+    if (ReadFile_column(file_weight, indicator_weight, weight, 1) == false) {
+      error = true;
+    }
+  }
+
+  // WJA added.
+  // Read genotype and phenotype file for bgen format.
+  if (!file_oxford.empty()) {
+    file_str = file_oxford + ".sample";
+    if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,
+                        indicator_cvt, cvt, n_cvt) == false) {
+      error = true;
+    }
+    if ((indicator_cvt).size() == 0) {
+      n_cvt = 1;
+    }
+
+    // Post-process covariates and phenotypes, obtain
+    // ni_test, save all useful covariates.
+    ProcessCvtPhen();
+
+    // Obtain covariate matrix.
+    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+    CopyCvt(W);
+
+    file_str = file_oxford + ".bgen";
+    if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp,
+                      snpInfo, maf_level, miss_level, hwe_level, r2_level,
+                      ns_test) == false) {
+      error = true;
+    }
+    gsl_matrix_free(W);
+
+    ns_total = indicator_snp.size();
+  }
+
+  // Read genotype and phenotype file for PLINK format.
+  if (!file_bfile.empty()) {
+    file_str = file_bfile + ".bim";
+    snpInfo.clear();
+    if (ReadFile_bim(file_str, snpInfo) == false) {
+      error = true;
+    }
+
+    // If both fam file and pheno files are used, use
+    // phenotypes inside the pheno file.
+    if (!file_pheno.empty()) {
+
+      // Phenotype file before genotype file.
+      if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
+          false) {
+        error = true;
+      }
+    } else {
+      file_str = file_bfile + ".fam";
+      if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) ==
+          false) {
+        error = true;
+      }
+    }
+
+    // Post-process covariates and phenotypes, obtain
+    // ni_test, save all useful covariates.
+    ProcessCvtPhen();
+
+    // Obtain covariate matrix.
+    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+    CopyCvt(W);
+
+    file_str = file_bfile + ".bed";
+    if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+                     snpInfo, maf_level, miss_level, hwe_level, r2_level,
+                     ns_test) == false) {
+      error = true;
+    }
+    gsl_matrix_free(W);
+    ns_total = indicator_snp.size();
+  }
+
+  // Read genotype and phenotype file for BIMBAM format.
+  if (!file_geno.empty()) {
+
+    // Annotation file before genotype file.
+    if (!file_anno.empty()) {
+      if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) {
+        error = true;
+      }
+    }
+
+    // Phenotype file before genotype file.
+    if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+      error = true;
+    }
+
+    // Post-process covariates and phenotypes, obtain
+    // ni_test, save all useful covariates.
+    ProcessCvtPhen();
+
+    // Obtain covariate matrix.
+    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+    CopyCvt(W);
+
+    if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp,
+                      maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
+                      mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
+      error = true;
+    }
+    gsl_matrix_free(W);
+    ns_total = indicator_snp.size();
+  }
+
+  // Read genotype file for multiple PLINK files.
+  if (!file_mbfile.empty()) {
+    igzstream infile(file_mbfile.c_str(), igzstream::in);
+    if (!infile) {
+      cout << "error! fail to open mbfile file: " << file_mbfile << endl;
+      return;
+    }
+
+    string file_name;
+    size_t t = 0, ns_test_tmp = 0;
+    gsl_matrix *W = NULL; // Allocated while reading the first listed file.
+    while (!safeGetline(infile, file_name).eof()) {
+      file_str = file_name + ".bim";
+
+      if (ReadFile_bim(file_str, snpInfo) == false) {
+        error = true;
+      }
+
+      if (t == 0) {
+
+        // If both fam file and pheno files are used, use
+        // phenotypes inside the pheno file.
+        if (!file_pheno.empty()) {
+
+          // Phenotype file before genotype file.
+          if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
+              false) {
+            error = true;
+          }
+        } else {
+          file_str = file_name + ".fam";
+          if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num,
+                           p_column) == false) {
+            error = true;
+          }
+        }
+
+        // Post-process covariates and phenotypes, obtain
+        // ni_test, save all useful covariates.
+        ProcessCvtPhen();
+
+        // Obtain covariate matrix.
+        W = gsl_matrix_alloc(ni_test, n_cvt);
+        CopyCvt(W);
+      }
+
+      file_str = file_name + ".bed";
+      if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+                       snpInfo, maf_level, miss_level, hwe_level, r2_level,
+                       ns_test_tmp) == false) {
+        error = true;
+      }
+      mindicator_snp.push_back(indicator_snp);
+      msnpInfo.push_back(snpInfo);
+      ns_test += ns_test_tmp;
+      ns_total += indicator_snp.size();
+
+      t++;
+    }
+
+    gsl_matrix_free(W); // W stays NULL if the list file had no entries.
+
+    infile.close();
+    infile.clear();
+  }
+
+  // Read genotype and phenotype file for multiple BIMBAM files.
+  if (!file_mgeno.empty()) {
+
+    // Annotation file before genotype file.
+    if (!file_anno.empty()) {
+      if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) {
+        error = true;
+      }
+    }
+
+    // Phenotype file before genotype file.
+    if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+      error = true;
+    }
+
+    // Post-process covariates and phenotypes, obtain ni_test,
+    // save all useful covariates.
+    ProcessCvtPhen();
+
+    // Obtain covariate matrix.
+    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+    CopyCvt(W);
+
+    igzstream infile(file_mgeno.c_str(), igzstream::in);
+    if (!infile) {
+      cout << "error! fail to open mgeno file: " << file_mgeno << endl;
+      gsl_matrix_free(W);
+      return;
+    }
+    string file_name;
+    size_t ns_test_tmp = 0;
+    while (!safeGetline(infile, file_name).eof()) {
+      if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp,
+                        maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
+                        mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) {
+        error = true;
+      }
+
+      mindicator_snp.push_back(indicator_snp);
+      msnpInfo.push_back(snpInfo);
+      ns_test += ns_test_tmp;
+      ns_total += indicator_snp.size();
+    }
+
+    gsl_matrix_free(W);
+
+    infile.close();
+    infile.clear();
+  }
+
+  if (!file_gene.empty()) {
+    if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+      error = true;
+    }
+
+    // Convert indicator_pheno to indicator_idv.
+    int k = 1;
+    for (size_t i = 0; i < indicator_pheno.size(); i++) {
+      k = 1;
+      for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+        if (indicator_pheno[i][j] == 0) {
+          k = 0;
+        }
+      }
+      indicator_idv.push_back(k);
+    }
+
+    // Post-process covariates and phenotypes, obtain
+    // ni_test, save all useful covariates.
+    ProcessCvtPhen();
+
+    // Obtain covariate matrix.
+    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+    CopyCvt(W);
+    gsl_matrix_free(W); // W is not used below; release it to avoid a leak.
+    if (ReadFile_gene(file_gene, vec_read, snpInfo, ng_total) == false) {
+      error = true;
+    }
+  }
+
+  // Read is after gene file.
+  if (!file_read.empty()) {
+    if (ReadFile_column(file_read, indicator_read, vec_read, 1) == false) {
+      error = true;
+    }
+    // NOTE(review): the loop assumes indicator_read covers every individual.
+    ni_test = 0;
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_idv[i] *= indicator_read[i];
+      ni_test += indicator_idv[i];
+    }
+
+    if (ni_test == 0) {
+      error = true;
+      cout << "error! number of analyzed individuals equals 0. " << endl;
+      return;
+    }
+  }
+
+  // For ridge prediction, read phenotype only.
+  if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) {
+    if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+      error = true;
+    }
+
+    // Post-process covariates and phenotypes, obtain
+    // ni_test, save all useful covariates.
+    ProcessCvtPhen();
+  }
+  return;
 }
 
-void PARAM::CheckParam (void) {
-	struct stat fileInfo;
-	string str;
-
-	// Check parameters.
-	if (k_mode!=1 && k_mode!=2) {
-	  cout<<"error! unknown kinship/relatedness input mode: "<<
-	    k_mode<<endl;
-	  error=true;
-	}
-	if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5
-	    && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 &&
-	    a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 &&
-	    a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 &&
-	    a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 &&
-	    a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 &&
-	    a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 &&
-	    a_mode!=71) {
-	  cout<<"error! unknown analysis mode: "<<a_mode<<
-	    ". make sure -gk or -eigen or -lmm or -bslmm -predict or " <<
-	    "-calccov is sepcified correctly."<<endl;
-	  error=true;
-	}
-	if (miss_level>1) {
-	  cout<<"error! missing level needs to be between 0 and 1. " <<
-	    "current value = "<<miss_level<<endl;
-	  error=true;
-	}
-	if (maf_level>0.5) {
-	  cout<<"error! maf level needs to be between 0 and 0.5. " <<
-	    "current value = "<<maf_level<<endl;
-	  error=true;
-	}
-	if (hwe_level>1) {
-	  cout<<"error! hwe level needs to be between 0 and 1. " <<
-	    "current value = "<<hwe_level<<endl;
-	  error=true;
-	}
-	if (r2_level>1) {
-	  cout<<"error! r2 level needs to be between 0 and 1. " <<
-	    "current value = "<<r2_level<<endl;
-	  error=true;
-	}
-
-	if (l_max<l_min) {
-	  cout<<"error! maximum lambda value must be larger than the " <<
-	    "minimal value. current values = "<<l_max<<" and "<<l_min<<endl;
-	  error=true;
-	}
-	if (h_max<h_min) {
-	  cout<<"error! maximum h value must be larger than the minimal "<<
-	    "value. current values = "<<h_max<<" and "<<h_min<<endl;
-	  error=true;
-	}
-	if (s_max<s_min) {
-	  cout<<"error! maximum s value must be larger than the minimal "<<
-	    "value. current values = "<<s_max<<" and "<<s_min<<endl;
-	  error=true;
-	}
-	if (rho_max<rho_min) {
-	  cout<<"error! maximum rho value must be larger than the"<<
-	    "minimal value. current values = "<<rho_max<<" and "<<
-	    rho_min<<endl;
-	  error=true;
-	}
-	if (logp_max<logp_min) {
-	  cout<<"error! maximum logp value must be larger than the "<<
-	    "minimal value. current values = "<<logp_max/log(10)<<
-	    " and "<<logp_min/log(10)<<endl;
-	  error=true;
-	}
-
-	if (h_max>1) {
-	  cout<<"error! h values must be bewtween 0 and 1. current "<<
-	    "values = "<<h_max<<" and "<<h_min<<endl;
-	  error=true;
-	}
-	if (rho_max>1) {
-	  cout<<"error! rho values must be between 0 and 1. current "<<
-	    "values = "<<rho_max<<" and "<<rho_min<<endl;
-	  error=true;
-	}
-	if (logp_max>0) {
-	  cout<<"error! maximum logp value must be smaller than 0. "<<
-	    "current values = "<<logp_max/log(10)<<" and "<<
-	    logp_min/log(10)<<endl;
-	  error=true;
-	}
-	if (l_max<l_min) {
-	  cout<<"error! maximum lambda value must be larger than the "<<
-	    "minimal value. current values = "<<l_max<<" and "<<l_min<<endl;
-	  error=true;
-	}
-
-	if (h_scale>1.0) {
-	  cout<<"error! hscale value must be between 0 and 1. "<<
-	    "current value = "<<h_scale<<endl;
-	  error=true;
-	}
-	if (rho_scale>1.0) {
-	  cout<<"error! rscale value must be between 0 and 1. "<<
-	    "current value = "<<rho_scale<<endl;
-	  error=true;
-	}
-	if (logp_scale>1.0) {
-	  cout<<"error! pscale value must be between 0 and 1. "<<
-	    "current value = "<<logp_scale<<endl;
-	  error=true;
-	}
-
-	if (rho_max==1 && rho_min==1 && a_mode==12) {
-	  cout<<"error! ridge regression does not support a rho "<<
-	    "parameter. current values = "<<rho_max<<" and "<<rho_min<<endl;
-	  error=true;
-	}
-
-	if (window_cm<0) {
-	  cout<<"error! windowcm values must be non-negative. "<<
-	    "current values = "<<window_cm<<endl;
-	  error=true;
-	}
-
-	if (window_cm==0 && window_bp==0 && window_ns==0) {
-	  window_bp=1000000;
-	}
-
-	// Check p_column, and (no need to) sort p_column into
-	// ascending order.
-	if (p_column.size()==0) {
-		p_column.push_back(1);
-	} else {
-		for (size_t i=0; i<p_column.size(); i++) {
-			for (size_t j=0; j<i; j++) {
-				if (p_column[i]==p_column[j]) {
-				  cout<<"error! identical phenotype "<<
-				    "columns: "<<p_column[i]<<endl;
-				  error=
-				    true;}
-			}
-		}
-	}
-
-	n_ph=p_column.size();
-
-	// Only LMM option (and one prediction option) can deal with
-	// multiple phenotypes and no gene expression files.
-	if (n_ph>1 && a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 &&
-	    a_mode!=43) {
-		cout<<"error! the current analysis mode "<<a_mode<<
-		  " can not deal with multiple phenotypes."<<endl;
-		error=true;
-	}
-	if (n_ph>1 && !file_gene.empty() ) {
-	  cout<<"error! multiple phenotype analysis option not "<<
-	    "allowed with gene expression files. "<<endl;
-	  error=true;
-	}
-
-	if (p_nr>1) {
-	  cout<<"error! pnr value must be between 0 and 1. current value = "<<
-	    p_nr<<endl;
-	  error=true;
-	}
-
-	//check est_column
-	if (est_column.size()==0) {
-		if (file_ebv.empty()) {
-			est_column.push_back(2);
-			est_column.push_back(5);
-			est_column.push_back(6);
-			est_column.push_back(7);
-		} else {
-			est_column.push_back(2);
-			est_column.push_back(0);
-			est_column.push_back(6);
-			est_column.push_back(7);
-		}
-	}
-
-	if (est_column.size()!=4) {
-	  cout<<"error! -en not followed by four numbers. current number = "<<
-	    est_column.size()<<endl;
-	  error=true;
-	}
-	if (est_column[0]==0) {
-	  cout<<"error! -en rs column can not be zero. current number = "<<
-	    est_column.size()<<endl;
-	  error=true;
-	}
-
-	// Check if files are compatible with each other, and if files exist.
-	if (!file_bfile.empty()) {
-		str=file_bfile+".bim";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .bim file: "<<str<<endl;
-		  error=true;
-		}
-		str=file_bfile+".bed";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .bed file: "<<str<<endl;
-		  error=true;
-		}
-		str=file_bfile+".fam";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .fam file: "<<str<<endl;
-		  error=true;
-		}
-	}
-
-	if (!file_oxford.empty()) {
-		str=file_oxford+".bgen";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .bgen file: "<<str<<endl;
-		  error=true;
-		}
-		str=file_oxford+".sample";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .sample file: "<<str<<endl;
-		  error=true;
-		}
-	}
-
-	if ((!file_geno.empty() || !file_gene.empty()) ) {
-		str=file_pheno;
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open phenotype file: "<<str<<endl;
-		  error=true;
-		}
-	}
-
-	str=file_geno;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open mean genotype file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_gene;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open gene expression file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_cat;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open category file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_mcat;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open mcategory file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_beta;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open beta file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_cor;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open correlation file: "<<str<<endl;
-	  error=true;
-	}
-
-	if (!file_study.empty()) {
-	  str=file_study+".Vq.txt";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .Vq.txt file: "<<str<<endl;
-		  error=true;
-		}
-		str=file_study+".q.txt";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .q.txt file: "<<str<<endl;
-		  error=true;
-		}
-		str=file_study+".size.txt";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .size.txt file: "<<str<<endl;
-		  error=true;
-		}
-	}
-
-	if (!file_ref.empty()) {
-		str=file_ref+".S.txt";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .S.txt file: "<<str<<endl;
-		  error=true;
-		}
-		str=file_ref+".size.txt";
-		if (stat(str.c_str(),&fileInfo)==-1) {
-		  cout<<"error! fail to open .size.txt file: "<<str<<endl;
-		  error=true;
-		}
-	}
-
-	str=file_mstudy;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open mstudy file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_mref;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open mref file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_mgeno;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open mgeno file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_mbfile;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open mbfile file: "<<str<<endl;
-	  error=true;
-	}
-
-	size_t flag=0;
-	if (!file_bfile.empty()) {flag++;}
-	if (!file_geno.empty()) {flag++;}
-	if (!file_gene.empty()) {flag++;}
-
-	// WJA added.
-	if (!file_oxford.empty()) {flag++;}
-
-	if (flag!=1 && a_mode!=15 && a_mode!=27 && a_mode!=28 &&
-	    a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62 &&
-	    a_mode!=63 && a_mode!=66 && a_mode!=67) {
-	  cout<<"error! either plink binary files, or bimbam mean"<<
-	    "genotype files, or gene expression files are required."<<endl;
-	  error=true;
-	}
-
-	if (file_pheno.empty() && (a_mode==43 || a_mode==5) ) {
-		cout<<"error! phenotype file is required."<<endl; error=true;
-	}
-
-	if (a_mode==61 || a_mode==62) {
-	  if (!file_beta.empty()) {
-	    if ( file_mbfile.empty() && file_bfile.empty() &&
-		 file_mgeno.empty() && file_geno.empty() &&
-		 file_mref.empty() && file_ref.empty() ) {
-	      cout<<"error! missing genotype file or ref/mref file."<<endl;
-	      error=true;
-	    }
-	  } else if (!file_pheno.empty()) {
-	    if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) &&
-		file_mk.empty() ) {
-	      cout<<"error! missing relatedness file. "<<endl;  error=true;
-	    }
-	  } else if ( (file_mstudy.empty() && file_study.empty()) ||
-		      (file_mref.empty() && file_ref.empty() )  ) {
-	    cout<<"error! either beta file, or phenotype files or "<<
-	      "study/ref mstudy/mref files are required."<<endl;
-	    error=true;
-	  }
-	}
-
-
-	if (a_mode==63) {
-	  if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) &&
-	      file_mk.empty() ) {
-	    cout<<"error! missing relatedness file. "<<endl; error=true;
-	  }
-	  if ( file_pheno.empty() ) {
-	    cout<<"error! missing phenotype file."<<endl; error=true;
-	  }
-	}
-
-	if (a_mode==66 || a_mode==67) {
-	  if (file_beta.empty() ||
-	      (file_mbfile.empty() && file_bfile.empty() &&
-	       file_mgeno.empty() && file_geno.empty()) ) {
-	    cout<<"error! missing beta file or genotype file."<<endl;
-	    error=true;
-	  }
-	}
-
-
-	if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) {
-	  cout<<"error! estimated parameter file also requires genotype "<<
-	    "file."<<endl;
-	  error=true;
-	}
-	if (!file_ebv.empty() && file_kin.empty()) {
-	  cout<<"error! estimated breeding value file also requires "<<
-	    "relatedness file."<<endl;
-	  error=true;
-	}
-
-	if (!file_log.empty() && pheno_mean!=0) {
-	  cout<<"error! either log file or mu value can be provide."<<endl;
-	  error=true;
-	}
-
-	str=file_snps;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open snps file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_log;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open log file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_anno;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open annotation file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_kin;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open relatedness matrix file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_mk;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open relatedness matrix file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_cvt;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open covariates file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_gxe;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open environmental covariate file: "<<
-	    str<<endl;
-	  error=true;
-	}
-
-	str=file_weight;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open the residual weight file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_epm;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open estimated parameter file: "<<str<<endl;
-	  error=true;
-	}
-
-	str=file_ebv;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open estimated breeding value file: "<<
-	    str<<endl;
-	  error=true;
-	}
-
-	str=file_read;
-	if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
-	  cout<<"error! fail to open total read file: "<<str<<endl;
-	  error=true;
-	}
-
-	// Check if files are compatible with analysis mode.
-	if (k_mode==2 && !file_geno.empty() ) {
-	  cout<<"error! use \"-km 1\" when using bimbam mean genotype "<<
-	    "file. "<<endl;
-	  error=true;
-	}
-
-	if ((a_mode==1 || a_mode==2 || a_mode==3 || a_mode==4 ||
-	     a_mode==5 || a_mode==31) &&
-	    (file_kin.empty() && (file_ku.empty()||file_kd.empty()))) {
-	  cout<<"error! missing relatedness file. "<<endl;
-	  error=true;
-	}
-
-	if ((a_mode==43) && file_kin.empty()) {
-	  cout<<"error! missing relatedness file. -predict option requires "<<
-	    "-k option to provide a relatedness file."<<endl;
-	  error=true;
-	}
-
-	if ((a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14 ||
-	     a_mode==16) && !file_cvt.empty()) {
-	  cout<<"error! -bslmm option does not support covariates files."<<
-	    endl;
-	  error=true;
-	}
-
-	if (a_mode==41 || a_mode==42) {
-		if (!file_cvt.empty() ) {
-		  cout<<"error! -predict option does not support "<<
-		    "covariates files."<<endl;
-		  error=true;
-		}
-		if (file_epm.empty() ) {
-		  cout<<"error! -predict option requires estimated "<<
-		    "parameter files."<<endl;
-		  error=true;
-		}
-	}
-
-	if (file_beta.empty() && (a_mode==27 || a_mode==28) ) {
-		cout<<"error! beta effects file is required."<<endl;
-		error=true;
-	}
-
-	return;
+// Validate the command-line parameters and the input files they name.
+//
+// Checks value ranges, fills in defaults for window_bp, p_column and
+// est_column, uses stat() to test that named input files exist, and
+// checks the files against a_mode. Each failed check prints a message
+// to cout and sets error = true; no check stops the function early.
+void PARAM::CheckParam(void) {
+  struct stat fileInfo;
+  string str;
+
+  // Check parameters.
+  if (k_mode != 1 && k_mode != 2) {
+    cout << "error! unknown kinship/relatedness input mode: " << k_mode << endl;
+    error = true;
+  }
+  if (a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 && a_mode != 5 &&
+      a_mode != 11 && a_mode != 12 && a_mode != 13 && a_mode != 14 &&
+      a_mode != 15 && a_mode != 21 && a_mode != 22 && a_mode != 25 &&
+      a_mode != 26 && a_mode != 27 && a_mode != 28 && a_mode != 31 &&
+      a_mode != 41 && a_mode != 42 && a_mode != 43 && a_mode != 51 &&
+      a_mode != 52 && a_mode != 53 && a_mode != 54 && a_mode != 61 &&
+      a_mode != 62 && a_mode != 63 && a_mode != 66 && a_mode != 67 &&
+      a_mode != 71) {
+    cout << "error! unknown analysis mode: " << a_mode
+         << ". make sure -gk or -eigen or -lmm or -bslmm -predict or "
+         << "-calccov is specified correctly." << endl;
+    error = true;
+  }
+  if (miss_level > 1) {
+    cout << "error! missing level needs to be between 0 and 1. "
+         << "current value = " << miss_level << endl;
+    error = true;
+  }
+  if (maf_level > 0.5) {
+    cout << "error! maf level needs to be between 0 and 0.5. "
+         << "current value = " << maf_level << endl;
+    error = true;
+  }
+  if (hwe_level > 1) {
+    cout << "error! hwe level needs to be between 0 and 1. "
+         << "current value = " << hwe_level << endl;
+    error = true;
+  }
+  if (r2_level > 1) {
+    cout << "error! r2 level needs to be between 0 and 1. "
+         << "current value = " << r2_level << endl;
+    error = true;
+  }
+
+  if (l_max < l_min) {
+    cout << "error! maximum lambda value must be larger than the "
+         << "minimal value. current values = " << l_max << " and " << l_min
+         << endl;
+    error = true;
+  }
+  if (h_max < h_min) {
+    cout << "error! maximum h value must be larger than the minimal "
+         << "value. current values = " << h_max << " and " << h_min << endl;
+    error = true;
+  }
+  if (s_max < s_min) {
+    cout << "error! maximum s value must be larger than the minimal "
+         << "value. current values = " << s_max << " and " << s_min << endl;
+    error = true;
+  }
+  if (rho_max < rho_min) {
+    cout << "error! maximum rho value must be larger than the "
+         << "minimal value. current values = " << rho_max << " and " << rho_min
+         << endl;
+    error = true;
+  }
+  if (logp_max < logp_min) {
+    cout << "error! maximum logp value must be larger than the "
+         << "minimal value. current values = " << logp_max / log(10) << " and "
+         << logp_min / log(10) << endl;
+    error = true;
+  }
+
+  if (h_max > 1) {
+    cout << "error! h values must be between 0 and 1. current "
+         << "values = " << h_max << " and " << h_min << endl;
+    error = true;
+  }
+  if (rho_max > 1) {
+    cout << "error! rho values must be between 0 and 1. current "
+         << "values = " << rho_max << " and " << rho_min << endl;
+    error = true;
+  }
+  if (logp_max > 0) {
+    cout << "error! maximum logp value must be smaller than 0. "
+         << "current values = " << logp_max / log(10) << " and "
+         << logp_min / log(10) << endl;
+    error = true;
+  }
+
+  if (h_scale > 1.0) {
+    cout << "error! hscale value must be between 0 and 1. "
+         << "current value = " << h_scale << endl;
+    error = true;
+  }
+  if (rho_scale > 1.0) {
+    cout << "error! rscale value must be between 0 and 1. "
+         << "current value = " << rho_scale << endl;
+    error = true;
+  }
+  if (logp_scale > 1.0) {
+    cout << "error! pscale value must be between 0 and 1. "
+         << "current value = " << logp_scale << endl;
+    error = true;
+  }
+
+  if (rho_max == 1 && rho_min == 1 && a_mode == 12) {
+    cout << "error! ridge regression does not support a rho "
+         << "parameter. current values = " << rho_max << " and " << rho_min
+         << endl;
+    error = true;
+  }
+
+  if (window_cm < 0) {
+    cout << "error! windowcm values must be non-negative. "
+         << "current values = " << window_cm << endl;
+    error = true;
+  }
+
+  if (window_cm == 0 && window_bp == 0 && window_ns == 0) {
+    window_bp = 1000000; // default used when no window size was set
+  }
+
+  // Check p_column, and (no need to) sort p_column into
+  // ascending order.
+  if (p_column.size() == 0) {
+    p_column.push_back(1);
+  } else {
+    for (size_t i = 0; i < p_column.size(); i++) {
+      for (size_t j = 0; j < i; j++) {
+        if (p_column[i] == p_column[j]) {
+          cout << "error! identical phenotype "
+               << "columns: " << p_column[i] << endl;
+          error = true;
+        }
+      }
+    }
+  }
+
+  n_ph = p_column.size();
+
+  // Only LMM option (and one prediction option) can deal with
+  // multiple phenotypes and no gene expression files.
+  if (n_ph > 1 && a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 &&
+      a_mode != 43) {
+    cout << "error! the current analysis mode " << a_mode
+         << " can not deal with multiple phenotypes." << endl;
+    error = true;
+  }
+  if (n_ph > 1 && !file_gene.empty()) {
+    cout << "error! multiple phenotype analysis option not "
+         << "allowed with gene expression files. " << endl;
+    error = true;
+  }
+
+  if (p_nr > 1) {
+    cout << "error! pnr value must be between 0 and 1. current value = " << p_nr
+         << endl;
+    error = true;
+  }
+
+  // check est_column
+  if (est_column.size() == 0) {
+    if (file_ebv.empty()) {
+      est_column.push_back(2);
+      est_column.push_back(5);
+      est_column.push_back(6);
+      est_column.push_back(7);
+    } else {
+      est_column.push_back(2);
+      est_column.push_back(0);
+      est_column.push_back(6);
+      est_column.push_back(7);
+    }
+  }
+
+  if (est_column.size() != 4) {
+    cout << "error! -en not followed by four numbers. current number = "
+         << est_column.size() << endl;
+    error = true;
+  }
+  if (est_column[0] == 0) {
+    cout << "error! -en rs column can not be zero. current number = "
+         << est_column[0] << endl;
+    error = true;
+  }
+
+  // Check if files are compatible with each other, and if files exist.
+  if (!file_bfile.empty()) {
+    str = file_bfile + ".bim";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .bim file: " << str << endl;
+      error = true;
+    }
+    str = file_bfile + ".bed";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .bed file: " << str << endl;
+      error = true;
+    }
+    str = file_bfile + ".fam";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .fam file: " << str << endl;
+      error = true;
+    }
+  }
+
+  if (!file_oxford.empty()) {
+    str = file_oxford + ".bgen";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .bgen file: " << str << endl;
+      error = true;
+    }
+    str = file_oxford + ".sample";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .sample file: " << str << endl;
+      error = true;
+    }
+  }
+
+  if ((!file_geno.empty() || !file_gene.empty())) {
+    str = file_pheno;
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open phenotype file: " << str << endl;
+      error = true;
+    }
+  }
+
+  str = file_geno;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open mean genotype file: " << str << endl;
+    error = true;
+  }
+
+  str = file_gene;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open gene expression file: " << str << endl;
+    error = true;
+  }
+
+  str = file_cat;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open category file: " << str << endl;
+    error = true;
+  }
+
+  str = file_mcat;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open mcategory file: " << str << endl;
+    error = true;
+  }
+
+  str = file_beta;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open beta file: " << str << endl;
+    error = true;
+  }
+
+  str = file_cor;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open correlation file: " << str << endl;
+    error = true;
+  }
+
+  if (!file_study.empty()) {
+    str = file_study + ".Vq.txt";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .Vq.txt file: " << str << endl;
+      error = true;
+    }
+    str = file_study + ".q.txt";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .q.txt file: " << str << endl;
+      error = true;
+    }
+    str = file_study + ".size.txt";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .size.txt file: " << str << endl;
+      error = true;
+    }
+  }
+
+  if (!file_ref.empty()) {
+    str = file_ref + ".S.txt";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .S.txt file: " << str << endl;
+      error = true;
+    }
+    str = file_ref + ".size.txt";
+    if (stat(str.c_str(), &fileInfo) == -1) {
+      cout << "error! fail to open .size.txt file: " << str << endl;
+      error = true;
+    }
+  }
+
+  str = file_mstudy;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open mstudy file: " << str << endl;
+    error = true;
+  }
+
+  str = file_mref;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open mref file: " << str << endl;
+    error = true;
+  }
+
+  str = file_mgeno;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open mgeno file: " << str << endl;
+    error = true;
+  }
+
+  str = file_mbfile;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open mbfile file: " << str << endl;
+    error = true;
+  }
+
+  size_t flag = 0;
+  if (!file_bfile.empty()) {
+    flag++;
+  }
+  if (!file_geno.empty()) {
+    flag++;
+  }
+  if (!file_gene.empty()) {
+    flag++;
+  }
+
+  // WJA added.
+  if (!file_oxford.empty()) {
+    flag++;
+  }
+
+  if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 &&
+      a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 &&
+      a_mode != 63 && a_mode != 66 && a_mode != 67) {
+    cout << "error! either plink binary files, or bimbam mean "
+         << "genotype files, or gene expression files are required." << endl;
+    error = true;
+  }
+
+  if (file_pheno.empty() && (a_mode == 43 || a_mode == 5)) {
+    cout << "error! phenotype file is required." << endl;
+    error = true;
+  }
+
+  if (a_mode == 61 || a_mode == 62) {
+    if (!file_beta.empty()) {
+      if (file_mbfile.empty() && file_bfile.empty() && file_mgeno.empty() &&
+          file_geno.empty() && file_mref.empty() && file_ref.empty()) {
+        cout << "error! missing genotype file or ref/mref file." << endl;
+        error = true;
+      }
+    } else if (!file_pheno.empty()) {
+      if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) &&
+          file_mk.empty()) {
+        cout << "error! missing relatedness file. " << endl;
+        error = true;
+      }
+    } else if ((file_mstudy.empty() && file_study.empty()) ||
+               (file_mref.empty() && file_ref.empty())) {
+      cout << "error! either beta file, or phenotype files or "
+           << "study/ref mstudy/mref files are required." << endl;
+      error = true;
+    }
+  }
+
+  if (a_mode == 63) {
+    if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) &&
+        file_mk.empty()) {
+      cout << "error! missing relatedness file. " << endl;
+      error = true;
+    }
+    if (file_pheno.empty()) {
+      cout << "error! missing phenotype file." << endl;
+      error = true;
+    }
+  }
+
+  if (a_mode == 66 || a_mode == 67) {
+    if (file_beta.empty() || (file_mbfile.empty() && file_bfile.empty() &&
+                              file_mgeno.empty() && file_geno.empty())) {
+      cout << "error! missing beta file or genotype file." << endl;
+      error = true;
+    }
+  }
+
+  if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) {
+    cout << "error! estimated parameter file also requires genotype "
+         << "file." << endl;
+    error = true;
+  }
+  if (!file_ebv.empty() && file_kin.empty()) {
+    cout << "error! estimated breeding value file also requires "
+         << "relatedness file." << endl;
+    error = true;
+  }
+
+  if (!file_log.empty() && pheno_mean != 0) {
+    cout << "error! either log file or mu value can be provided." << endl;
+    error = true;
+  }
+
+  str = file_snps;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open snps file: " << str << endl;
+    error = true;
+  }
+
+  str = file_log;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open log file: " << str << endl;
+    error = true;
+  }
+
+  str = file_anno;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open annotation file: " << str << endl;
+    error = true;
+  }
+
+  str = file_kin;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open relatedness matrix file: " << str << endl;
+    error = true;
+  }
+
+  str = file_mk;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open relatedness matrix file: " << str << endl;
+    error = true;
+  }
+
+  str = file_cvt;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open covariates file: " << str << endl;
+    error = true;
+  }
+
+  str = file_gxe;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open environmental covariate file: " << str << endl;
+    error = true;
+  }
+
+  str = file_weight;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open the residual weight file: " << str << endl;
+    error = true;
+  }
+
+  str = file_epm;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open estimated parameter file: " << str << endl;
+    error = true;
+  }
+
+  str = file_ebv;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open estimated breeding value file: " << str
+         << endl;
+    error = true;
+  }
+
+  str = file_read;
+  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+    cout << "error! fail to open total read file: " << str << endl;
+    error = true;
+  }
+
+  // Check if files are compatible with analysis mode.
+  if (k_mode == 2 && !file_geno.empty()) {
+    cout << "error! use \"-km 1\" when using bimbam mean genotype "
+         << "file. " << endl;
+    error = true;
+  }
+
+  if ((a_mode == 1 || a_mode == 2 || a_mode == 3 || a_mode == 4 ||
+       a_mode == 5 || a_mode == 31) &&
+      (file_kin.empty() && (file_ku.empty() || file_kd.empty()))) {
+    cout << "error! missing relatedness file. " << endl;
+    error = true;
+  }
+
+  if ((a_mode == 43) && file_kin.empty()) {
+    cout << "error! missing relatedness file. -predict option requires "
+         << "-k option to provide a relatedness file." << endl;
+    error = true;
+  }
+
+  if ((a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14 ||
+       a_mode == 16) &&
+      !file_cvt.empty()) {
+    cout << "error! -bslmm option does not support covariates files." << endl;
+    error = true;
+  }
+
+  if (a_mode == 41 || a_mode == 42) {
+    if (!file_cvt.empty()) {
+      cout << "error! -predict option does not support "
+           << "covariates files." << endl;
+      error = true;
+    }
+    if (file_epm.empty()) {
+      cout << "error! -predict option requires estimated "
+           << "parameter files." << endl;
+      error = true;
+    }
+  }
+
+  if (file_beta.empty() && (a_mode == 27 || a_mode == 28)) {
+    cout << "error! beta effects file is required." << endl;
+    error = true;
+  }
+
+  return;
 }
 
-void PARAM::CheckData (void) {
+void PARAM::CheckData(void) {
 
   // WJA NOTE: I added this condition so that covariates can be added
   // through sample, probably not exactly what is wanted.
-  if(file_oxford.empty())
-	{
-	  if ((file_cvt).empty() || (indicator_cvt).size()==0) {
-	    n_cvt=1;
-	  }
-	}
-
-  if ( (a_mode==66 || a_mode==67) && (v_pve.size()!=n_vc))  {
-    cout<<"error! the number of pve estimates does not equal to "<<
-      "the number of categories in the cat file:"<<v_pve.size()<<" "<<
-      n_vc<<endl;
-    error=true;
-  }
-
-  if ( (indicator_cvt).size()!=0 &&
-       (indicator_cvt).size()!=(indicator_idv).size()) {
-    error=true;
-    cout << "error! number of rows in the covariates file do not "<<
-      "match the number of individuals. "<<endl;
+  if (file_oxford.empty()) {
+    if ((file_cvt).empty() || (indicator_cvt).size() == 0) {
+      n_cvt = 1;
+    }
+  }
+
+  if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) {
+    cout << "error! the number of pve estimates does not equal to "
+         << "the number of categories in the cat file:" << v_pve.size() << " "
+         << n_vc << endl;
+    error = true;
+  }
+
+  if ((indicator_cvt).size() != 0 &&
+      (indicator_cvt).size() != (indicator_idv).size()) {
+    error = true;
+    cout << "error! number of rows in the covariates file do not "
+         << "match the number of individuals. " << endl;
     return;
   }
-  if ( (indicator_gxe).size()!=0 && (indicator_gxe).size() !=
-       (indicator_idv).size()) {
-    error=true;
-    cout<<"error! number of rows in the gxe file do not match the number "<<
-      "of individuals. "<<endl;
+  if ((indicator_gxe).size() != 0 &&
+      (indicator_gxe).size() != (indicator_idv).size()) {
+    error = true;
+    cout << "error! number of rows in the gxe file do not match the number "
+         << "of individuals. " << endl;
     return;
   }
-  if ( (indicator_weight).size()!=0 &&
-       (indicator_weight).size()!=(indicator_idv).size()) {
-    error=true;
-    cout<<"error! number of rows in the weight file do not match "<<
-      "the number of individuals. "<<endl;
+  if ((indicator_weight).size() != 0 &&
+      (indicator_weight).size() != (indicator_idv).size()) {
+    error = true;
+    cout << "error! number of rows in the weight file do not match "
+         << "the number of individuals. " << endl;
     return;
   }
 
-  if ( (indicator_read).size()!=0 &&
-       (indicator_read).size()!=(indicator_idv).size()) {
-    error=true;
-    cout<<"error! number of rows in the total read file do not "<<
-      "match the number of individuals. "<<endl;
+  if ((indicator_read).size() != 0 &&
+      (indicator_read).size() != (indicator_idv).size()) {
+    error = true;
+    cout << "error! number of rows in the total read file do not "
+         << "match the number of individuals. " << endl;
     return;
   }
 
-	// Calculate ni_total and ni_test, and set indicator_idv to 0
-	// whenever indicator_cvt=0, and calculate np_obs and np_miss.
-	ni_total=(indicator_idv).size();
-
-	ni_test=0;
-	for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
-		if (indicator_idv[i]==0) {continue;}
-		ni_test++;
-	}
-
-	ni_cvt=0;
-	for (size_t i=0; i<indicator_cvt.size(); i++) {
-		if (indicator_cvt[i]==0) {continue;}
-		ni_cvt++;
-	}
-
-	np_obs=0; np_miss=0;
-	for (size_t i=0; i<indicator_pheno.size(); i++) {
-		if (indicator_cvt.size()!=0) {
-			if (indicator_cvt[i]==0) {continue;}
-		}
-
-		if (indicator_gxe.size()!=0) {
-			if (indicator_gxe[i]==0) {continue;}
-		}
-
-		if (indicator_weight.size()!=0) {
-			if (indicator_weight[i]==0) {continue;}
-		}
-
-		for (size_t j=0; j<indicator_pheno[i].size(); j++) {
-			if (indicator_pheno[i][j]==0) {
-				np_miss++;
-			} else {
-				np_obs++;
-			}
-		}
-	}
-
-	if (ni_test==0 && file_cor.empty() && file_mstudy.empty() &&
-	    file_study.empty() && file_beta.empty() && file_bf.empty() ) {
-		error=true;
-		cout<<"error! number of analyzed individuals equals 0. "<<endl;
-		return;
-	}
-
-	if (a_mode==43) {
-		if (ni_cvt==ni_test) {
-			error=true;
-			cout<<"error! no individual has missing "<<
-			  "phenotypes."<<endl;
-			return;
-		}
-		if ((np_obs+np_miss)!=(ni_cvt*n_ph)) {
-			error=true;
-			cout<<"error! number of phenotypes do not match the "<<
-			  "summation of missing and observed phenotypes."<<
-			  endl;
-			return;
-		}
-	}
-
-	// Output some information.
-	if (file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
-	    a_mode!=15 && a_mode!=27 && a_mode!=28) {
-	  cout<<"## number of total individuals = "<<ni_total<<endl;
-	  if (a_mode==43) {
-	    cout<<"## number of analyzed individuals = "<<ni_cvt<<endl;
-	    cout<<"## number of individuals with full phenotypes = "<<
-	      ni_test<<endl;
-	  } else {
-	    cout<<"## number of analyzed individuals = "<<ni_test<<endl;
-	  }
-	  cout<<"## number of covariates = "<<n_cvt<<endl;
-	  cout<<"## number of phenotypes = "<<n_ph<<endl;
-	  if (a_mode==43) {
-	    cout<<"## number of observed data = "<<np_obs<<endl;
-	    cout<<"## number of missing data = "<<np_miss<<endl;
-	  }
-	  if (!file_gene.empty()) {
-	    cout<<"## number of total genes = "<<ng_total<<endl;
-	  } else if (file_epm.empty() && a_mode!=43 && a_mode!=5) {
-	    cout<<"## number of total SNPs = "<<ns_total<<endl;
-	    cout<<"## number of analyzed SNPs = "<<ns_test<<endl;
-	  } else {}
-	}
-
-	// Set d_pace to 1000 for gene expression.
-	if (!file_gene.empty() && d_pace==100000) {
-		d_pace=1000;
-	}
-
-	// For case-control studies, count # cases and # controls.
-	int flag_cc=0;
-	if (a_mode==13) {
-		ni_case=0;
-		ni_control=0;
-		for (size_t i=0; i<indicator_idv.size(); i++) {
-			if (indicator_idv[i]==0) {continue;}
-
-			if (pheno[i][0]==0) {ni_control++;}
-			else if (pheno[i][0]==1) {ni_case++;}
-			else {flag_cc=1;}
-		}
-		cout<<"## number of cases = "<<ni_case<<endl;
-		cout<<"## number of controls = "<<ni_control<<endl;
-	}
-
-	if (flag_cc==1) {cout<<"Unexpected non-binary phenotypes for "<<
-	    "case/control analysis. Use default (BSLMM) analysis instead."<<
-	    endl;
-	  a_mode=11;
-	}
-
-	// Set parameters for BSLMM and check for predict.
-	if (a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14) {
-		if (a_mode==11) {
-		  n_mh=1;
-		}
-		if (logp_min==0) {
-		  logp_min=-1.0*log((double)ns_test);
-		}
-
-		if (h_scale==-1) {
-		  h_scale=min(1.0, 10.0/sqrt((double)ni_test) );
-		}
-		if (rho_scale==-1) {
-		  rho_scale=min(1.0, 10.0/sqrt((double)ni_test) );
-		}
-		if (logp_scale==-1) {
-		  logp_scale=min(1.0, 5.0/sqrt((double)ni_test) );
-		}
-
-		if (h_min==-1) {h_min=0.0;}
-		if (h_max==-1) {h_max=1.0;}
-
-		if (s_max>ns_test) {
-		  s_max=ns_test;
-		  cout<<"s_max is re-set to the number of analyzed SNPs."<<
-		    endl;
-		}
-		if (s_max<s_min) {
-		  cout<<"error! maximum s value must be larger than the "<<
-		    "minimal value. current values = "<<s_max<<" and "<<
-		    s_min<<endl;
-		  error=true;
-		}
-	} else if (a_mode==41 || a_mode==42) {
-		if (indicator_bv.size()!=0) {
-		  if (indicator_idv.size()!=indicator_bv.size()) {
-		    cout<<"error! number of rows in the "<<
-		      "phenotype file does not match that in the "<<
-		      "estimated breeding value file: "<<
-		      indicator_idv.size()<<"\t"<<indicator_bv.size()<<
-		      endl;
-		    error=true;
-		  } else {
-		    size_t flag_bv=0;
-		    for (size_t i=0; i<(indicator_bv).size(); ++i) {
-		      if (indicator_idv[i]!=indicator_bv[i]) {flag_bv++;}
-		    }
-		    if (flag_bv!=0) {
-		      cout<<"error! individuals with missing value in the "<<
-			"phenotype file does not match that in the "<<
-			"estimated breeding value file: "<<flag_bv<<endl;
-		      error=true;
-		    }
-		  }
-		}
-	}
-
-	if (a_mode==62 && !file_beta.empty() && mapRS2wcat.size()==0) {
-	  cout<<"vc analysis with beta files requires -wcat file."<<endl;
-	  error=true;
-	}
-	if (a_mode==67 && mapRS2wcat.size()==0) {
-	  cout<<"ci analysis with beta files requires -wcat file."<<endl;
-	  error=true;
-	}
-
-	// File_mk needs to contain more than one line.
-	if (n_vc==1 && !file_mk.empty()) {
-	  cout<<"error! -mk file should contain more than one line."<<endl;
-	  error=true;
-	}
-
-	return;
-}
+  // Calculate ni_total and ni_test, and set indicator_idv to 0
+  // whenever indicator_cvt=0, and calculate np_obs and np_miss.
+  ni_total = (indicator_idv).size();
+
+  ni_test = 0;
+  for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+    if (indicator_idv[i] == 0) {
+      continue;
+    }
+    ni_test++;
+  }
+
+  ni_cvt = 0;
+  for (size_t i = 0; i < indicator_cvt.size(); i++) {
+    if (indicator_cvt[i] == 0) {
+      continue;
+    }
+    ni_cvt++;
+  }
+
+  np_obs = 0;
+  np_miss = 0;
+  for (size_t i = 0; i < indicator_pheno.size(); i++) {
+    if (indicator_cvt.size() != 0) {
+      if (indicator_cvt[i] == 0) {
+        continue;
+      }
+    }
+
+    if (indicator_gxe.size() != 0) {
+      if (indicator_gxe[i] == 0) {
+        continue;
+      }
+    }
+
+    if (indicator_weight.size() != 0) {
+      if (indicator_weight[i] == 0) {
+        continue;
+      }
+    }
+
+    for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+      if (indicator_pheno[i][j] == 0) {
+        np_miss++;
+      } else {
+        np_obs++;
+      }
+    }
+  }
+
+  if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() &&
+      file_study.empty() && file_beta.empty() && file_bf.empty()) {
+    error = true;
+    cout << "error! number of analyzed individuals equals 0. " << endl;
+    return;
+  }
+
+  if (a_mode == 43) {
+    if (ni_cvt == ni_test) {
+      error = true;
+      cout << "error! no individual has missing "
+           << "phenotypes." << endl;
+      return;
+    }
+    if ((np_obs + np_miss) != (ni_cvt * n_ph)) {
+      error = true;
+      cout << "error! number of phenotypes do not match the "
+           << "summation of missing and observed phenotypes." << endl;
+      return;
+    }
+  }
+
+  // Output some information.
+  if (file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
+      a_mode != 15 && a_mode != 27 && a_mode != 28) {
+    cout << "## number of total individuals = " << ni_total << endl;
+    if (a_mode == 43) {
+      cout << "## number of analyzed individuals = " << ni_cvt << endl;
+      cout << "## number of individuals with full phenotypes = " << ni_test
+           << endl;
+    } else {
+      cout << "## number of analyzed individuals = " << ni_test << endl;
+    }
+    cout << "## number of covariates = " << n_cvt << endl;
+    cout << "## number of phenotypes = " << n_ph << endl;
+    if (a_mode == 43) {
+      cout << "## number of observed data = " << np_obs << endl;
+      cout << "## number of missing data = " << np_miss << endl;
+    }
+    if (!file_gene.empty()) {
+      cout << "## number of total genes = " << ng_total << endl;
+    } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) {
+      cout << "## number of total SNPs = " << ns_total << endl;
+      cout << "## number of analyzed SNPs = " << ns_test << endl;
+    } else {
+    }
+  }
+
+  // Set d_pace to 1000 for gene expression.
+  if (!file_gene.empty() && d_pace == 100000) {
+    d_pace = 1000;
+  }
+
+  // For case-control studies, count # cases and # controls.
+  int flag_cc = 0;
+  if (a_mode == 13) {
+    ni_case = 0;
+    ni_control = 0;
+    for (size_t i = 0; i < indicator_idv.size(); i++) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      if (pheno[i][0] == 0) {
+        ni_control++;
+      } else if (pheno[i][0] == 1) {
+        ni_case++;
+      } else {
+        flag_cc = 1;
+      }
+    }
+    cout << "## number of cases = " << ni_case << endl;
+    cout << "## number of controls = " << ni_control << endl;
+  }
+
+  if (flag_cc == 1) {
+    cout << "Unexpected non-binary phenotypes for "
+         << "case/control analysis. Use default (BSLMM) analysis instead."
+         << endl;
+    a_mode = 11;
+  }
+
+  // Set parameters for BSLMM and check for predict.
+  if (a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14) {
+    if (a_mode == 11) {
+      n_mh = 1;
+    }
+    if (logp_min == 0) {
+      logp_min = -1.0 * log((double)ns_test);
+    }
+
+    if (h_scale == -1) {
+      h_scale = min(1.0, 10.0 / sqrt((double)ni_test));
+    }
+    if (rho_scale == -1) {
+      rho_scale = min(1.0, 10.0 / sqrt((double)ni_test));
+    }
+    if (logp_scale == -1) {
+      logp_scale = min(1.0, 5.0 / sqrt((double)ni_test));
+    }
+
+    if (h_min == -1) {
+      h_min = 0.0;
+    }
+    if (h_max == -1) {
+      h_max = 1.0;
+    }
+
+    if (s_max > ns_test) {
+      s_max = ns_test;
+      cout << "s_max is re-set to the number of analyzed SNPs." << endl;
+    }
+    if (s_max < s_min) {
+      cout << "error! maximum s value must be larger than the "
+           << "minimal value. current values = " << s_max << " and " << s_min
+           << endl;
+      error = true;
+    }
+  } else if (a_mode == 41 || a_mode == 42) {
+    if (indicator_bv.size() != 0) {
+      if (indicator_idv.size() != indicator_bv.size()) {
+        cout << "error! number of rows in the "
+             << "phenotype file does not match that in the "
+             << "estimated breeding value file: " << indicator_idv.size()
+             << "\t" << indicator_bv.size() << endl;
+        error = true;
+      } else {
+        size_t flag_bv = 0;
+        for (size_t i = 0; i < (indicator_bv).size(); ++i) {
+          if (indicator_idv[i] != indicator_bv[i]) {
+            flag_bv++;
+          }
+        }
+        if (flag_bv != 0) {
+          cout << "error! individuals with missing value in the "
+               << "phenotype file does not match that in the "
+               << "estimated breeding value file: " << flag_bv << endl;
+          error = true;
+        }
+      }
+    }
+  }
 
-void PARAM::PrintSummary () {
-	if (n_ph==1) {
-		cout<<"pve estimate ="<<pve_null<<endl;
-		cout<<"se(pve) ="<<pve_se_null<<endl;
-	} else {
+  if (a_mode == 62 && !file_beta.empty() && mapRS2wcat.size() == 0) {
+    cout << "vc analysis with beta files requires -wcat file." << endl;
+    error = true;
+  }
+  if (a_mode == 67 && mapRS2wcat.size() == 0) {
+    cout << "ci analysis with beta files requires -wcat file." << endl;
+    error = true;
+  }
+
+  // File_mk needs to contain more than one line.
+  if (n_vc == 1 && !file_mk.empty()) {
+    cout << "error! -mk file should contain more than one line." << endl;
+    error = true;
+  }
+
+  return;
+}
 
-	}
-	return;
+void PARAM::PrintSummary() {
+  if (n_ph == 1) {
+    cout << "pve estimate =" << pve_null << endl;
+    cout << "se(pve) =" << pve_se_null << endl;
+  } else {
+  }
+  return;
 }
 
-void PARAM::ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
-	string file_str;
-
-	if (!file_bfile.empty()) {
-		file_str=file_bfile+".bed";
-		if (ReadFile_bed (file_str, indicator_idv, indicator_snp,
-				  UtX, K, calc_K)==false) {
-		  error=true;
-		}
-	}
-	else {
-		if (ReadFile_geno (file_geno, indicator_idv, indicator_snp,
-				   UtX, K, calc_K)==false) {
-		  error=true;
-		}
-	}
-
-	return;
+void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
+  string file_str;
+
+  if (!file_bfile.empty()) {
+    file_str = file_bfile + ".bed";
+    if (ReadFile_bed(file_str, indicator_idv, indicator_snp, UtX, K, calc_K) ==
+        false) {
+      error = true;
+    }
+  } else {
+    if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K,
+                      calc_K) == false) {
+      error = true;
+    }
+  }
+
+  return;
 }
 
-void PARAM::ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K,
-			   const bool calc_K) {
-	string file_str;
-
-	if (!file_bfile.empty()) {
-		file_str=file_bfile+".bed";
-		if (ReadFile_bed (file_str, indicator_idv, indicator_snp,
-				  Xt, K, calc_K, ni_test, ns_test)==false) {
-		  error=true;
-		}
-	} else {
-		if (ReadFile_geno (file_geno, indicator_idv, indicator_snp,
-				   Xt, K, calc_K, ni_test, ns_test)==false) {
-		  error=true;
-		}
-	}
-
-	return;
+void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+                          const bool calc_K) {
+  string file_str;
+
+  if (!file_bfile.empty()) {
+    file_str = file_bfile + ".bed";
+    if (ReadFile_bed(file_str, indicator_idv, indicator_snp, Xt, K, calc_K,
+                     ni_test, ns_test) == false) {
+      error = true;
+    }
+  } else {
+    if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
+                      ni_test, ns_test) == false) {
+      error = true;
+    }
+  }
+
+  return;
 }
 
-void PARAM::CalcKin (gsl_matrix *matrix_kin)  {
-	string file_str;
-
-	gsl_matrix_set_zero (matrix_kin);
-
-	if (!file_bfile.empty() ) {
-		file_str=file_bfile+".bed";
-		if (PlinkKin (file_str, indicator_snp, a_mode-20, d_pace,
-			      matrix_kin)==false) {
-		  error=true;
-		}
-	}
-	else if (!file_oxford.empty() ) {
-		file_str=file_oxford+".bgen";
-		if (bgenKin (file_str, indicator_snp, a_mode-20, d_pace,
-			     matrix_kin)==false) {
-		  error=true;
-		}
- 	}
-	else {
-		file_str=file_geno;
-		if (BimbamKin (file_str, indicator_snp, a_mode-20, d_pace,
-			       matrix_kin)==false) {
-		  error=true;
-		}
-	}
-
-	return;
+void PARAM::CalcKin(gsl_matrix *matrix_kin) {
+  string file_str;
+
+  gsl_matrix_set_zero(matrix_kin);
+
+  if (!file_bfile.empty()) {
+    file_str = file_bfile + ".bed";
+    if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
+        false) {
+      error = true;
+    }
+  } else if (!file_oxford.empty()) {
+    file_str = file_oxford + ".bgen";
+    if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
+        false) {
+      error = true;
+    }
+  } else {
+    file_str = file_geno;
+    if (BimbamKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
+        false) {
+      error = true;
+    }
+  }
+
+  return;
 }
 
 // From an existing n by nd A and K matrices, compute the d by d S
 // matrix (which is not necessary symmetric).
-void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt,
-		gsl_matrix *S) {
-  size_t n_vc=S->size1, ni_test=A->size1;
+void compAKtoS(const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt,
+               gsl_matrix *S) {
+  size_t n_vc = S->size1, ni_test = A->size1;
   double di, dj, tr_AK, sum_A, sum_K, s_A, s_K, sum_AK, tr_A, tr_K, d;
 
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=0; j<n_vc; j++) {
-      tr_AK=0; sum_A=0; sum_K=0; sum_AK=0; tr_A=0; tr_K=0;
-      for (size_t l=0; l<ni_test; l++) {
-	s_A=0; s_K=0;
-	for (size_t k=0; k<ni_test; k++) {
-	  di=gsl_matrix_get(A, l, k+ni_test*i);
-	  dj=gsl_matrix_get(K, l, k+ni_test*j);
-	  s_A+=di; s_K+=dj;
-
-	  tr_AK+=di*dj; sum_A+=di; sum_K+=dj;
-	  if (l==k) {tr_A+=di; tr_K+=dj;}
-	}
-	sum_AK+=s_A*s_K;
-      }
-
-      sum_A/=(double)ni_test;
-      sum_K/=(double)ni_test;
-      sum_AK/=(double)ni_test;
-      tr_A-=sum_A;
-      tr_K-=sum_K;
-      d=tr_AK-2*sum_AK+sum_A*sum_K;
-
-      if (tr_A==0 || tr_K==0) {
-	d=0;
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = 0; j < n_vc; j++) {
+      tr_AK = 0;
+      sum_A = 0;
+      sum_K = 0;
+      sum_AK = 0;
+      tr_A = 0;
+      tr_K = 0;
+      for (size_t l = 0; l < ni_test; l++) {
+        s_A = 0;
+        s_K = 0;
+        for (size_t k = 0; k < ni_test; k++) {
+          di = gsl_matrix_get(A, l, k + ni_test * i);
+          dj = gsl_matrix_get(K, l, k + ni_test * j);
+          s_A += di;
+          s_K += dj;
+
+          tr_AK += di * dj;
+          sum_A += di;
+          sum_K += dj;
+          if (l == k) {
+            tr_A += di;
+            tr_K += dj;
+          }
+        }
+        sum_AK += s_A * s_K;
+      }
+
+      sum_A /= (double)ni_test;
+      sum_K /= (double)ni_test;
+      sum_AK /= (double)ni_test;
+      tr_A -= sum_A;
+      tr_K -= sum_K;
+      d = tr_AK - 2 * sum_AK + sum_A * sum_K;
+
+      if (tr_A == 0 || tr_K == 0) {
+        d = 0;
       } else {
-	d=d/(tr_A*tr_K)-1/(double)(ni_test-n_cvt);
+        d = d / (tr_A * tr_K) - 1 / (double)(ni_test - n_cvt);
       }
 
-      gsl_matrix_set (S, i, j, d);
+      gsl_matrix_set(S, i, j, d);
     }
   }
 
@@ -1340,187 +1375,195 @@ void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt,
 
 // Copied from lmm.cpp; is used in the following function compKtoV
 // map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
-size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
-	if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {
-	  cout<<"error in GetabIndex."<<endl;
-	  return 0;
-	}
-	size_t index;
-	size_t l, h;
-	if (b>a) {l=a; h=b;} else {l=b; h=a;}
-
-	size_t n=n_cvt+2;
-	index=(2*n-l+2)*(l-1)/2+h-l;
-
-	return index;
+size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt) {
+  if (a > n_cvt + 2 || b > n_cvt + 2 || a <= 0 || b <= 0) {
+    cout << "error in GetabIndex." << endl;
+    return 0;
+  }
+  size_t index;
+  size_t l, h;
+  if (b > a) {
+    l = a;
+    h = b;
+  } else {
+    l = b;
+    h = a;
+  }
+
+  size_t n = n_cvt + 2;
+  index = (2 * n - l + 2) * (l - 1) / 2 + h - l;
+
+  return index;
 }
 
 // From an existing n by nd (centered) G matrix, compute the d+1 by
 // d*(d-1)/2*(d+1) Q matrix where inside i'th d+1 by d+1 matrix, each
 // element is tr(KiKlKjKm)-r*tr(KmKiKl)-r*tr(KlKjKm)+r^2*tr(KlKm),
 // where r=n/(n-1)
-void compKtoV (const gsl_matrix *G, gsl_matrix *V) {
-  size_t n_vc=G->size2/G->size1, ni_test=G->size1;
+void compKtoV(const gsl_matrix *G, gsl_matrix *V) {
+  size_t n_vc = G->size2 / G->size1, ni_test = G->size1;
 
-  gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, (n_vc*(n_vc+1))/2*ni_test);
-  gsl_vector *trKiKj=gsl_vector_alloc( n_vc*(n_vc+1)/2 );
-  gsl_vector *trKi=gsl_vector_alloc(n_vc);
+  gsl_matrix *KiKj =
+      gsl_matrix_alloc(ni_test, (n_vc * (n_vc + 1)) / 2 * ni_test);
+  gsl_vector *trKiKj = gsl_vector_alloc(n_vc * (n_vc + 1) / 2);
+  gsl_vector *trKi = gsl_vector_alloc(n_vc);
 
-  double d, tr, r=(double)ni_test/(double)(ni_test-1);
+  double d, tr, r = (double)ni_test / (double)(ni_test - 1);
   size_t t, t_il, t_jm, t_lm, t_im, t_jl, t_ij;
 
   // Compute KiKj for all pairs of i and j (not including the identity
   // matrix).
-  t=0;
-  for (size_t i=0; i<n_vc; i++) {
-    gsl_matrix_const_view Ki=
-      gsl_matrix_const_submatrix(G, 0, i*ni_test, ni_test, ni_test);
-    for (size_t j=i; j<n_vc; j++) {
-      gsl_matrix_const_view Kj=
-	gsl_matrix_const_submatrix(G, 0, j*ni_test, ni_test, ni_test);
-      gsl_matrix_view KiKj_sub=
-	gsl_matrix_submatrix (KiKj, 0, t*ni_test, ni_test, ni_test);
-      eigenlib_dgemm ("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0,
-		      &KiKj_sub.matrix);
+  t = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    gsl_matrix_const_view Ki =
+        gsl_matrix_const_submatrix(G, 0, i * ni_test, ni_test, ni_test);
+    for (size_t j = i; j < n_vc; j++) {
+      gsl_matrix_const_view Kj =
+          gsl_matrix_const_submatrix(G, 0, j * ni_test, ni_test, ni_test);
+      gsl_matrix_view KiKj_sub =
+          gsl_matrix_submatrix(KiKj, 0, t * ni_test, ni_test, ni_test);
+      eigenlib_dgemm("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0,
+                     &KiKj_sub.matrix);
       t++;
     }
   }
 
   // Compute trKi, trKiKj.
-  t=0;
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=i; j<n_vc; j++) {
-      tr=0;
-      for (size_t k=0; k<ni_test; k++) {
-	tr+=gsl_matrix_get (KiKj, k, t*ni_test+k);
+  t = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = i; j < n_vc; j++) {
+      tr = 0;
+      for (size_t k = 0; k < ni_test; k++) {
+        tr += gsl_matrix_get(KiKj, k, t * ni_test + k);
       }
-      gsl_vector_set (trKiKj, t, tr);
+      gsl_vector_set(trKiKj, t, tr);
 
       t++;
     }
 
-    tr=0;
-    for (size_t k=0; k<ni_test; k++) {
-      tr+=gsl_matrix_get (G, k, i*ni_test+k);
+    tr = 0;
+    for (size_t k = 0; k < ni_test; k++) {
+      tr += gsl_matrix_get(G, k, i * ni_test + k);
     }
-    gsl_vector_set (trKi, i, tr);
+    gsl_vector_set(trKi, i, tr);
   }
 
   // Compute V.
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=i; j<n_vc; j++) {
-      t_ij=GetabIndex (i+1, j+1, n_vc-2);
-      for (size_t l=0; l<n_vc+1; l++) {
-	for (size_t m=0; m<n_vc+1; m++) {
-	  if (l!=n_vc && m!=n_vc) {
-	    t_il=GetabIndex (i+1, l+1, n_vc-2);
-	    t_jm=GetabIndex (j+1, m+1, n_vc-2);
-	    t_lm=GetabIndex (l+1, m+1, n_vc-2);
-	    tr=0;
-	    for (size_t k=0; k<ni_test; k++) {
-	      gsl_vector_const_view KiKl_row=
-		gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
-	      gsl_vector_const_view KiKl_col=
-		gsl_matrix_const_column (KiKj, t_il*ni_test+k);
-	      gsl_vector_const_view KjKm_row=
-		gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test);
-	      gsl_vector_const_view KjKm_col=
-		gsl_matrix_const_column (KiKj, t_jm*ni_test+k);
-
-	      gsl_vector_const_view Kl_row=
-		gsl_matrix_const_subrow (G, k, l*ni_test, ni_test);
-	      gsl_vector_const_view Km_row=
-		gsl_matrix_const_subrow (G, k, m*ni_test, ni_test);
-
-	      if (i<=l && j<=m) {
-		gsl_blas_ddot (&KiKl_row.vector, &KjKm_col.vector, &d);
-		tr+=d;
-		gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d);
-		tr-=r*d;
-		gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d);
-		tr-=r*d;
-	      } else if (i<=l && j>m) {
-		gsl_blas_ddot (&KiKl_row.vector, &KjKm_row.vector, &d);
-		tr+=d;
-		gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d);
-		tr-=r*d;
-		gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d);
-		tr-=r*d;
-	      } else if (i>l && j<=m) {
-		gsl_blas_ddot (&KiKl_col.vector, &KjKm_col.vector, &d);
-		tr+=d;
-		gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d);
-		tr-=r*d;
-		gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d);
-		tr-=r*d;
-	      } else {
-		gsl_blas_ddot (&KiKl_col.vector, &KjKm_row.vector, &d);
-		tr+=d;
-		gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d);
-		tr-=r*d;
-		gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d);
-		tr-=r*d;
-	      }
-	    }
-
-	    tr+=r*r*gsl_vector_get (trKiKj, t_lm);
-	  } else if (l!=n_vc && m==n_vc) {
-	    t_il=GetabIndex (i+1, l+1, n_vc-2);
-	    t_jl=GetabIndex (j+1, l+1, n_vc-2);
-	    tr=0;
-	    for (size_t k=0; k<ni_test; k++) {
-	      gsl_vector_const_view KiKl_row=
-		gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
-	      gsl_vector_const_view KiKl_col=
-		gsl_matrix_const_column (KiKj, t_il*ni_test+k);
-	      gsl_vector_const_view Kj_row=
-		gsl_matrix_const_subrow (G, k, j*ni_test, ni_test);
-
-	      if (i<=l) {
-		gsl_blas_ddot (&KiKl_row.vector, &Kj_row.vector, &d);
-		tr+=d;
-	      } else {
-		gsl_blas_ddot (&KiKl_col.vector, &Kj_row.vector, &d);
-		tr+=d;
-	      }
-	    }
-	    tr+=-r*gsl_vector_get (trKiKj, t_il) -
-	      r*gsl_vector_get (trKiKj, t_jl)+r*r*gsl_vector_get (trKi, l);
-	  } else if (l==n_vc && m!=n_vc) {
-	    t_jm=GetabIndex (j+1, m+1, n_vc-2);
-	    t_im=GetabIndex (i+1, m+1, n_vc-2);
-	    tr=0;
-	    for (size_t k=0; k<ni_test; k++) {
-	      gsl_vector_const_view KjKm_row=
-		gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test);
-	      gsl_vector_const_view KjKm_col=
-		gsl_matrix_const_column (KiKj, t_jm*ni_test+k);
-	      gsl_vector_const_view Ki_row=
-		gsl_matrix_const_subrow (G, k, i*ni_test, ni_test);
-
-	      if (j<=m) {
-		gsl_blas_ddot (&KjKm_row.vector, &Ki_row.vector, &d);
-		tr+=d;
-	      } else {
-		gsl_blas_ddot (&KjKm_col.vector, &Ki_row.vector, &d);
-		tr+=d;
-	      }
-	    }
-	    tr+=-r*gsl_vector_get (trKiKj, t_im) -
-	      r*gsl_vector_get (trKiKj, t_jm)+r*r*gsl_vector_get (trKi, m);
-	  } else {
-	    tr=gsl_vector_get (trKiKj, t_ij) -
-	      r*gsl_vector_get (trKi, i) -
-	      r*gsl_vector_get (trKi, j)+r*r*(double)(ni_test-1);
-	  }
-
-	  gsl_matrix_set (V, l, t_ij*(n_vc+1)+m, tr);
-	}
-      }
-    }
-  }
-
-  gsl_matrix_scale (V, 1.0/pow((double)ni_test, 2) );
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = i; j < n_vc; j++) {
+      t_ij = GetabIndex(i + 1, j + 1, n_vc - 2);
+      for (size_t l = 0; l < n_vc + 1; l++) {
+        for (size_t m = 0; m < n_vc + 1; m++) {
+          if (l != n_vc && m != n_vc) {
+            t_il = GetabIndex(i + 1, l + 1, n_vc - 2);
+            t_jm = GetabIndex(j + 1, m + 1, n_vc - 2);
+            t_lm = GetabIndex(l + 1, m + 1, n_vc - 2);
+            tr = 0;
+            for (size_t k = 0; k < ni_test; k++) {
+              gsl_vector_const_view KiKl_row =
+                  gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test);
+              gsl_vector_const_view KiKl_col =
+                  gsl_matrix_const_column(KiKj, t_il * ni_test + k);
+              gsl_vector_const_view KjKm_row =
+                  gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test);
+              gsl_vector_const_view KjKm_col =
+                  gsl_matrix_const_column(KiKj, t_jm * ni_test + k);
+
+              gsl_vector_const_view Kl_row =
+                  gsl_matrix_const_subrow(G, k, l * ni_test, ni_test);
+              gsl_vector_const_view Km_row =
+                  gsl_matrix_const_subrow(G, k, m * ni_test, ni_test);
+
+              if (i <= l && j <= m) {
+                gsl_blas_ddot(&KiKl_row.vector, &KjKm_col.vector, &d);
+                tr += d;
+                gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d);
+                tr -= r * d;
+                gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d);
+                tr -= r * d;
+              } else if (i <= l && j > m) {
+                gsl_blas_ddot(&KiKl_row.vector, &KjKm_row.vector, &d);
+                tr += d;
+                gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d);
+                tr -= r * d;
+                gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d);
+                tr -= r * d;
+              } else if (i > l && j <= m) {
+                gsl_blas_ddot(&KiKl_col.vector, &KjKm_col.vector, &d);
+                tr += d;
+                gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d);
+                tr -= r * d;
+                gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d);
+                tr -= r * d;
+              } else {
+                gsl_blas_ddot(&KiKl_col.vector, &KjKm_row.vector, &d);
+                tr += d;
+                gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d);
+                tr -= r * d;
+                gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d);
+                tr -= r * d;
+              }
+            }
+
+            tr += r * r * gsl_vector_get(trKiKj, t_lm);
+          } else if (l != n_vc && m == n_vc) {
+            t_il = GetabIndex(i + 1, l + 1, n_vc - 2);
+            t_jl = GetabIndex(j + 1, l + 1, n_vc - 2);
+            tr = 0;
+            for (size_t k = 0; k < ni_test; k++) {
+              gsl_vector_const_view KiKl_row =
+                  gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test);
+              gsl_vector_const_view KiKl_col =
+                  gsl_matrix_const_column(KiKj, t_il * ni_test + k);
+              gsl_vector_const_view Kj_row =
+                  gsl_matrix_const_subrow(G, k, j * ni_test, ni_test);
+
+              if (i <= l) {
+                gsl_blas_ddot(&KiKl_row.vector, &Kj_row.vector, &d);
+                tr += d;
+              } else {
+                gsl_blas_ddot(&KiKl_col.vector, &Kj_row.vector, &d);
+                tr += d;
+              }
+            }
+            tr += -r * gsl_vector_get(trKiKj, t_il) -
+                  r * gsl_vector_get(trKiKj, t_jl) +
+                  r * r * gsl_vector_get(trKi, l);
+          } else if (l == n_vc && m != n_vc) {
+            t_jm = GetabIndex(j + 1, m + 1, n_vc - 2);
+            t_im = GetabIndex(i + 1, m + 1, n_vc - 2);
+            tr = 0;
+            for (size_t k = 0; k < ni_test; k++) {
+              gsl_vector_const_view KjKm_row =
+                  gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test);
+              gsl_vector_const_view KjKm_col =
+                  gsl_matrix_const_column(KiKj, t_jm * ni_test + k);
+              gsl_vector_const_view Ki_row =
+                  gsl_matrix_const_subrow(G, k, i * ni_test, ni_test);
+
+              if (j <= m) {
+                gsl_blas_ddot(&KjKm_row.vector, &Ki_row.vector, &d);
+                tr += d;
+              } else {
+                gsl_blas_ddot(&KjKm_col.vector, &Ki_row.vector, &d);
+                tr += d;
+              }
+            }
+            tr += -r * gsl_vector_get(trKiKj, t_im) -
+                  r * gsl_vector_get(trKiKj, t_jm) +
+                  r * r * gsl_vector_get(trKi, m);
+          } else {
+            tr = gsl_vector_get(trKiKj, t_ij) - r * gsl_vector_get(trKi, i) -
+                 r * gsl_vector_get(trKi, j) + r * r * (double)(ni_test - 1);
+          }
+
+          gsl_matrix_set(V, l, t_ij * (n_vc + 1) + m, tr);
+        }
+      }
+    }
+  }
+
+  gsl_matrix_scale(V, 1.0 / pow((double)ni_test, 2));
 
   gsl_matrix_free(KiKj);
   gsl_vector_free(trKiKj);
@@ -1530,21 +1573,21 @@ void compKtoV (const gsl_matrix *G, gsl_matrix *V) {
 }
 
 // Perform Jacknife sampling for variance of S.
-void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A,
-		     const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) {
-  size_t n_vc=Svar->size1, ni_test=A->size1, n_cvt=W->size2;
+void JackknifeAKtoS(const gsl_matrix *W, const gsl_matrix *A,
+                    const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) {
+  size_t n_vc = Svar->size1, ni_test = A->size1, n_cvt = W->size2;
 
-  vector<vector<vector<double> > > trAK, sumAK;
-  vector<vector<double> > sumA, sumK, trA, trK, sA, sK;
+  vector<vector<vector<double>>> trAK, sumAK;
+  vector<vector<double>> sumA, sumK, trA, trK, sA, sK;
   vector<double> vec_tmp;
   double di, dj, d, m, v;
 
   // Initialize and set all elements to zero.
-  for (size_t i=0; i<ni_test; i++) {
+  for (size_t i = 0; i < ni_test; i++) {
     vec_tmp.push_back(0);
   }
 
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     sumA.push_back(vec_tmp);
     sumK.push_back(vec_tmp);
     trA.push_back(vec_tmp);
@@ -1553,82 +1596,93 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A,
     sK.push_back(vec_tmp);
   }
 
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     trAK.push_back(sumK);
     sumAK.push_back(sumK);
   }
 
   // Run jackknife.
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t l=0; l<ni_test; l++) {
-      for (size_t k=0; k<ni_test; k++) {
-	di=gsl_matrix_get(A, l, k+ni_test*i);
-	dj=gsl_matrix_get(K, l, k+ni_test*i);
-
-	for (size_t t=0; t<ni_test; t++) {
-	  if (t==l || t==k) {continue;}
-	  sumA[i][t]+=di;
-	  sumK[i][t]+=dj;
-	  if (l==k) {trA[i][t]+=di; trK[i][t]+=dj;}
-	}
-	sA[i][l]+=di;
-	sK[i][l]+=dj;
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t l = 0; l < ni_test; l++) {
+      for (size_t k = 0; k < ni_test; k++) {
+        di = gsl_matrix_get(A, l, k + ni_test * i);
+        dj = gsl_matrix_get(K, l, k + ni_test * i);
+
+        for (size_t t = 0; t < ni_test; t++) {
+          if (t == l || t == k) {
+            continue;
+          }
+          sumA[i][t] += di;
+          sumK[i][t] += dj;
+          if (l == k) {
+            trA[i][t] += di;
+            trK[i][t] += dj;
+          }
+        }
+        sA[i][l] += di;
+        sK[i][l] += dj;
       }
     }
 
-    for (size_t t=0; t<ni_test; t++) {
-      sumA[i][t]/=(double)(ni_test-1);
-      sumK[i][t]/=(double)(ni_test-1);
+    for (size_t t = 0; t < ni_test; t++) {
+      sumA[i][t] /= (double)(ni_test - 1);
+      sumK[i][t] /= (double)(ni_test - 1);
     }
   }
 
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=0; j<n_vc; j++) {
-      for (size_t l=0; l<ni_test; l++) {
-	for (size_t k=0; k<ni_test; k++) {
-	  di=gsl_matrix_get(A, l, k+ni_test*i);
-	  dj=gsl_matrix_get(K, l, k+ni_test*j);
-	  d=di*dj;
-
-	  for (size_t t=0; t<ni_test; t++) {
-	    if (t==l || t==k) {continue;}
-	    trAK[i][j][t]+=d;
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = 0; j < n_vc; j++) {
+      for (size_t l = 0; l < ni_test; l++) {
+        for (size_t k = 0; k < ni_test; k++) {
+          di = gsl_matrix_get(A, l, k + ni_test * i);
+          dj = gsl_matrix_get(K, l, k + ni_test * j);
+          d = di * dj;
+
+          for (size_t t = 0; t < ni_test; t++) {
+            if (t == l || t == k) {
+              continue;
+            }
+            trAK[i][j][t] += d;
           }
-	}
+        }
 
-	for (size_t t=0; t<ni_test; t++) {
-	  if (t==l) {continue;}
-	  di=gsl_matrix_get(A, l, t+ni_test*i);
-	  dj=gsl_matrix_get(K, l, t+ni_test*j);
+        for (size_t t = 0; t < ni_test; t++) {
+          if (t == l) {
+            continue;
+          }
+          di = gsl_matrix_get(A, l, t + ni_test * i);
+          dj = gsl_matrix_get(K, l, t + ni_test * j);
 
-	  sumAK[i][j][t]+=(sA[i][l]-di)*(sK[j][l]-dj);
-	}
+          sumAK[i][j][t] += (sA[i][l] - di) * (sK[j][l] - dj);
+        }
       }
 
-      for (size_t t=0; t<ni_test; t++) {
-	sumAK[i][j][t]/=(double)(ni_test-1);
+      for (size_t t = 0; t < ni_test; t++) {
+        sumAK[i][j][t] /= (double)(ni_test - 1);
       }
 
-      m=0; v=0;
-      for (size_t t=0; t<ni_test; t++) {
-	d=trAK[i][j][t]-2*sumAK[i][j][t]+sumA[i][t]*sumK[j][t];
-	if ( (trA[i][t]-sumA[i][t])==0 || (trK[j][t]-sumK[j][t])==0) {
-	  d=0;
-	} else {
-	  d/=(trA[i][t]-sumA[i][t])*(trK[j][t]-sumK[j][t]);
-	  d-=1/(double)(ni_test-n_cvt-1);
-	}
-	m+=d; v+=d*d;
+      m = 0;
+      v = 0;
+      for (size_t t = 0; t < ni_test; t++) {
+        d = trAK[i][j][t] - 2 * sumAK[i][j][t] + sumA[i][t] * sumK[j][t];
+        if ((trA[i][t] - sumA[i][t]) == 0 || (trK[j][t] - sumK[j][t]) == 0) {
+          d = 0;
+        } else {
+          d /= (trA[i][t] - sumA[i][t]) * (trK[j][t] - sumK[j][t]);
+          d -= 1 / (double)(ni_test - n_cvt - 1);
+        }
+        m += d;
+        v += d * d;
       }
-      m/=(double)ni_test;
-      v/=(double)ni_test;
-      v-=m*m;
-      v*=(double)(ni_test-1);
-      gsl_matrix_set (Svar, i, j, v);
-      if (n_cvt==1) {
-	d=gsl_matrix_get (S, i, j);
-      	d=(double)ni_test*d-(double)(ni_test-1)*m;
-	gsl_matrix_set (S, i, j, d);
+      m /= (double)ni_test;
+      v /= (double)ni_test;
+      v -= m * m;
+      v *= (double)(ni_test - 1);
+      gsl_matrix_set(Svar, i, j, v);
+      if (n_cvt == 1) {
+        d = gsl_matrix_get(S, i, j);
+        d = (double)ni_test * d - (double)(ni_test - 1) * m;
+        gsl_matrix_set(S, i, j, d);
       }
     }
   }
@@ -1638,561 +1692,590 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A,
 
 // Compute the d by d S matrix with its d by d variance matrix of
 // Svar, and the d+1 by d(d+1) matrix of Q for V(q).
-void PARAM::CalcS (const map<string, double> &mapRS2wA,
-		   const map<string, double> &mapRS2wK,
-		   const gsl_matrix *W, gsl_matrix *A,
-		   gsl_matrix *K, gsl_matrix *S,
-		   gsl_matrix *Svar, gsl_vector *ns)  {
+void PARAM::CalcS(const map<string, double> &mapRS2wA,
+                  const map<string, double> &mapRS2wK, const gsl_matrix *W,
+                  gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar,
+                  gsl_vector *ns) {
   string file_str;
 
-  gsl_matrix_set_zero (S);
-  gsl_matrix_set_zero (Svar);
-  gsl_vector_set_zero (ns);
+  gsl_matrix_set_zero(S);
+  gsl_matrix_set_zero(Svar);
+  gsl_vector_set_zero(ns);
 
   // Compute the kinship matrix G for multiple categories; these
   // matrices are not centered, for convienence of Jacknife sampling.
-  if (!file_bfile.empty() ) {
-    file_str=file_bfile+".bed";
-    if (mapRS2wA.size()==0) {
-      if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK,
-		    mapRS2cat, snpInfo, W, K, ns)==false) {
-	error=true;
+  if (!file_bfile.empty()) {
+    file_str = file_bfile + ".bed";
+    if (mapRS2wA.size() == 0) {
+      if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK,
+                   mapRS2cat, snpInfo, W, K, ns) == false) {
+        error = true;
       }
     } else {
-      if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA,
-		    mapRS2cat, snpInfo, W, A, ns)==false) {
-	error=true;
+      if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA,
+                   mapRS2cat, snpInfo, W, A, ns) == false) {
+        error = true;
       }
     }
   } else if (!file_geno.empty()) {
-    file_str=file_geno;
-    if (mapRS2wA.size()==0) {
-      if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp,
-		     mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) {
-	error=true;
+    file_str = file_geno;
+    if (mapRS2wA.size() == 0) {
+      if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK,
+                    mapRS2cat, snpInfo, W, K, ns) == false) {
+        error = true;
       }
     } else {
-      if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp,
-		     mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) {
-	error=true;
+      if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA,
+                    mapRS2cat, snpInfo, W, A, ns) == false) {
+        error = true;
       }
     }
-  } else if (!file_mbfile.empty() ){
-    if (mapRS2wA.size()==0) {
-      if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
-		    mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {
-	error=true;
+  } else if (!file_mbfile.empty()) {
+    if (mapRS2wA.size() == 0) {
+      if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
+                   mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) {
+        error = true;
       }
     } else {
-      if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
-		    mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {
-	error=true;
+      if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
+                   mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) {
+        error = true;
       }
     }
   } else if (!file_mgeno.empty()) {
-    if (mapRS2wA.size()==0) {
-      if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
-		    mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {
-	error=true;
+    if (mapRS2wA.size() == 0) {
+      if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
+                   mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) {
+        error = true;
       }
     } else {
-      if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
-		    mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {
-	error=true;
+      if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
+                   mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) {
+        error = true;
       }
     }
   }
 
-  if (mapRS2wA.size()==0) {
-    gsl_matrix_memcpy (A, K);
+  if (mapRS2wA.size() == 0) {
+    gsl_matrix_memcpy(A, K);
   }
 
   // Center and scale every kinship matrix inside G.
-  for (size_t i=0; i<n_vc; i++) {
-    gsl_matrix_view Ksub=gsl_matrix_submatrix(K,0,i*ni_test,ni_test,ni_test);
+  for (size_t i = 0; i < n_vc; i++) {
+    gsl_matrix_view Ksub =
+        gsl_matrix_submatrix(K, 0, i * ni_test, ni_test, ni_test);
     CenterMatrix(&Ksub.matrix);
     ScaleMatrix(&Ksub.matrix);
 
-    gsl_matrix_view Asub=gsl_matrix_submatrix(A,0,i*ni_test,ni_test,ni_test);
+    gsl_matrix_view Asub =
+        gsl_matrix_submatrix(A, 0, i * ni_test, ni_test, ni_test);
     CenterMatrix(&Asub.matrix);
     ScaleMatrix(&Asub.matrix);
   }
 
   // Cased on G, compute S.
-  compAKtoS (A, K, W->size2, S);
+  compAKtoS(A, K, W->size2, S);
 
   // Compute Svar and update S with Jacknife.
-  JackknifeAKtoS (W, A, K, S, Svar);
+  JackknifeAKtoS(W, A, K, S, Svar);
 
   return;
 }
 
-void PARAM::WriteVector (const gsl_vector *q, const gsl_vector *s,
-			 const size_t n_total, const string suffix) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".";
-	file_str+=suffix;
-	file_str+=".txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
+void PARAM::WriteVector(const gsl_vector *q, const gsl_vector *s,
+                        const size_t n_total, const string suffix) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".";
+  file_str += suffix;
+  file_str += ".txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
 
-	outfile.precision(10);
+  outfile.precision(10);
 
-	for (size_t i=0; i<q->size; ++i) {
-	  outfile<<gsl_vector_get (q, i)<<endl;
-	}
+  for (size_t i = 0; i < q->size; ++i) {
+    outfile << gsl_vector_get(q, i) << endl;
+  }
 
-	for (size_t i=0; i<s->size; ++i) {
-	  outfile<<gsl_vector_get (s, i)<<endl;
-	}
+  for (size_t i = 0; i < s->size; ++i) {
+    outfile << gsl_vector_get(s, i) << endl;
+  }
 
-	outfile<<n_total<<endl;
+  outfile << n_total << endl;
 
-	outfile.close();
-	outfile.clear();
-	return;
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
-void PARAM::WriteVar (const string suffix) {
+void PARAM::WriteVar(const string suffix) {
   string file_str, rs;
-	file_str=path_out+"/"+file_out;
-	file_str+=".";
-	file_str+=suffix;
-	file_str+=".txt.gz";
-
-	ogzstream outfile (file_str.c_str(), ogzstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	outfile.precision(10);
-
-	if (mindicator_snp.size()!=0) {
-	  for (size_t t=0; t<mindicator_snp.size(); t++) {
-	    indicator_snp=mindicator_snp[t];
-	    for (size_t i=0; i<indicator_snp.size(); i++) {
-	      if (indicator_snp[i]==0) {continue;}
-	      rs=snpInfo[i].rs_number;
-	      outfile<<rs<<endl;
-	    }
-	  }
-	} else {
-	  for (size_t i=0; i<indicator_snp.size(); i++) {
-	    if (indicator_snp[i]==0) {continue;}
-	    rs=snpInfo[i].rs_number;
-	    outfile<<rs<<endl;
-	  }
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
-}
+  file_str = path_out + "/" + file_out;
+  file_str += ".";
+  file_str += suffix;
+  file_str += ".txt.gz";
+
+  ogzstream outfile(file_str.c_str(), ogzstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  outfile.precision(10);
+
+  if (mindicator_snp.size() != 0) {
+    for (size_t t = 0; t < mindicator_snp.size(); t++) {
+      indicator_snp = mindicator_snp[t];
+      for (size_t i = 0; i < indicator_snp.size(); i++) {
+        if (indicator_snp[i] == 0) {
+          continue;
+        }
+        rs = snpInfo[i].rs_number;
+        outfile << rs << endl;
+      }
+    }
+  } else {
+    for (size_t i = 0; i < indicator_snp.size(); i++) {
+      if (indicator_snp[i] == 0) {
+        continue;
+      }
+      rs = snpInfo[i].rs_number;
+      outfile << rs << endl;
+    }
+  }
 
-void PARAM::WriteMatrix (const gsl_matrix *matrix_U, const string suffix) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".";
-	file_str+=suffix;
-	file_str+=".txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	outfile.precision(10);
-
-	for (size_t i=0; i<matrix_U->size1; ++i) {
-		for (size_t j=0; j<matrix_U->size2; ++j) {
-			outfile<<gsl_matrix_get (matrix_U, i, j)<<"\t";
-		}
-		outfile<<endl;
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
-void PARAM::WriteVector (const gsl_vector *vector_D, const string suffix) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".";
-	file_str+=suffix;
-	file_str+=".txt";
+void PARAM::WriteMatrix(const gsl_matrix *matrix_U, const string suffix) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".";
+  file_str += suffix;
+  file_str += ".txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  outfile.precision(10);
 
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
+  for (size_t i = 0; i < matrix_U->size1; ++i) {
+    for (size_t j = 0; j < matrix_U->size2; ++j) {
+      outfile << gsl_matrix_get(matrix_U, i, j) << "\t";
+    }
+    outfile << endl;
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
+}
+
+void PARAM::WriteVector(const gsl_vector *vector_D, const string suffix) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".";
+  file_str += suffix;
+  file_str += ".txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
 
-	outfile.precision(10);
+  outfile.precision(10);
 
-	for (size_t i=0; i<vector_D->size; ++i) {
-		outfile<<gsl_vector_get (vector_D, i)<<endl;
-	}
+  for (size_t i = 0; i < vector_D->size; ++i) {
+    outfile << gsl_vector_get(vector_D, i) << endl;
+  }
 
-	outfile.close();
-	outfile.clear();
-	return;
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
-void PARAM::CheckCvt () {
-	if (indicator_cvt.size()==0) {return;}
-
-	size_t ci_test=0;
-
-	gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-
-	for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;}
-		for (size_t j=0; j<n_cvt; ++j) {
-			gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
-		}
-		ci_test++;
-	}
-
-	size_t flag_ipt=0;
-	double v_min, v_max;
-	set<size_t> set_remove;
-
-	// Check if any columns is an intercept.
-	for (size_t i=0; i<W->size2; i++) {
-		gsl_vector_view w_col=gsl_matrix_column (W, i);
-		gsl_vector_minmax (&w_col.vector, &v_min, &v_max);
-		if (v_min==v_max) {flag_ipt=1; set_remove.insert (i);}
-	}
-
-	// Add an intecept term if needed.
-	if (n_cvt==set_remove.size()) {
-		indicator_cvt.clear();
-		n_cvt=1;
-	} else if (flag_ipt==0) {
-	  cout<<"no intecept term is found in the cvt file. "<<
-	    "a column of 1s is added."<<endl;
-		for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		  if (indicator_idv[i]==0 || indicator_cvt[i]==0) {
-		    continue;
-		  }
-		  cvt[i].push_back(1.0);
-		}
-
-		n_cvt++;
-	} else {}
-
-	gsl_matrix_free(W);
-
-	return;
+void PARAM::CheckCvt() {
+  // Looks for an intercept (a constant column) among the covariates of
+  // the analyzed individuals. If every column is constant the covariate
+  // indicators are cleared and n_cvt is set to 1; if none is constant a
+  // 1 is appended to each analyzed row of cvt. No-op without covariates.
+  if (indicator_cvt.empty()) {
+    return;
+  }
+
+  // Gather the covariates of the analyzed individuals, one row each.
+  gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+  size_t row = 0;
+  for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+    if (indicator_idv[i] != 0 && indicator_cvt[i] != 0) {
+      for (size_t j = 0; j < n_cvt; ++j) {
+        gsl_matrix_set(W, row, j, cvt[i][j]);
+      }
+      row++;
+    }
+  }
+
+  // Collect the columns whose minimum equals their maximum, i.e. the
+  // constant columns, which are treated as intercepts.
+  set<size_t> constant_cols;
+  for (size_t c = 0; c < W->size2; c++) {
+    double lo, hi;
+    gsl_vector_view column = gsl_matrix_column(W, c);
+    gsl_vector_minmax(&column.vector, &lo, &hi);
+    if (lo == hi) {
+      constant_cols.insert(c);
+    }
+  }
+
+  if (constant_cols.size() == n_cvt) {
+    // Every covariate column is constant.
+    indicator_cvt.clear();
+    n_cvt = 1;
+  } else if (constant_cols.empty()) {
+    // Add an intercept term, as none of the columns is constant.
+    cout << "no intecept term is found in the cvt file. "
+         << "a column of 1s is added." << endl;
+    for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+      if (indicator_idv[i] != 0 && indicator_cvt[i] != 0) {
+        cvt[i].push_back(1.0);
+      }
+    }
+
+    n_cvt++;
+  }
+
+  // W was only a scratch copy for the scan above.
+  gsl_matrix_free(W);
+
+  return;
 }
 
 // Post-process phentoypes and covariates.
-void PARAM::ProcessCvtPhen () {
-
- 	// Convert indicator_pheno to indicator_idv.
-	int k=1;
-	indicator_idv.clear();
-	for (size_t i=0; i<indicator_pheno.size(); i++) {
-		k=1;
-		for (size_t j=0; j<indicator_pheno[i].size(); j++) {
-			if (indicator_pheno[i][j]==0) {k=0;}
-		}
-		indicator_idv.push_back(k);
-	}
-
-	// Remove individuals with missing covariates.
-	if ((indicator_cvt).size()!=0) {
-		for (vector<int>::size_type i=0;
-		     i<(indicator_idv).size();
-		     ++i) {
-			indicator_idv[i]*=indicator_cvt[i];
-		}
-	}
-
-	// Remove individuals with missing gxe variables.
-	if ((indicator_gxe).size()!=0) {
-		for (vector<int>::size_type i=0;
-		     i<(indicator_idv).size();
-		     ++i) {
-			indicator_idv[i]*=indicator_gxe[i];
-		}
-	}
-
-	// Remove individuals with missing residual weights.
-	if ((indicator_weight).size()!=0) {
-		for (vector<int>::size_type i=0;
-		     i<(indicator_idv).size();
-		     ++i) {
-			indicator_idv[i]*=indicator_weight[i];
-		}
-	}
-
-	// Obtain ni_test.
-	ni_test=0;
-	for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
-	    if (indicator_idv[i]==0) {continue;}
-		ni_test++;
-	}
-
-	// If subsample number is set, perform a random sub-sampling
-	// to determine the subsampled ids.
-	if (ni_subsample!=0) {
-	  if (ni_test<ni_subsample) {
-	    cout<<"error! number of subsamples is less than number of"<<
-	      "analyzed individuals. "<<endl;
-	  } else {
-
-	    // Set up random environment.
-	    gsl_rng_env_setup();
-	    gsl_rng *gsl_r;
-	    const gsl_rng_type * gslType;
-	    gslType = gsl_rng_default;
-	    if (randseed<0) {
-	      time_t rawtime;
-	      time (&rawtime);
-	      tm * ptm = gmtime (&rawtime);
-
-	      randseed = (unsigned)
-		(ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec);
-	    }
-	    gsl_r = gsl_rng_alloc(gslType);
-	    gsl_rng_set(gsl_r, randseed);
-
-	    // From ni_test, sub-sample ni_subsample.
-	    vector<size_t> a, b;
-	    for (size_t i=0; i<ni_subsample; i++) {
-              a.push_back(0);
-	    }
-	    for (size_t i=0; i<ni_test; i++) {
-	      b.push_back(i);
-	    }
-
-	    gsl_ran_choose (gsl_r, static_cast<void*>(&a[0]), ni_subsample,
-			    static_cast<void*>(&b[0]),ni_test,sizeof (size_t));
-
-	    // Re-set indicator_idv and ni_test.
-	    int j=0;
-	    for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
-	      if (indicator_idv[i]==0) {continue;}
-	      if(find(a.begin(), a.end(), j) == a.end()) {
-		indicator_idv[i]=0;
-	      }
-	      j++;
-	    }
-	    ni_test=ni_subsample;
-	  }
-	}
-
-	// Check ni_test.
-	if (ni_test==0 && a_mode!=15) {
-		error=true;
-		cout<<"error! number of analyzed individuals equals 0. "<<endl;
-		return;
-	}
-
-	// Check covariates to see if they are correlated with each
-	// other, and to see if the intercept term is included.
-	// After getting ni_test.
-	// Add or remove covariates.
-	if (indicator_cvt.size()!=0) {
-		CheckCvt();
-	} else {
-		vector<double> cvt_row;
-		cvt_row.push_back(1);
-
-		for (vector<int>::size_type i=0;
-		     i<(indicator_idv).size();
-		     ++i) {
-			indicator_cvt.push_back(1);
-			cvt.push_back(cvt_row);
-		}
-	}
-
-	return;
+void PARAM::ProcessCvtPhen() {
+  // Builds indicator_idv from the phenotype, covariate, gxe and weight
+  // indicators, counts the analyzed individuals into ni_test, optionally
+  // sub-samples ni_subsample of them, and then checks (or creates) the
+  // covariates. Sets error when ni_test is 0 and a_mode is not 15.
+
+  // Convert indicator_pheno to indicator_idv: an individual is kept only
+  // if none of its phenotype indicators is 0.
+  indicator_idv.clear();
+  for (size_t i = 0; i < indicator_pheno.size(); i++) {
+    int k = 1;
+    for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+      if (indicator_pheno[i][j] == 0) {
+        k = 0;
+      }
+    }
+    indicator_idv.push_back(k);
+  }
+
+  // Remove individuals with missing covariates.
+  if ((indicator_cvt).size() != 0) {
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_idv[i] *= indicator_cvt[i];
+    }
+  }
+
+  // Remove individuals with missing gxe variables.
+  if ((indicator_gxe).size() != 0) {
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_idv[i] *= indicator_gxe[i];
+    }
+  }
+
+  // Remove individuals with missing residual weights.
+  if ((indicator_weight).size() != 0) {
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_idv[i] *= indicator_weight[i];
+    }
+  }
+
+  // Obtain ni_test.
+  ni_test = 0;
+  for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+    if (indicator_idv[i] != 0) {
+      ni_test++;
+    }
+  }
+
+  // If subsample number is set, perform a random sub-sampling
+  // to determine the subsampled ids.
+  if (ni_subsample != 0) {
+    if (ni_test < ni_subsample) {
+      // Too few individuals to draw from: report it and keep them all.
+      cout << "error! number of analyzed individuals is less than number "
+           << "of subsamples. " << endl;
+    } else {
+      // Set up random environment.
+      gsl_rng_env_setup();
+      if (randseed < 0) {
+        // A negative seed is replaced by the UTC time of day in seconds.
+        time_t rawtime;
+        time(&rawtime);
+        tm *ptm = gmtime(&rawtime);
+
+        randseed = (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 +
+                              ptm->tm_sec);
+      }
+      gsl_rng *gsl_r = gsl_rng_alloc(gsl_rng_default);
+      gsl_rng_set(gsl_r, randseed);
+
+      // From ni_test, sub-sample ni_subsample.
+      vector<size_t> a(ni_subsample, 0), b;
+      for (size_t i = 0; i < ni_test; i++) {
+        b.push_back(i);
+      }
+
+      gsl_ran_choose(gsl_r, static_cast<void *>(&a[0]), ni_subsample,
+                     static_cast<void *>(&b[0]), ni_test, sizeof(size_t));
+
+      // The generator is not used past this point; release it here.
+      gsl_rng_free(gsl_r);
+
+      // Re-set indicator_idv and ni_test: j is the rank of individual i
+      // among the analyzed ones; it stays only if that rank was drawn.
+      size_t j = 0;
+      for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+        if (indicator_idv[i] == 0) {
+          continue;
+        }
+        if (find(a.begin(), a.end(), j) == a.end()) {
+          indicator_idv[i] = 0;
+        }
+        j++;
+      }
+      ni_test = ni_subsample;
+    }
+  }
+
+  // Check ni_test.
+  if (ni_test == 0 && a_mode != 15) {
+    error = true;
+    cout << "error! number of analyzed individuals equals 0. " << endl;
+    return;
+  }
+
+  // Check covariates to see if they are correlated with each
+  // other, and to see if the intercept term is included.
+  // After getting ni_test.
+  // Add or remove covariates.
+  if (indicator_cvt.size() != 0) {
+    CheckCvt();
+  } else {
+    vector<double> cvt_row(1, 1.0);
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_cvt.push_back(1);
+      cvt.push_back(cvt_row);
+    }
+  }
+
+  return;
 }
 
-void PARAM::CopyCvt (gsl_matrix *W) {
-	size_t ci_test=0;
+void PARAM::CopyCvt(gsl_matrix *W) {
+  size_t ci_test = 0;
 
-	for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;}
-		for (size_t j=0; j<n_cvt; ++j) {
-			gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
-		}
-		ci_test++;
-	}
+  for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+    if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) {
+      continue;
+    }
+    for (size_t j = 0; j < n_cvt; ++j) {
+      gsl_matrix_set(W, ci_test, j, (cvt)[i][j]);
+    }
+    ci_test++;
+  }
 
-	return;
+  return;
 }
 
-void PARAM::CopyGxe (gsl_vector *env) {
-	size_t ci_test=0;
+void PARAM::CopyGxe(gsl_vector *env) {
+  size_t ci_test = 0;
 
-	for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		if (indicator_idv[i]==0 || indicator_gxe[i]==0) {continue;}
-		gsl_vector_set (env, ci_test, gxe[i]);
-		ci_test++;
-	}
+  for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+    if (indicator_idv[i] == 0 || indicator_gxe[i] == 0) {
+      continue;
+    }
+    gsl_vector_set(env, ci_test, gxe[i]);
+    ci_test++;
+  }
 
-	return;
+  return;
 }
 
-void PARAM::CopyWeight (gsl_vector *w) {
-	size_t ci_test=0;
+void PARAM::CopyWeight(gsl_vector *w) {
+  size_t ci_test = 0;
 
-	for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		if (indicator_idv[i]==0 || indicator_weight[i]==0) {continue;}
-		gsl_vector_set (w, ci_test, weight[i]);
-		ci_test++;
-	}
+  for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+    if (indicator_idv[i] == 0 || indicator_weight[i] == 0) {
+      continue;
+    }
+    gsl_vector_set(w, ci_test, weight[i]);
+    ci_test++;
+  }
 
-	return;
+  return;
 }
 
 // If flag=0, then use indicator_idv to load W and Y;
 // else, use indicator_cvt to load them.
-void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag) {
-	size_t ci_test=0;
+void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag) {
+  size_t ci_test = 0;
 
-	for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		if (flag==0) {
-			if (indicator_idv[i]==0) {continue;}
-		} else {
-			if (indicator_cvt[i]==0) {continue;}
-		}
+  for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+    if (flag == 0) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+    } else {
+      if (indicator_cvt[i] == 0) {
+        continue;
+      }
+    }
 
-		gsl_vector_set (y, ci_test, (pheno)[i][0]);
+    gsl_vector_set(y, ci_test, (pheno)[i][0]);
 
-		for (size_t j=0; j<n_cvt; ++j) {
-			gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
-		}
-		ci_test++;
-	}
+    for (size_t j = 0; j < n_cvt; ++j) {
+      gsl_matrix_set(W, ci_test, j, (cvt)[i][j]);
+    }
+    ci_test++;
+  }
 
-	return;
+  return;
 }
 
 // If flag=0, then use indicator_idv to load W and Y;
 // else, use indicator_cvt to load them.
-void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag) {
-	size_t ci_test=0;
-
-	for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		if (flag==0) {
-			if (indicator_idv[i]==0) {continue;}
-		} else {
-			if (indicator_cvt[i]==0) {continue;}
-		}
-
-        for (size_t j=0; j<n_ph; ++j) {
-			gsl_matrix_set (Y, ci_test, j, (pheno)[i][j]);
-		}
-		for (size_t j=0; j<n_cvt; ++j) {
-			gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
-		}
-
-		ci_test++;
-	}
-
-	return;
+void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag) {
+  // Copies phenotypes into Y and covariates into W, one output row per
+  // selected individual, in input order. Individuals are selected by
+  // indicator_idv when flag is 0 and by indicator_cvt otherwise.
+  size_t row = 0;
+
+  for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+    const int selected = (flag == 0) ? indicator_idv[i] : indicator_cvt[i];
+    if (selected == 0) {
+      continue;
+    }
+
+    // Row `row` of Y receives the n_ph phenotypes of individual i.
+    for (size_t p = 0; p < n_ph; ++p) {
+      gsl_matrix_set(Y, row, p, pheno[i][p]);
+    }
+    // Row `row` of W receives its n_cvt covariates.
+    for (size_t c = 0; c < n_cvt; ++c) {
+      gsl_matrix_set(W, row, c, cvt[i][c]);
+    }
+
+    row++;
+  }
+
+  return;
 }
 
-void PARAM::CopyRead (gsl_vector *log_N) {
-	size_t ci_test=0;
+void PARAM::CopyRead(gsl_vector *log_N) {
+  size_t ci_test = 0;
 
-	for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
-		if (indicator_idv[i]==0) {continue;}
-		gsl_vector_set (log_N, ci_test, log(vec_read[i]) );
-		ci_test++;
-	}
+  for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+    if (indicator_idv[i] == 0) {
+      continue;
+    }
+    gsl_vector_set(log_N, ci_test, log(vec_read[i]));
+    ci_test++;
+  }
 
-	return;
+  return;
 }
 
-void PARAM::ObtainWeight (const set<string> &setSnps_beta,
-			  map<string, double> &mapRS2wK) {
+void PARAM::ObtainWeight(const set<string> &setSnps_beta,
+                         map<string, double> &mapRS2wK) {
   mapRS2wK.clear();
 
   vector<double> wsum, wcount;
 
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     wsum.push_back(0.0);
     wcount.push_back(0.0);
   }
 
   string rs;
-  if (msnpInfo.size()==0) {
-    for (size_t i=0; i<snpInfo.size(); i++) {
-      if (indicator_snp[i]==0) {continue;}
-
-      rs=snpInfo[i].rs_number;
-      if ( (setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) &&
-	   (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) &&
-	   (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) &&
-	   (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
-	if (mapRS2wsnp.size()!=0) {
-	  mapRS2wK[rs]=mapRS2wsnp[rs];
-	  if (mapRS2cat.size()==0) {
-	    wsum[0]+=mapRS2wsnp[rs];
-	  } else {
-	    wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs];
-	  }
-	  wcount[0]++;
-	} else {
-	  mapRS2wK[rs]=1;
-	}
+  if (msnpInfo.size() == 0) {
+    for (size_t i = 0; i < snpInfo.size(); i++) {
+      if (indicator_snp[i] == 0) {
+        continue;
       }
 
+      rs = snpInfo[i].rs_number;
+      if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) &&
+          (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) &&
+          (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) &&
+          (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
+        if (mapRS2wsnp.size() != 0) {
+          mapRS2wK[rs] = mapRS2wsnp[rs];
+          // Count the SNP in the same category slot as its weight: each
+          // wsum[i] is divided by wcount[i] below, so counting everything
+          // in slot 0 would divide the other categories by zero.
+          const size_t cat = (mapRS2cat.size() == 0) ? 0 : mapRS2cat[rs];
+          wsum[cat] += mapRS2wsnp[rs];
+          wcount[cat]++;
+        } else {
+          mapRS2wK[rs] = 1;
+        }
+      }
     }
   } else {
-    for (size_t t=0; t<msnpInfo.size(); t++) {
-      snpInfo=msnpInfo[t];
-      indicator_snp=mindicator_snp[t];
-
-      for (size_t i=0; i<snpInfo.size(); i++) {
-	if (indicator_snp[i]==0) {continue;}
-
-	rs=snpInfo[i].rs_number;
-	if ((setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) &&
-	    (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) &&
-	    (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) &&
-	    (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
-	  if (mapRS2wsnp.size()!=0) {
-	    mapRS2wK[rs]=mapRS2wsnp[rs];
-	    if (mapRS2cat.size()==0) {
-	      wsum[0]+=mapRS2wsnp[rs];
-	    } else {
-	      wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs];
-	    }
-	    wcount[0]++;
-	  } else {
-	    mapRS2wK[rs]=1;
-	  }
-	}
-      }
-    }
-  }
-
-  if (mapRS2wsnp.size()!=0) {
-    for (size_t i=0; i<n_vc; i++) {
-      wsum[i]/=wcount[i];
-    }
-
-    for (map<string, double>::iterator it=mapRS2wK.begin();
-	 it!=mapRS2wK.end();
-	 ++it) {
-      if (mapRS2cat.size()==0) {
-	it->second/=wsum[0];
+    for (size_t t = 0; t < msnpInfo.size(); t++) {
+      snpInfo = msnpInfo[t];
+      indicator_snp = mindicator_snp[t];
+
+      for (size_t i = 0; i < snpInfo.size(); i++) {
+        if (indicator_snp[i] == 0) {
+          continue;
+        }
+
+        rs = snpInfo[i].rs_number;
+        if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) &&
+            (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) &&
+            (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) &&
+            (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
+          if (mapRS2wsnp.size() != 0) {
+            mapRS2wK[rs] = mapRS2wsnp[rs];
+            // Count the SNP in the same category slot as its weight:
+            // wsum[i] is divided by wcount[i] below, so counting all
+            // SNPs in slot 0 would divide other categories by zero.
+            const size_t cat = (mapRS2cat.size() == 0) ? 0 : mapRS2cat[rs];
+            wsum[cat] += mapRS2wsnp[rs];
+            wcount[cat]++;
+          } else {
+            mapRS2wK[rs] = 1;
+          }
+        }
+      }
+    }
+  }
+
+  if (mapRS2wsnp.size() != 0) {
+    for (size_t i = 0; i < n_vc; i++) {
+      wsum[i] /= wcount[i];
+    }
+
+    for (map<string, double>::iterator it = mapRS2wK.begin();
+         it != mapRS2wK.end(); ++it) {
+      if (mapRS2cat.size() == 0) {
+        it->second /= wsum[0];
       } else {
-	it->second/=wsum[mapRS2cat[it->first]];
+        it->second /= wsum[mapRS2cat[it->first]];
       }
     }
   }
@@ -2201,54 +2284,52 @@ void PARAM::ObtainWeight (const set<string> &setSnps_beta,
 
 // If pve_flag=0 then do not change pve; pve_flag==1, then change pve
 // to 0 if pve < 0 and pve to 1 if pve > 1.
-void PARAM::UpdateWeight (const size_t pve_flag,
-			  const map<string, double> &mapRS2wK,
-			  const size_t ni_test, const gsl_vector *ns,
-			  map<string, double> &mapRS2wA) {
+void PARAM::UpdateWeight(const size_t pve_flag,
+                         const map<string, double> &mapRS2wK,
+                         const size_t ni_test, const gsl_vector *ns,
+                         map<string, double> &mapRS2wA) {
   double d;
   vector<double> wsum, wcount;
 
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     wsum.push_back(0.0);
     wcount.push_back(0.0);
   }
 
-  for (map<string, double>::const_iterator it=mapRS2wK.begin();
-       it!=mapRS2wK.end();
-       ++it) {
-    d=1;
-    for (size_t i=0; i<n_vc; i++) {
-      if (v_pve[i]>=1 && pve_flag==1) {
-	d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i];
-      } else if (v_pve[i]<=0 && pve_flag==1) {
-	d+=0;
+  for (map<string, double>::const_iterator it = mapRS2wK.begin();
+       it != mapRS2wK.end(); ++it) {
+    d = 1;
+    for (size_t i = 0; i < n_vc; i++) {
+      if (v_pve[i] >= 1 && pve_flag == 1) {
+        d += (double)ni_test / gsl_vector_get(ns, i) * mapRS2wcat[it->first][i];
+      } else if (v_pve[i] <= 0 && pve_flag == 1) {
+        d += 0;
       } else {
-	d+=(double)ni_test/gsl_vector_get(ns, i)*
-	  mapRS2wcat[it->first][i]*v_pve[i];
+        d += (double)ni_test / gsl_vector_get(ns, i) *
+             mapRS2wcat[it->first][i] * v_pve[i];
       }
     }
-    mapRS2wA[it->first]=1/(d*d);
+    mapRS2wA[it->first] = 1 / (d * d);
 
-    if (mapRS2cat.size()==0) {
-      wsum[0]+=mapRS2wA[it->first];
+    if (mapRS2cat.size() == 0) {
+      wsum[0] += mapRS2wA[it->first];
       wcount[0]++;
     } else {
-      wsum[mapRS2cat[it->first]]+=mapRS2wA[it->first];
+      wsum[mapRS2cat[it->first]] += mapRS2wA[it->first];
       wcount[mapRS2cat[it->first]]++;
     }
   }
 
-  for (size_t i=0; i<n_vc; i++) {
-    wsum[i]/=wcount[i];
+  for (size_t i = 0; i < n_vc; i++) {
+    wsum[i] /= wcount[i];
   }
 
-  for (map<string, double>::iterator it=mapRS2wA.begin();
-       it!=mapRS2wA.end();
-       ++it) {
-    if (mapRS2cat.size()==0) {
-      it->second/=wsum[0];
+  for (map<string, double>::iterator it = mapRS2wA.begin();
+       it != mapRS2wA.end(); ++it) {
+    if (mapRS2cat.size() == 0) {
+      it->second /= wsum[0];
     } else {
-      it->second/=wsum[mapRS2cat[it->first]];
+      it->second /= wsum[mapRS2cat[it->first]];
     }
   }
   return;
@@ -2256,61 +2337,64 @@ void PARAM::UpdateWeight (const size_t pve_flag,
 
 // This function updates indicator_snp, and save z-scores and other
 // values into vectors.
-void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA,
-			 const map<string, string> &mapRS2A1,
-			 const map<string, double> &mapRS2z,
-			 gsl_vector *w, gsl_vector *z,
-			 vector<size_t> &vec_cat) {
-  gsl_vector_set_zero (w);
-  gsl_vector_set_zero (z);
+void PARAM::UpdateSNPnZ(const map<string, double> &mapRS2wA,
+                        const map<string, string> &mapRS2A1,
+                        const map<string, double> &mapRS2z, gsl_vector *w,
+                        gsl_vector *z, vector<size_t> &vec_cat) {
+  gsl_vector_set_zero(w);
+  gsl_vector_set_zero(z);
   vec_cat.clear();
 
   string rs, a1;
-  size_t c=0;
-  if (msnpInfo.size()==0) {
-    for (size_t i=0; i<snpInfo.size(); i++) {
-      if (indicator_snp[i]==0) {continue;}
-
-      rs=snpInfo[i].rs_number;
-      a1=snpInfo[i].a_minor;
-
-      if (mapRS2wA.count(rs)!=0) {
-	if (a1==mapRS2A1.at(rs)) {
-	  gsl_vector_set (z, c, mapRS2z.at(rs) );
-	} else {
-	  gsl_vector_set (z, c, -1*mapRS2z.at(rs) );
-	}
-	vec_cat.push_back(mapRS2cat.at(rs) );
-	gsl_vector_set (w, c, mapRS2wA.at(rs) );
-
-	c++;
-      } else {
-	indicator_snp[i]=0;
+  size_t c = 0;
+  if (msnpInfo.size() == 0) {
+    for (size_t i = 0; i < snpInfo.size(); i++) {
+      if (indicator_snp[i] == 0) {
+        continue;
       }
-    }
-  } else {
-    for (size_t t=0; t<msnpInfo.size(); t++) {
-      snpInfo=msnpInfo[t];
 
-      for (size_t i=0; i<snpInfo.size(); i++) {
-	if (mindicator_snp[t][i]==0) {continue;}
+      rs = snpInfo[i].rs_number;
+      a1 = snpInfo[i].a_minor;
 
-	rs=snpInfo[i].rs_number;
-	a1=snpInfo[i].a_minor;
+      if (mapRS2wA.count(rs) != 0) {
+        if (a1 == mapRS2A1.at(rs)) {
+          gsl_vector_set(z, c, mapRS2z.at(rs));
+        } else {
+          gsl_vector_set(z, c, -1 * mapRS2z.at(rs));
+        }
+        vec_cat.push_back(mapRS2cat.at(rs));
+        gsl_vector_set(w, c, mapRS2wA.at(rs));
 
-	if (mapRS2wA.count(rs)!=0) {
-	  if (a1==mapRS2A1.at(rs)) {
-	    gsl_vector_set (z, c, mapRS2z.at(rs) );
-	  } else {
-	    gsl_vector_set (z, c, -1*mapRS2z.at(rs) );
-	  }
-	  vec_cat.push_back(mapRS2cat.at(rs) );
-	  gsl_vector_set (w, c, mapRS2wA.at(rs) );
+        c++;
+      } else {
+        indicator_snp[i] = 0;
+      }
+    }
+  } else {
+    for (size_t t = 0; t < msnpInfo.size(); t++) {
+      snpInfo = msnpInfo[t];
+
+      for (size_t i = 0; i < snpInfo.size(); i++) {
+        if (mindicator_snp[t][i] == 0) {
+          continue;
+        }
+
+        rs = snpInfo[i].rs_number;
+        a1 = snpInfo[i].a_minor;
+
+        if (mapRS2wA.count(rs) != 0) {
+          if (a1 == mapRS2A1.at(rs)) {
+            gsl_vector_set(z, c, mapRS2z.at(rs));
+          } else {
+            gsl_vector_set(z, c, -1 * mapRS2z.at(rs));
+          }
+          vec_cat.push_back(mapRS2cat.at(rs));
+          gsl_vector_set(w, c, mapRS2wA.at(rs));
 
-	  c++;
-	} else {
-	  mindicator_snp[t][i]=0;
-	}
+          c++;
+        } else {
+          mindicator_snp[t][i] = 0;
+        }
       }
     }
   }
@@ -2320,30 +2404,34 @@ void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA,
 
 // This function updates indicator_snp, and save z-scores and other
 // values into vectors.
-void PARAM::UpdateSNP (const map<string, double> &mapRS2wA) {
+void PARAM::UpdateSNP(const map<string, double> &mapRS2wA) {
   string rs;
-  if (msnpInfo.size()==0) {
-    for (size_t i=0; i<snpInfo.size(); i++) {
-      if (indicator_snp[i]==0) {continue;}
+  if (msnpInfo.size() == 0) {
+    for (size_t i = 0; i < snpInfo.size(); i++) {
+      if (indicator_snp[i] == 0) {
+        continue;
+      }
 
-      rs=snpInfo[i].rs_number;
+      rs = snpInfo[i].rs_number;
 
-      if (mapRS2wA.count(rs)==0) {
-	indicator_snp[i]=0;
+      if (mapRS2wA.count(rs) == 0) {
+        indicator_snp[i] = 0;
       }
     }
   } else {
-    for (size_t t=0; t<msnpInfo.size(); t++) {
-      snpInfo=msnpInfo[t];
+    for (size_t t = 0; t < msnpInfo.size(); t++) {
+      snpInfo = msnpInfo[t];
 
-      for (size_t i=0; i<mindicator_snp[t].size(); i++) {
-	if (mindicator_snp[t][i]==0) {continue;}
+      for (size_t i = 0; i < mindicator_snp[t].size(); i++) {
+        if (mindicator_snp[t][i] == 0) {
+          continue;
+        }
 
-	rs=snpInfo[i].rs_number;
+        rs = snpInfo[i].rs_number;
 
-	if (mapRS2wA.count(rs)==0) {
-	  mindicator_snp[t][i]=0;
-	}
+        if (mapRS2wA.count(rs) == 0) {
+          mindicator_snp[t][i] = 0;
+        }
       }
     }
   }
diff --git a/src/param.h b/src/param.h
index f58da53..33e2431 100644
--- a/src/param.h
+++ b/src/param.h
@@ -19,340 +19,336 @@
 #ifndef __PARAM_H__
 #define __PARAM_H__
 
-#include <vector>
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 #include <map>
 #include <set>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
+#include <vector>
 
 using namespace std;
 
 class SNPINFO {
 public:
-	string chr;
-	string rs_number;
-	double cM;
-	long int base_position;
-	string a_minor;
-	string a_major;
-	size_t n_miss;
-	double missingness;
-	double maf;
-	size_t n_idv;         // Number of non-missing individuals.
-	size_t n_nb;          // Number of neighbours on the right hand side.
-	size_t file_position; // SNP location in file.
+  string chr;
+  string rs_number;
+  double cM;
+  long int base_position;
+  string a_minor;
+  string a_major;
+  size_t n_miss;
+  double missingness;
+  double maf;
+  size_t n_idv;         // Number of non-missing individuals.
+  size_t n_nb;          // Number of neighbours on the right hand side.
+  size_t file_position; // SNP location in file.
 };
 
 // Results for LMM.
 class SUMSTAT {
 public:
-	double beta;		// REML estimator for beta.
-	double se;		// SE for beta.
-	double lambda_remle;	// REML estimator for lambda.
-	double lambda_mle;	// MLE estimator for lambda.
-	double p_wald;		// p value from a Wald test.
-	double p_lrt;		// p value from a likelihood ratio test.
-	double p_score;		// p value from a score test.
+  double beta;         // REML estimator for beta.
+  double se;           // SE for beta.
+  double lambda_remle; // REML estimator for lambda.
+  double lambda_mle;   // MLE estimator for lambda.
+  double p_wald;       // p value from a Wald test.
+  double p_lrt;        // p value from a likelihood ratio test.
+  double p_score;      // p value from a score test.
 };
 
 // Results for mvLMM.
 class MPHSUMSTAT {
 public:
-	vector<double> v_beta;	// REML estimator for beta.
-	double p_wald;		// p value from a Wald test.
-	double p_lrt;		// p value from a likelihood ratio test.
-	double p_score;		// p value from a score test.
-	vector<double> v_Vg;	// Estimator for Vg, right half.
-	vector<double> v_Ve;	// Estimator for Ve, right half.
-	vector<double> v_Vbeta;	// Estimator for Vbeta, right half.
+  vector<double> v_beta;  // REML estimator for beta.
+  double p_wald;          // p value from a Wald test.
+  double p_lrt;           // p value from a likelihood ratio test.
+  double p_score;         // p value from a score test.
+  vector<double> v_Vg;    // Estimator for Vg, right half.
+  vector<double> v_Ve;    // Estimator for Ve, right half.
+  vector<double> v_Vbeta; // Estimator for Vbeta, right half.
 };
 
 // Hyper-parameters for BSLMM.
 class HYPBSLMM {
 public:
-	double h;
-	double pve;
-	double rho;
-	double pge;
-	double logp;
-	size_t n_gamma;
+  double h;
+  double pve;
+  double rho;
+  double pge;
+  double logp;
+  size_t n_gamma;
 };
 
 // Header class.
 class HEADER {
 public:
-    size_t rs_col;
-    size_t chr_col;
-    size_t pos_col;
-    size_t cm_col;
-    size_t a1_col;
-    size_t a0_col;
-    size_t z_col;
-    size_t beta_col;
-    size_t sebeta_col;
-    size_t chisq_col;
-    size_t p_col;
-    size_t n_col;
-    size_t nmis_col;
-    size_t nobs_col;
-    size_t ncase_col;
-    size_t ncontrol_col;
-    size_t af_col;
-    size_t var_col;
-    size_t ws_col;
-    size_t cor_col;
-    size_t coln; // Number of columns.
-    set<size_t> catc_col;
-    set<size_t> catd_col;
+  size_t rs_col;
+  size_t chr_col;
+  size_t pos_col;
+  size_t cm_col;
+  size_t a1_col;
+  size_t a0_col;
+  size_t z_col;
+  size_t beta_col;
+  size_t sebeta_col;
+  size_t chisq_col;
+  size_t p_col;
+  size_t n_col;
+  size_t nmis_col;
+  size_t nobs_col;
+  size_t ncase_col;
+  size_t ncontrol_col;
+  size_t af_col;
+  size_t var_col;
+  size_t ws_col;
+  size_t cor_col;
+  size_t coln; // Number of columns.
+  set<size_t> catc_col;
+  set<size_t> catd_col;
 };
 
 class PARAM {
 public:
-	// IO-related parameters.
-	bool mode_silence;
-	int a_mode;  // Analysis mode, 1/2/3/4 for Frequentist tests
-        int k_mode;  // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
-	vector<size_t> p_column; // Which phenotype column needs analysis.
-	size_t d_pace;	 	 // Display pace
-
-	string file_bfile, file_mbfile;
-	string file_geno, file_mgeno;
-	string file_pheno;
-	string file_anno; // Optional.
-	string file_gxe;  // Optional.
-	string file_cvt;  // Optional.
-	string file_cat, file_mcat;
-	string file_catc, file_mcatc;
-	string file_var;
-	string file_beta;
-	string file_cor;
-	string file_kin, file_mk;
-	string file_ku, file_kd;
-	string file_study, file_mstudy;
-	string file_ref, file_mref;
-	string file_weight, file_wsnp, file_wcat;
-	string file_out;
-	string file_bf, file_hyp;
-	string path_out;
-
-	string file_epm;  // Estimated parameter file.
-	string file_ebv;  // Estimated breeding value file.
-	string file_log;  // Log file containing mean estimate.
-	string file_read; // File containing total number of reads.
-	string file_gene; // Gene expression file.
-	string file_snps; // File containing analyzed SNPs or genes.
-
-        // WJA added.
-	string file_oxford;
-
-	// QC-related parameters.
-	double miss_level;
-	double maf_level;
-	double hwe_level;
-	double r2_level;
-
-	// LMM-related parameters.
-	double l_min;
-	double l_max;
-	size_t n_region;
-	double l_mle_null, l_remle_null;
-	double logl_mle_H0, logl_remle_H0;
-	double pve_null, pve_se_null, pve_total, se_pve_total;
-	double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null;
-	vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
-        vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
-        vector<double> VVe_mle_null;
-        vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
-        vector<double> se_beta_mle_null;
-	double p_nr;
-	double em_prec, nr_prec;
-	size_t em_iter, nr_iter;
-	size_t crt;
-	double pheno_mean; // Phenotype mean from BSLMM fitting or prediction.
-
-	// For fitting multiple variance components.
-	// The first 3 are of size (n_vc), and the next 2 are of size n_vc+1.
-	bool noconstrain;
-	vector<double> v_traceG;
-	vector<double> v_pve;
-	vector<double> v_se_pve;
-
-	vector<double> v_sigma2;
-	vector<double> v_se_sigma2;
-	vector<double> v_enrich;
-	vector<double> v_se_enrich;
-	vector<double> v_beta;
-	vector<double> v_se_beta;
-
-	// BSLMM/MCMC-related parameters.
-	double h_min, h_max, h_scale;		// Priors for h.
-	double rho_min, rho_max, rho_scale;	// Priors for rho.
-	double logp_min, logp_max, logp_scale; 	// Priors for log(pi).
-	size_t h_ngrid, rho_ngrid;
-	size_t s_min, s_max;			// Min & max. number of gammas.
-	size_t w_step;				// # warm up/burn in iter.
-	size_t s_step;				// # sampling iterations.
-	size_t r_pace;				// Record pace.
-	size_t w_pace;				// Write pace.
-	size_t n_accept;			// Number of acceptance.
-	size_t n_mh;				// # MH steps in each iter.
-	double geo_mean;			// Mean of geometric dist.
-	long int randseed;
-	double trace_G;
-
-	HYPBSLMM cHyp_initial;
-
-	// VARCOV-related parameters.
-	double window_cm;
-	size_t window_bp;
-	size_t window_ns;
-
-	// vc-related parameters.
-	size_t n_block;
-
-	// Summary statistics.
-	bool error;
-
-        // Number of individuals.
-	size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
-
-        // Number of observed and missing phenotypes.
-	size_t np_obs, np_miss;
-
-        // Number of SNPs.
-	size_t ns_total, ns_test, ns_study, ns_ref;
-
-	size_t ng_total, ng_test;   // Number of genes.
-	size_t ni_control, ni_case; // Number of controls and number of cases.
-	size_t ni_subsample;        // Number of subsampled individuals.
-	size_t n_cvt;		    // Number of covariates.
-	size_t n_cat;		    // Number of continuous categories.
-	size_t n_ph;		    // Number of phenotypes.
-	size_t n_vc;		    // Number of variance components
-                                    // (including the diagonal matrix).
-	double time_total;	    // Record total time.
-	double time_G;	 	    // Time spent on reading files the
-                                    // second time and calculate K.
-	double time_eigen;	    // Time spent on eigen-decomposition.
-	double time_UtX;	    // Time spent on calculating UX and Uy.
-	double time_UtZ;	    // Time calculating UtZ for probit BSLMM.
-	double time_opt;	    // Time on optimization iterations/MCMC.
-	double time_Omega;   	    // Time spent on calculating Omega.
-	double time_hyp;	    // Time sampling hyperparameters in PMM.
-	double time_Proposal;       // Time spent on constructing the
-				    // proposal distribution (i.e. the
-				    // initial LMM or LM analysis).
-
-	// Data.
-        // Vector recording all phenotypes (NA replaced with -9).
-	vector<vector<double> > pheno;
-
-        // Vector recording all covariates (NA replaced with -9).
-	vector<vector<double> > cvt;
-
-        // Vector recording all covariates (NA replaced with -9).
-	vector<double> gxe;
-
-        // Vector recording weights for the individuals, which is
-        // useful for animal breeding studies.
-	vector<double> weight;
-
-        // Matrix recording when a phenotype is missing for an
-        // individual; 0 missing, 1 available.
-	vector<vector<int> > indicator_pheno;
-
-        // Indicator for individuals (phenotypes): 0 missing, 1
-        // available for analysis
-	vector<int> indicator_idv;
-
-        // Sequence indicator for SNPs: 0 ignored because of (a) maf,
-        // (b) miss, (c) non-poly; 1 available for analysis.
-	vector<int> indicator_snp;
-
-        // Sequence indicator for SNPs: 0 ignored because of (a) maf,
-        // (b) miss, (c) non-poly; 1 available for analysis.
-	vector< vector<int> >  mindicator_snp;
-
-        // Indicator for covariates: 0 missing, 1 available for
-        // analysis.
-	vector<int> indicator_cvt;
-
-        // Indicator for gxe: 0 missing, 1 available for analysis.
-	vector<int> indicator_gxe;
-
-        // Indicator for weight: 0 missing, 1 available for analysis.
-	vector<int> indicator_weight;
-
-        // Indicator for estimated breeding value file: 0 missing, 1
-        // available for analysis.
-	vector<int> indicator_bv;
-
-	// Indicator for read file: 0 missing, 1 available for analysis.
-	vector<int> indicator_read;
-	vector<double> vec_read;	// Total number of reads.
-	vector<double> vec_bv;   	// Breeding values.
-	vector<size_t> est_column;
-
-	map<string, int> mapID2num;	// Map small ID to number, 0 to n-1.
-	map<string, string> mapRS2chr; 	// Map rs# to chromosome location.
-	map<string, long int> mapRS2bp;	// Map rs# to base position.
-	map<string, double> mapRS2cM;	// Map rs# to cM.
-	map<string, double> mapRS2est;	// Map rs# to parameters.
-	map<string, size_t> mapRS2cat;  // Map rs# to category number.
-	map<string, vector<double> > mapRS2catc; // Map rs# to cont. cat's.
-	map<string, double> mapRS2wsnp;          // Map rs# to SNP weights.
-	map<string, vector<double> > mapRS2wcat; // Map rs# to SNP cat weights.
-
-	vector<SNPINFO> snpInfo;	 	 // Record SNP information.
-	vector< vector<SNPINFO> > msnpInfo;	 // Record SNP information.
-	set<string> setSnps;			 // Set of snps for analysis.
-
-	// Constructor.
-	PARAM();
-
-	// Functions.
-	void ReadFiles ();
-	void CheckParam ();
-	void CheckData ();
-	void PrintSummary ();
-	void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K,
-			    const bool calc_K);
-	void ReadGenotypes (vector<vector<unsigned char> > &Xt,
-			    gsl_matrix *K, const bool calc_K);
-	void CheckCvt ();
-	void CopyCvt (gsl_matrix *W);
-	void CopyA (size_t flag, gsl_matrix *A);
-	void CopyGxe (gsl_vector *gxe);
-	void CopyWeight (gsl_vector *w);
-	void ProcessCvtPhen();
-	void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag);
-	void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag);
-	void CalcKin (gsl_matrix *matrix_kin);
-	void CalcS (const map<string, double> &mapRS2wA,
-		    const map<string, double> &mapRS2wK,
-		    const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K,
-		    gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns);
-	void WriteVector (const gsl_vector *q, const gsl_vector *s,
-			  const size_t n_total, const string suffix);
-	void WriteVar (const string suffix);
-	void WriteMatrix (const gsl_matrix *matrix_U, const string suffix);
-	void WriteVector (const gsl_vector *vector_D, const string suffix);
-	void CopyRead (gsl_vector *log_N);
-	void ObtainWeight (const set<string> &setSnps_beta, map<string,
-			   double> &mapRS2wK);
-	void UpdateWeight (const size_t pve_flag,
-			   const map<string,double> &mapRS2wK,
-			   const size_t ni_test, const gsl_vector *ns,
-			   map<string, double> &mapRS2wA);
-	void UpdateSNPnZ (const map<string, double> &mapRS2wA,
-			  const map<string, string> &mapRS2A1,
-			  const map<string, double> &mapRS2z,
-			  gsl_vector *w, gsl_vector *z,
-			  vector<size_t> &vec_cat);
-	void UpdateSNP (const map<string, double> &mapRS2wA);
+  // IO-related parameters.
+  bool mode_silence;
+  int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
+  int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
+  vector<size_t> p_column; // Which phenotype column needs analysis.
+  size_t d_pace;           // Display pace
+
+  string file_bfile, file_mbfile;
+  string file_geno, file_mgeno;
+  string file_pheno;
+  string file_anno; // Optional.
+  string file_gxe;  // Optional.
+  string file_cvt;  // Optional.
+  string file_cat, file_mcat;
+  string file_catc, file_mcatc;
+  string file_var;
+  string file_beta;
+  string file_cor;
+  string file_kin, file_mk;
+  string file_ku, file_kd;
+  string file_study, file_mstudy;
+  string file_ref, file_mref;
+  string file_weight, file_wsnp, file_wcat;
+  string file_out;
+  string file_bf, file_hyp;
+  string path_out;
+
+  string file_epm;  // Estimated parameter file.
+  string file_ebv;  // Estimated breeding value file.
+  string file_log;  // Log file containing mean estimate.
+  string file_read; // File containing total number of reads.
+  string file_gene; // Gene expression file.
+  string file_snps; // File containing analyzed SNPs or genes.
+
+  // WJA added.
+  string file_oxford;
+
+  // QC-related parameters.
+  double miss_level;
+  double maf_level;
+  double hwe_level;
+  double r2_level;
+
+  // LMM-related parameters.
+  double l_min;
+  double l_max;
+  size_t n_region;
+  double l_mle_null, l_remle_null;
+  double logl_mle_H0, logl_remle_H0;
+  double pve_null, pve_se_null, pve_total, se_pve_total;
+  double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null;
+  vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
+  vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
+  vector<double> VVe_mle_null;
+  vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
+  vector<double> se_beta_mle_null;
+  double p_nr;
+  double em_prec, nr_prec;
+  size_t em_iter, nr_iter;
+  size_t crt;
+  double pheno_mean; // Phenotype mean from BSLMM fitting or prediction.
+
+  // For fitting multiple variance components.
+  // The first 3 are of size (n_vc), and the next 2 are of size n_vc+1.
+  bool noconstrain;
+  vector<double> v_traceG;
+  vector<double> v_pve;
+  vector<double> v_se_pve;
+
+  vector<double> v_sigma2;
+  vector<double> v_se_sigma2;
+  vector<double> v_enrich;
+  vector<double> v_se_enrich;
+  vector<double> v_beta;
+  vector<double> v_se_beta;
+
+  // BSLMM/MCMC-related parameters.
+  double h_min, h_max, h_scale;          // Priors for h.
+  double rho_min, rho_max, rho_scale;    // Priors for rho.
+  double logp_min, logp_max, logp_scale; // Priors for log(pi).
+  size_t h_ngrid, rho_ngrid;
+  size_t s_min, s_max; // Min & max. number of gammas.
+  size_t w_step;       // # warm up/burn in iter.
+  size_t s_step;       // # sampling iterations.
+  size_t r_pace;       // Record pace.
+  size_t w_pace;       // Write pace.
+  size_t n_accept;     // Number of acceptance.
+  size_t n_mh;         // # MH steps in each iter.
+  double geo_mean;     // Mean of geometric dist.
+  long int randseed;
+  double trace_G;
+
+  HYPBSLMM cHyp_initial;
+
+  // VARCOV-related parameters.
+  double window_cm;
+  size_t window_bp;
+  size_t window_ns;
+
+  // vc-related parameters.
+  size_t n_block;
+
+  // Summary statistics.
+  bool error;
+
+  // Number of individuals.
+  size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
+
+  // Number of observed and missing phenotypes.
+  size_t np_obs, np_miss;
+
+  // Number of SNPs.
+  size_t ns_total, ns_test, ns_study, ns_ref;
+
+  size_t ng_total, ng_test;   // Number of genes.
+  size_t ni_control, ni_case; // Number of controls and number of cases.
+  size_t ni_subsample;        // Number of subsampled individuals.
+  size_t n_cvt;               // Number of covariates.
+  size_t n_cat;               // Number of continuous categories.
+  size_t n_ph;                // Number of phenotypes.
+  size_t n_vc;                // Number of variance components
+                              // (including the diagonal matrix).
+  double time_total;          // Record total time.
+  double time_G;              // Time spent on reading files the
+                              // second time and calculating K.
+  double time_eigen;          // Time spent on eigen-decomposition.
+  double time_UtX;            // Time spent on calculating UX and Uy.
+  double time_UtZ;            // Time calculating UtZ for probit BSLMM.
+  double time_opt;            // Time on optimization iterations/MCMC.
+  double time_Omega;          // Time spent on calculating Omega.
+  double time_hyp;            // Time sampling hyperparameters in PMM.
+  double time_Proposal;       // Time spent on constructing the
+                              // proposal distribution (i.e. the
+                              // initial LMM or LM analysis).
+
+  // Data.
+  // Vector recording all phenotypes (NA replaced with -9).
+  vector<vector<double>> pheno;
+
+  // Vector recording all covariates (NA replaced with -9).
+  vector<vector<double>> cvt;
+
+  // Vector recording the gxe variable (NA replaced with -9).
+  vector<double> gxe;
+
+  // Vector recording weights for the individuals, which is
+  // useful for animal breeding studies.
+  vector<double> weight;
+
+  // Matrix recording when a phenotype is missing for an
+  // individual; 0 missing, 1 available.
+  vector<vector<int>> indicator_pheno;
+
+  // Indicator for individuals (phenotypes): 0 missing, 1
+  // available for analysis
+  vector<int> indicator_idv;
+
+  // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+  // (b) miss, (c) non-poly; 1 available for analysis.
+  vector<int> indicator_snp;
+
+  // As indicator_snp, with one indicator vector per msnpInfo entry: 0 ignored
+  // because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis.
+  vector<vector<int>> mindicator_snp;
+
+  // Indicator for covariates: 0 missing, 1 available for
+  // analysis.
+  vector<int> indicator_cvt;
+
+  // Indicator for gxe: 0 missing, 1 available for analysis.
+  vector<int> indicator_gxe;
+
+  // Indicator for weight: 0 missing, 1 available for analysis.
+  vector<int> indicator_weight;
+
+  // Indicator for estimated breeding value file: 0 missing, 1
+  // available for analysis.
+  vector<int> indicator_bv;
+
+  // Indicator for read file: 0 missing, 1 available for analysis.
+  vector<int> indicator_read;
+  vector<double> vec_read; // Total number of reads.
+  vector<double> vec_bv;   // Breeding values.
+  vector<size_t> est_column;
+
+  map<string, int> mapID2num;             // Map small ID to number, 0 to n-1.
+  map<string, string> mapRS2chr;          // Map rs# to chromosome location.
+  map<string, long int> mapRS2bp;         // Map rs# to base position.
+  map<string, double> mapRS2cM;           // Map rs# to cM.
+  map<string, double> mapRS2est;          // Map rs# to parameters.
+  map<string, size_t> mapRS2cat;          // Map rs# to category number.
+  map<string, vector<double>> mapRS2catc; // Map rs# to cont. cat's.
+  map<string, double> mapRS2wsnp;         // Map rs# to SNP weights.
+  map<string, vector<double>> mapRS2wcat; // Map rs# to SNP cat weights.
+
+  vector<SNPINFO> snpInfo;          // Record SNP information.
+  vector<vector<SNPINFO>> msnpInfo; // Record SNP information.
+  set<string> setSnps;              // Set of snps for analysis.
+
+  // Constructor.
+  PARAM();
+
+  // Functions.
+  void ReadFiles();
+  void CheckParam();
+  void CheckData();
+  void PrintSummary();
+  void ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+  void ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+                     const bool calc_K);
+  void CheckCvt();
+  void CopyCvt(gsl_matrix *W);
+  void CopyA(size_t flag, gsl_matrix *A);
+  void CopyGxe(gsl_vector *gxe);
+  void CopyWeight(gsl_vector *w);
+  void ProcessCvtPhen();
+  void CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag);
+  void CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag);
+  void CalcKin(gsl_matrix *matrix_kin);
+  void CalcS(const map<string, double> &mapRS2wA,
+             const map<string, double> &mapRS2wK, const gsl_matrix *W,
+             gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar,
+             gsl_vector *ns);
+  void WriteVector(const gsl_vector *q, const gsl_vector *s,
+                   const size_t n_total, const string suffix);
+  void WriteVar(const string suffix);
+  void WriteMatrix(const gsl_matrix *matrix_U, const string suffix);
+  void WriteVector(const gsl_vector *vector_D, const string suffix);
+  void CopyRead(gsl_vector *log_N);
+  void ObtainWeight(const set<string> &setSnps_beta,
+                    map<string, double> &mapRS2wK);
+  void UpdateWeight(const size_t pve_flag, const map<string, double> &mapRS2wK,
+                    const size_t ni_test, const gsl_vector *ns,
+                    map<string, double> &mapRS2wA);
+  void UpdateSNPnZ(const map<string, double> &mapRS2wA,
+                   const map<string, string> &mapRS2A1,
+                   const map<string, double> &mapRS2z, gsl_vector *w,
+                   gsl_vector *z, vector<size_t> &vec_cat);
+  void UpdateSNP(const map<string, double> &mapRS2wA);
 };
 
-size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt);
+size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);
 
 #endif
-
diff --git a/src/prdt.cpp b/src/prdt.cpp
index b29d150..3e7c004 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -16,527 +16,537 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
-#include <sstream>
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+#include <bitset>
+#include <cmath>
 #include <fstream>
-#include <string>
 #include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
+#include <sstream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <cmath>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
-#include "gsl/gsl_blas.h"
+#include <string>
+#include <vector>
 
-#include "io.h"
-#include "lapack.h"
 #include "gzstream.h"
 #include "io.h"
-#include "prdt.h"
+#include "io.h"
+#include "lapack.h"
 #include "mathfunc.h"
+#include "prdt.h"
 
 using namespace std;
 
-void PRDT::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
+void PRDT::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
+  d_pace = cPar.d_pace;
 
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
 
-	indicator_pheno=cPar.indicator_pheno;
-	indicator_cvt=cPar.indicator_cvt;
-	indicator_idv=cPar.indicator_idv;
+  indicator_pheno = cPar.indicator_pheno;
+  indicator_cvt = cPar.indicator_cvt;
+  indicator_idv = cPar.indicator_idv;
 
-	snpInfo=cPar.snpInfo;
-	mapRS2est=cPar.mapRS2est;
+  snpInfo = cPar.snpInfo;
+  mapRS2est = cPar.mapRS2est;
 
-	time_eigen=0;
+  time_eigen = 0;
 
-	n_ph=cPar.n_ph;
-	np_obs=cPar.np_obs;
-	np_miss=cPar.np_miss;
-	ns_total=cPar.ns_total;
-	ns_test=0;
+  n_ph = cPar.n_ph;
+  np_obs = cPar.np_obs;
+  np_miss = cPar.np_miss;
+  ns_total = cPar.ns_total;
+  ns_test = 0;
 
-	return;
+  return;
 }
 
-void PRDT::CopyToParam (PARAM &cPar) {
-	cPar.ns_test=ns_test;
-	cPar.time_eigen=time_eigen;
+void PRDT::CopyToParam(PARAM &cPar) {
+  cPar.ns_test = ns_test;
+  cPar.time_eigen = time_eigen;
 
-	return;
+  return;
 }
 
-void PRDT::WriteFiles (gsl_vector *y_prdt) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".";
-	file_str+="prdt";
-	file_str+=".txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	size_t ci_test=0;
-	for (size_t i=0; i<indicator_idv.size(); i++) {
-		if (indicator_idv[i]==1) {
-			outfile<<"NA"<<endl;
-		} else {
-			outfile<<gsl_vector_get (y_prdt, ci_test)<<endl;
-			ci_test++;
-		}
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void PRDT::WriteFiles(gsl_vector *y_prdt) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".";
+  file_str += "prdt";
+  file_str += ".txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  size_t ci_test = 0;
+  for (size_t i = 0; i < indicator_idv.size(); i++) {
+    if (indicator_idv[i] == 1) {
+      outfile << "NA" << endl;
+    } else {
+      outfile << gsl_vector_get(y_prdt, ci_test) << endl;
+      ci_test++;
+    }
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
-void PRDT::WriteFiles (gsl_matrix *Y_full)  {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".prdt.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	size_t ci_test=0;
-	for (size_t i=0; i<indicator_cvt.size(); i++) {
-		if (indicator_cvt[i]==0) {
-			outfile<<"NA"<<endl;
-		} else {
-			for (size_t j=0; j<Y_full->size2; j++) {
-				outfile << gsl_matrix_get(Y_full,ci_test,j) <<
-				  "\t";
-			}
-			outfile<<endl;
-			ci_test++;
-		}
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void PRDT::WriteFiles(gsl_matrix *Y_full) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".prdt.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  size_t ci_test = 0;
+  for (size_t i = 0; i < indicator_cvt.size(); i++) {
+    if (indicator_cvt[i] == 0) {
+      outfile << "NA" << endl;
+    } else {
+      for (size_t j = 0; j < Y_full->size2; j++) {
+        outfile << gsl_matrix_get(Y_full, ci_test, j) << "\t";
+      }
+      outfile << endl;
+      ci_test++;
+    }
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
-void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
-	size_t ni_test=u_hat->size, ni_total=G->size1;
-
-	gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test);
-	gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test);
-	gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
-	gsl_vector *eval=gsl_vector_alloc (ni_test);
-	gsl_vector *Utu=gsl_vector_alloc (ni_test);
-	gsl_vector *w=gsl_vector_alloc (ni_total);
-	gsl_permutation *pmt=gsl_permutation_alloc (ni_test);
-
-	//center matrix G based on indicator_idv
-	for (size_t i=0; i<ni_total; i++) {
-		gsl_vector_set(w, i, indicator_idv[i]);
-	}
-	CenterMatrix(G, w);
-
-	//obtain Koo and Kfo
-	size_t o_i=0, o_j=0;
-	double d;
-	for (size_t i=0; i<indicator_idv.size(); i++) {
-		o_j=0;
-		for (size_t j=0; j<indicator_idv.size(); j++) {
-			d=gsl_matrix_get(G, i, j);
-			if (indicator_idv[i]==1 && indicator_idv[j]==1) {
-				gsl_matrix_set(Goo, o_i, o_j, d);
-			}
-			if (indicator_idv[i]==0 && indicator_idv[j]==1) {
-				gsl_matrix_set(Gfo, i-o_i, o_j, d);
-			}
-			if (indicator_idv[j]==1) {o_j++;}
-		}
-		if (indicator_idv[i]==1) {o_i++;}
-	}
-
-	//matrix operations to get u_prdt
-	cout<<"Start Eigen-Decomposition..."<<endl;
-	clock_t time_start=clock();
-	EigenDecomp (Goo, U, eval, 0);
-	for (size_t i=0; i<eval->size; i++) {
-		if (gsl_vector_get(eval,i)<1e-10) {
-		  gsl_vector_set(eval, i, 0);
-		}
-	}
-
-	time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu);
-	for (size_t i=0; i<eval->size; i++) {
-		d=gsl_vector_get(eval, i);
-		if (d!=0) {
-		  d=gsl_vector_get(Utu, i)/d;
-		  gsl_vector_set(Utu, i, d);
-		}
-	}
-	gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
-
-	// Free matrices.
-	gsl_matrix_free(Goo);
-	gsl_matrix_free(Gfo);
-	gsl_matrix_free(U);
-	gsl_vector_free(eval);
-	gsl_vector_free(Utu);
-	gsl_vector_free(w);
-	gsl_permutation_free(pmt);
-
-	return;
+void PRDT::AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
+  size_t ni_test = u_hat->size, ni_total = G->size1;
+
+  gsl_matrix *Goo = gsl_matrix_alloc(ni_test, ni_test);
+  gsl_matrix *Gfo = gsl_matrix_alloc(ni_total - ni_test, ni_test);
+  gsl_matrix *U = gsl_matrix_alloc(ni_test, ni_test);
+  gsl_vector *eval = gsl_vector_alloc(ni_test);
+  gsl_vector *Utu = gsl_vector_alloc(ni_test);
+  gsl_vector *w = gsl_vector_alloc(ni_total);
+  gsl_permutation *pmt = gsl_permutation_alloc(ni_test);
+
+  // center matrix G based on indicator_idv
+  for (size_t i = 0; i < ni_total; i++) {
+    gsl_vector_set(w, i, indicator_idv[i]);
+  }
+  CenterMatrix(G, w);
+
+  // obtain Goo and Gfo, the blocks of G selected by indicator_idv
+  size_t o_i = 0, o_j = 0;
+  double d;
+  for (size_t i = 0; i < indicator_idv.size(); i++) {
+    o_j = 0;
+    for (size_t j = 0; j < indicator_idv.size(); j++) {
+      d = gsl_matrix_get(G, i, j);
+      if (indicator_idv[i] == 1 && indicator_idv[j] == 1) {
+        gsl_matrix_set(Goo, o_i, o_j, d);
+      }
+      if (indicator_idv[i] == 0 && indicator_idv[j] == 1) {
+        gsl_matrix_set(Gfo, i - o_i, o_j, d);
+      }
+      if (indicator_idv[j] == 1) {
+        o_j++;
+      }
+    }
+    if (indicator_idv[i] == 1) {
+      o_i++;
+    }
+  }
+
+  // matrix operations to get u_prdt
+  cout << "Start Eigen-Decomposition..." << endl;
+  clock_t time_start = clock();
+  EigenDecomp(Goo, U, eval, 0);
+  for (size_t i = 0; i < eval->size; i++) {
+    if (gsl_vector_get(eval, i) < 1e-10) {
+      gsl_vector_set(eval, i, 0);
+    }
+  }
+
+  time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  gsl_blas_dgemv(CblasTrans, 1.0, U, u_hat, 0.0, Utu);
+  for (size_t i = 0; i < eval->size; i++) {
+    d = gsl_vector_get(eval, i);
+    if (d != 0) {
+      d = gsl_vector_get(Utu, i) / d;
+      gsl_vector_set(Utu, i, d);
+    }
+  }
+  gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, eval);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
+
+  // Free matrices.
+  gsl_matrix_free(Goo);
+  gsl_matrix_free(Gfo);
+  gsl_matrix_free(U);
+  gsl_vector_free(eval);
+  gsl_vector_free(Utu);
+  gsl_vector_free(w);
+  gsl_permutation_free(pmt);
+
+  return;
 }
 
-void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return;
-	}
-
-	string line;
-	char *ch_ptr;
-	string rs;
-
-	size_t n_miss, n_train_nomiss, c_phen;
-	double geno, x_mean, x_train_mean, effect_size;
-
-	gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-	gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size);
-
-	ns_test=0;
-
-	// Start reading genotypes and analyze.
-	for (size_t t=0; t<ns_total; ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%d_pace==0 || t==(ns_total-1)) {
-		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
-		}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		rs=ch_ptr;
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		if (mapRS2est.count(rs)==0) {
-		  continue;
-		} else {
-		  effect_size=mapRS2est[rs];
-		}
-
-		x_mean=0.0;
-		c_phen=0;
-		n_miss=0;
-		x_train_mean=0;
-		n_train_nomiss=0;
-
-		gsl_vector_set_zero(x_miss);
-
-		for (size_t i=0; i<indicator_idv.size(); ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==1) {
-				if (strcmp(ch_ptr, "NA")!=0) {
-					geno=atof(ch_ptr);
-					x_train_mean+=geno;
-					n_train_nomiss++;
-				}
-			} else {
-				if (strcmp(ch_ptr, "NA")==0) {
-					gsl_vector_set(x_miss, c_phen, 0.0);
-					n_miss++;
-				} else {
-					geno=atof(ch_ptr);
-
-					gsl_vector_set(x, c_phen, geno);
-					gsl_vector_set(x_miss, c_phen, 1.0);
-					x_mean+=geno;
-				}
-				c_phen++;
-			}
-		}
-
-		if (x->size==n_miss) {
-		  cout << "snp " << rs << " has missing genotype for all " <<
-		    "individuals and will be ignored." << endl;
-		  continue;}
-
-
-		x_mean/=(double)(x->size-n_miss);
-		x_train_mean/=(double)(n_train_nomiss);
-
-
-		for (size_t i=0; i<x->size; ++i) {
-			geno=gsl_vector_get(x, i);
-			if (gsl_vector_get (x_miss, i)==0) {
-				gsl_vector_set(x, i, x_mean-x_train_mean);
-			} else {
-				gsl_vector_set(x, i, geno-x_train_mean);
-			}
-		}
-
-		gsl_vector_scale (x, effect_size);
-		gsl_vector_add (y_prdt, x);
-
-		ns_test++;
-	}
-	cout<<endl;
-
-	gsl_vector_free (x);
-	gsl_vector_free (x_miss);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  string line;
+  char *ch_ptr;
+  string rs;
+
+  size_t n_miss, n_train_nomiss, c_phen;
+  double geno, x_mean, x_train_mean, effect_size;
+
+  gsl_vector *x = gsl_vector_alloc(y_prdt->size);
+  gsl_vector *x_miss = gsl_vector_alloc(y_prdt->size);
+
+  ns_test = 0;
+
+  // Start reading genotypes and analyze.
+  for (size_t t = 0; t < ns_total; ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    rs = ch_ptr;
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    if (mapRS2est.count(rs) == 0) {
+      continue;
+    } else {
+      effect_size = mapRS2est[rs];
+    }
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    x_train_mean = 0;
+    n_train_nomiss = 0;
+
+    gsl_vector_set_zero(x_miss);
+
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 1) {
+        if (strcmp(ch_ptr, "NA") != 0) {
+          geno = atof(ch_ptr);
+          x_train_mean += geno;
+          n_train_nomiss++;
+        }
+      } else {
+        if (strcmp(ch_ptr, "NA") == 0) {
+          gsl_vector_set(x_miss, c_phen, 0.0);
+          n_miss++;
+        } else {
+          geno = atof(ch_ptr);
+
+          gsl_vector_set(x, c_phen, geno);
+          gsl_vector_set(x_miss, c_phen, 1.0);
+          x_mean += geno;
+        }
+        c_phen++;
+      }
+    }
+
+    if (x->size == n_miss) {
+      cout << "snp " << rs << " has missing genotype for all "
+           << "individuals and will be ignored." << endl;
+      continue;
+    }
+
+    x_mean /= (double)(x->size - n_miss);
+    x_train_mean /= (double)(n_train_nomiss);
+
+    for (size_t i = 0; i < x->size; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean - x_train_mean);
+      } else {
+        gsl_vector_set(x, i, geno - x_train_mean);
+      }
+    }
+
+    gsl_vector_scale(x, effect_size);
+    gsl_vector_add(y_prdt, x);
+
+    ns_test++;
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return;
-	}
-
-	char ch[1];
-	bitset<8> b;
-	string rs;
-
-	size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
-	double geno, x_mean, x_train_mean, effect_size;
-
-	gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;}
-	else {n_bit=indicator_idv.size()/4+1; }
-
-	// Print the first 3 magic numbers.
-	for (size_t i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	ns_test=0;
-
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-		if (t%d_pace==0 || t==snpInfo.size()-1) {
-		  ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-		}
-
-		rs=snpInfo[t].rs_number;
-
-		if (mapRS2est.count(rs)==0) {
-		  continue;
-		} else {
-		  effect_size=mapRS2est[rs];
-		}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		x_mean=0.0;
-		n_miss=0;
-		ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0;
-		for (size_t i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-				if ((i==(n_bit-1)) &&
-				    ci_total==indicator_idv.size()) {
-				  break;
-				}
-				if (indicator_idv[ci_total]==1) {
-					if (b[2*j]==0) {
-						if (b[2*j+1]==0) {
-						  x_train_mean+=2.0;
-						  n_train_nomiss++;
-						}
-						else {
-						  x_train_mean+=1.0;
-						  n_train_nomiss++;
-						}
-					}
-					else {
-						if (b[2*j+1]==1) {
-						  n_train_nomiss++;
-						}
-						else {}
-					}
-				} else {
-					if (b[2*j]==0) {
-						if (b[2*j+1]==0) {
-						  gsl_vector_set(x,ci_test,2);
-						  x_mean+=2.0;
-						}
-						else {
-						  gsl_vector_set(x,ci_test,1);
-						  x_mean+=1.0;
-						}
-					}
-					else {
-						if (b[2*j+1]==1) {
-						  gsl_vector_set(x,ci_test,0);
-						}
-						else {
-						  gsl_vector_set(x,ci_test,-9);
-						  n_miss++;
-						}
-					}
-					ci_test++;
-				}
-				ci_total++;
-
-			}
-		}
-
-		if (x->size==n_miss) {
-		  cout << "snp " << rs << " has missing genotype for all " <<
-		    "individuals and will be ignored."<<endl;
-		  continue;
-		}
-
-		x_mean/=(double)(x->size-n_miss);
-		x_train_mean/=(double)(n_train_nomiss);
-
-		for (size_t i=0; i<x->size; ++i) {
-			geno=gsl_vector_get(x, i);
-			if (geno==-9) {
-				gsl_vector_set(x, i, x_mean-x_train_mean);
-			} else {
-				gsl_vector_set(x, i, geno-x_train_mean);
-			}
-		}
-
-		gsl_vector_scale (x, effect_size);
-		gsl_vector_add (y_prdt, x);
-
-		ns_test++;
-	}
-	cout<<endl;
-
-	gsl_vector_free (x);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void PRDT::AnalyzePlink(gsl_vector *y_prdt) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  char ch[1];
+  bitset<8> b;
+  string rs;
+
+  size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
+  double geno, x_mean, x_train_mean, effect_size;
+
+  gsl_vector *x = gsl_vector_alloc(y_prdt->size);
+
+  // Calculate n_bit, the number of bytes per SNP (4 individuals per byte).
+  if (indicator_idv.size() % 4 == 0) {
+    n_bit = indicator_idv.size() / 4;
+  } else {
+    n_bit = indicator_idv.size() / 4 + 1;
+  }
+
+  // Read past the first 3 magic numbers.
+  for (size_t i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  ns_test = 0;
+
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+
+    rs = snpInfo[t].rs_number;
+
+    if (mapRS2est.count(rs) == 0) {
+      continue;
+    } else {
+      effect_size = mapRS2est[rs];
+    }
+
+    // Seek to SNP t: n_bit bytes per SNP, after the 3 magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    x_train_mean = 0;
+    n_train_nomiss = 0;
+    for (size_t i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == indicator_idv.size()) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 1) {
+          if (b[2 * j] == 0) {
+            if (b[2 * j + 1] == 0) {
+              x_train_mean += 2.0;
+              n_train_nomiss++;
+            } else {
+              x_train_mean += 1.0;
+              n_train_nomiss++;
+            }
+          } else {
+            if (b[2 * j + 1] == 1) {
+              n_train_nomiss++;
+            } else {
+            }
+          }
+        } else {
+          if (b[2 * j] == 0) {
+            if (b[2 * j + 1] == 0) {
+              gsl_vector_set(x, ci_test, 2);
+              x_mean += 2.0;
+            } else {
+              gsl_vector_set(x, ci_test, 1);
+              x_mean += 1.0;
+            }
+          } else {
+            if (b[2 * j + 1] == 1) {
+              gsl_vector_set(x, ci_test, 0);
+            } else {
+              gsl_vector_set(x, ci_test, -9);
+              n_miss++;
+            }
+          }
+          ci_test++;
+        }
+        ci_total++;
+      }
+    }
+
+    if (x->size == n_miss) {
+      cout << "snp " << rs << " has missing genotype for all "
+           << "individuals and will be ignored." << endl;
+      continue;
+    }
+
+    x_mean /= (double)(x->size - n_miss);
+    x_train_mean /= (double)(n_train_nomiss);
+
+    for (size_t i = 0; i < x->size; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean - x_train_mean);
+      } else {
+        gsl_vector_set(x, i, geno - x_train_mean);
+      }
+    }
+
+    gsl_vector_scale(x, effect_size);
+    gsl_vector_add(y_prdt, x);
+
+    ns_test++;
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
 // Predict missing phenotypes using ridge regression.
 // Y_hat contains fixed effects
-void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
-		       gsl_matrix *Y_full) {
-	gsl_vector *y_obs=gsl_vector_alloc (np_obs);
-	gsl_vector *y_miss=gsl_vector_alloc (np_miss);
-	gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs);
-	gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs);
-	gsl_vector *Hiy=gsl_vector_alloc (np_obs);
-
-	size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0;
-
-	// Obtain H_oo, H_mo.
-	c_obs1=0; c_miss1=0;
-	for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) {
-		if (indicator_cvt[i1]==0) {continue;}
-		for (vector<int>::size_type j1=0; j1<n_ph; ++j1) {
-
-			c_obs2=0; c_miss2=0;
-			for (vector<int>::size_type i2=0;
-			     i2<indicator_pheno.size(); ++i2) {
-				if (indicator_cvt[i2]==0) {continue;}
-				for (vector<int>::size_type j2=0;
-				     j2<n_ph; j2++) {
-
-					if (indicator_pheno[i2][j2]==1) {
-					      if (indicator_pheno[i1][j1]==1) {
-						gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
-						} else {
-							gsl_matrix_set (H_mo, c_miss1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
-						}
-						c_obs2++;
-					} else {
-						c_miss2++;
-					}
-				}
-			}
-
-			if (indicator_pheno[i1][j1]==1) {
-				c_obs1++;
-			} else {
-				c_miss1++;
-			}
-		}
-
-	}
-
-	// Do LU decomposition of H_oo.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (np_obs);
-	LUDecomp (H_oo, pmt, &sig);
-
-		// Obtain y_obs=y_full-y_hat.
-		// Add the fixed effects part to y_miss: y_miss=y_hat.
-		c_obs1=0; c_miss1=0;
-		for (vector<int>::size_type i=0;
-		     i<indicator_pheno.size(); ++i) {
-			if (indicator_cvt[i]==0) {continue;}
-
-			for (vector<int>::size_type j=0; j<n_ph; ++j) {
-				if (indicator_pheno[i][j]==1) {
-					gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) );
-					c_obs1++;
-				} else {
-					gsl_vector_set (y_miss, c_miss1, gsl_matrix_get (Y_hat, i, j) );
-					c_miss1++;
-				}
-			}
-		}
-
-		LUSolve (H_oo, pmt, y_obs, Hiy);
-
-		gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
-
-		// Put back predicted y_miss to Y_full.
-		c_miss1=0;
-		for (vector<int>::size_type i=0;
-		     i<indicator_pheno.size(); ++i) {
-			if (indicator_cvt[i]==0) {continue;}
-
-			for (vector<int>::size_type j=0; j<n_ph; ++j) {
-				if (indicator_pheno[i][j]==0) {
-					gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) );
-					c_miss1++;
-				}
-			}
-		}
-
-	// Free matrices.
-	gsl_vector_free(y_obs);
-	gsl_vector_free(y_miss);
-	gsl_matrix_free(H_oo);
-	gsl_matrix_free(H_mo);
-	gsl_vector_free(Hiy);
-
-	return;
+void PRDT::MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H,
+                      gsl_matrix *Y_full) {
+  gsl_vector *y_obs = gsl_vector_alloc(np_obs);
+  gsl_vector *y_miss = gsl_vector_alloc(np_miss);
+  gsl_matrix *H_oo = gsl_matrix_alloc(np_obs, np_obs);
+  gsl_matrix *H_mo = gsl_matrix_alloc(np_miss, np_obs);
+  gsl_vector *Hiy = gsl_vector_alloc(np_obs);
+
+  size_t c_obs1 = 0, c_obs2 = 0, c_miss1 = 0, c_miss2 = 0;
+
+  // Obtain H_oo, H_mo.
+  c_obs1 = 0;
+  c_miss1 = 0;
+  for (vector<int>::size_type i1 = 0; i1 < indicator_pheno.size(); ++i1) {
+    if (indicator_cvt[i1] == 0) {
+      continue;
+    }
+    for (vector<int>::size_type j1 = 0; j1 < n_ph; ++j1) {
+
+      c_obs2 = 0;
+      c_miss2 = 0;
+      for (vector<int>::size_type i2 = 0; i2 < indicator_pheno.size(); ++i2) {
+        if (indicator_cvt[i2] == 0) {
+          continue;
+        }
+        for (vector<int>::size_type j2 = 0; j2 < n_ph; j2++) {
+
+          if (indicator_pheno[i2][j2] == 1) {
+            if (indicator_pheno[i1][j1] == 1) {
+              gsl_matrix_set(
+                  H_oo, c_obs1, c_obs2,
+                  gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2));
+            } else {
+              gsl_matrix_set(
+                  H_mo, c_miss1, c_obs2,
+                  gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2));
+            }
+            c_obs2++;
+          } else {
+            c_miss2++;
+          }
+        }
+      }
+
+      if (indicator_pheno[i1][j1] == 1) {
+        c_obs1++;
+      } else {
+        c_miss1++;
+      }
+    }
+  }
+
+  // Do LU decomposition of H_oo.
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(np_obs);
+  LUDecomp(H_oo, pmt, &sig);
+
+  // Obtain y_obs=y_full-y_hat.
+  // Add the fixed effects part to y_miss: y_miss=y_hat.
+  c_obs1 = 0;
+  c_miss1 = 0;
+  for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) {
+    if (indicator_cvt[i] == 0) {
+      continue;
+    }
+
+    for (vector<int>::size_type j = 0; j < n_ph; ++j) {
+      if (indicator_pheno[i][j] == 1) {
+        gsl_vector_set(y_obs, c_obs1, gsl_matrix_get(Y_full, i, j) -
+                                          gsl_matrix_get(Y_hat, i, j));
+        c_obs1++;
+      } else {
+        gsl_vector_set(y_miss, c_miss1, gsl_matrix_get(Y_hat, i, j));
+        c_miss1++;
+      }
+    }
+  }
+
+  LUSolve(H_oo, pmt, y_obs, Hiy);
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
+
+  // Put back predicted y_miss to Y_full.
+  c_miss1 = 0;
+  for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) {
+    if (indicator_cvt[i] == 0) {
+      continue;
+    }
+
+    for (vector<int>::size_type j = 0; j < n_ph; ++j) {
+      if (indicator_pheno[i][j] == 0) {
+        gsl_matrix_set(Y_full, i, j, gsl_vector_get(y_miss, c_miss1));
+        c_miss1++;
+      }
+    }
+  }
+
+  // Free matrices. NOTE(review): pmt (allocated above) is never freed.
+  gsl_vector_free(y_obs);
+  gsl_vector_free(y_miss);
+  gsl_matrix_free(H_oo);
+  gsl_matrix_free(H_mo);
+  gsl_vector_free(Hiy);
+
+  return;
 }
-
-
diff --git a/src/prdt.h b/src/prdt.h
index 0939b36..571fdb8 100644
--- a/src/prdt.h
+++ b/src/prdt.h
@@ -19,58 +19,50 @@
 #ifndef __PRDT_H__
 #define __PRDT_H__
 
-#include <vector>
-#include <map>
-#include <string.h>
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 #include "param.h"
+#include <map>
+#include <string.h>
+#include <vector>
 
 using namespace std;
 
 class PRDT {
 
 public:
-	// IO-related parameters.
-	size_t a_mode;
-	size_t d_pace;
-
-	string file_bfile;
-	string file_geno;
-	string file_out;
-	string path_out;
-
-	vector<vector<int> > indicator_pheno;
-	vector<int> indicator_cvt;
-	vector<int> indicator_idv;
-	vector<SNPINFO> snpInfo;
-	map<string, double> mapRS2est;
-
-	size_t n_ph;
-	size_t np_obs, np_miss;
-	size_t ns_total;
-	size_t ns_test;
-
-	double time_eigen;
-
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-	void WriteFiles (gsl_vector *y_prdt);
-	void WriteFiles (gsl_matrix *Y_full);
-	void AddBV (gsl_matrix *G, const gsl_vector *u_hat,
-		    gsl_vector *y_prdt);
-	void AnalyzeBimbam (gsl_vector *y_prdt);
-	void AnalyzePlink (gsl_vector *y_prdt);
-	void MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
-			 gsl_matrix *Y_full);
+  // IO-related parameters.
+  size_t a_mode;
+  size_t d_pace;
+
+  string file_bfile;
+  string file_geno;
+  string file_out;
+  string path_out;
+
+  vector<vector<int>> indicator_pheno;
+  vector<int> indicator_cvt;
+  vector<int> indicator_idv;
+  vector<SNPINFO> snpInfo;
+  map<string, double> mapRS2est;
+
+  size_t n_ph;
+  size_t np_obs, np_miss;
+  size_t ns_total;
+  size_t ns_test;
+
+  double time_eigen;
+
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+  void WriteFiles(gsl_vector *y_prdt);
+  void WriteFiles(gsl_matrix *Y_full);
+  void AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt);
+  void AnalyzeBimbam(gsl_vector *y_prdt);
+  void AnalyzePlink(gsl_vector *y_prdt);
+  void MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H,
+                  gsl_matrix *Y_full);
 };
 
 #endif
-
-
-
-
-
-
-
diff --git a/src/varcov.cpp b/src/varcov.cpp
index 46b5bf8..0f87ba8 100644
--- a/src/varcov.cpp
+++ b/src/varcov.cpp
@@ -16,103 +16,126 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
+#include <bitset>
+#include <cmath>
+#include <cstring>
 #include <fstream>
-#include <sstream>
-#include <string>
 #include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
 #include <map>
 #include <set>
-#include <cstring>
-#include <cmath>
+#include <sstream>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string>
+#include <vector>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 
-#include "lapack.h"
 #include "gzstream.h"
-#include "param.h"
-#include "varcov.h"
 #include "io.h"
+#include "lapack.h"
 #include "mathfunc.h"
+#include "param.h"
+#include "varcov.h"
 
 using namespace std;
 
-void VARCOV::CopyFromParam (PARAM &cPar) {
-	d_pace=cPar.d_pace;
+void VARCOV::CopyFromParam(PARAM &cPar) {
+  d_pace = cPar.d_pace;
 
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
 
-	time_opt=0.0;
+  time_opt = 0.0;
 
-	window_cm=cPar.window_cm;
-	window_bp=cPar.window_bp;
-	window_ns=cPar.window_ns;
+  window_cm = cPar.window_cm;
+  window_bp = cPar.window_bp;
+  window_ns = cPar.window_ns;
 
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
+  indicator_idv = cPar.indicator_idv;
+  indicator_snp = cPar.indicator_snp;
+  snpInfo = cPar.snpInfo;
 
-	return;
+  return;
 }
 
-void VARCOV::CopyToParam (PARAM &cPar) {
-	cPar.time_opt=time_opt;
-	return;
+void VARCOV::CopyToParam(PARAM &cPar) {
+  cPar.time_opt = time_opt;
+  return;
 }
 
-void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
-		       const vector<vector<double> > &Cov_mat) {
+void VARCOV::WriteCov(const int flag, const vector<SNPINFO> &snpInfo_sub,
+                      const vector<vector<double>> &Cov_mat) {
   string file_cov;
-  file_cov=path_out+"/"+file_out;
-  file_cov+=".cor.txt";
+  file_cov = path_out + "/" + file_out;
+  file_cov += ".cor.txt";
 
   ofstream outfile;
 
-  if (flag==0) {
-    outfile.open (file_cov.c_str(), ofstream::out);
-    if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;}
+  if (flag == 0) {
+    outfile.open(file_cov.c_str(), ofstream::out);
+    if (!outfile) {
+      cout << "error writing file: " << file_cov << endl;
+      return;
+    }
 
-    outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"
-	   <<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"
-	   <<"\t"<<"af"<<"\t"<<"window_size"
-	   <<"\t"<<"var"<<"\t"<<"cor"<<endl;
+    outfile << "chr"
+            << "\t"
+            << "rs"
+            << "\t"
+            << "ps"
+            << "\t"
+            << "n_mis"
+            << "\t"
+            << "n_obs"
+            << "\t"
+            << "allele1"
+            << "\t"
+            << "allele0"
+            << "\t"
+            << "af"
+            << "\t"
+            << "window_size"
+            << "\t"
+            << "var"
+            << "\t"
+            << "cor" << endl;
   } else {
-    outfile.open (file_cov.c_str(), ofstream::app);
-    if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;}
-
-    for (size_t i=0; i<Cov_mat.size(); i++) {
-      outfile << snpInfo_sub[i].chr << "\t" << snpInfo_sub[i].rs_number <<
-	"\t" << snpInfo_sub[i].base_position << "\t" <<
-	snpInfo_sub[i].n_miss << "\t" << snpInfo_sub[i].n_idv << "\t" <<
-	snpInfo_sub[i].a_minor << "\t" << snpInfo_sub[i].a_major << "\t" <<
-	fixed << setprecision(3) << snpInfo_sub[i].maf << "\t" <<
-	Cov_mat[i].size()-1 << "\t";
-      outfile<<scientific<<setprecision(6)<<Cov_mat[i][0]<<"\t";
-
-      if (Cov_mat[i].size()==1) {
-	outfile<<"NA";
+    outfile.open(file_cov.c_str(), ofstream::app);
+    if (!outfile) {
+      cout << "error writing file: " << file_cov << endl;
+      return;
+    }
+
+    for (size_t i = 0; i < Cov_mat.size(); i++) {
+      outfile << snpInfo_sub[i].chr << "\t" << snpInfo_sub[i].rs_number << "\t"
+              << snpInfo_sub[i].base_position << "\t" << snpInfo_sub[i].n_miss
+              << "\t" << snpInfo_sub[i].n_idv << "\t" << snpInfo_sub[i].a_minor
+              << "\t" << snpInfo_sub[i].a_major << "\t" << fixed
+              << setprecision(3) << snpInfo_sub[i].maf << "\t"
+              << Cov_mat[i].size() - 1 << "\t";
+      outfile << scientific << setprecision(6) << Cov_mat[i][0] << "\t";
+
+      if (Cov_mat[i].size() == 1) {
+        outfile << "NA";
       } else {
-	for (size_t j=1; j<Cov_mat[i].size(); j++) {
-	  if (j==(Cov_mat[i].size()-1)) {
-	    outfile<<Cov_mat[i][j];
-	  } else {
-	    outfile<<Cov_mat[i][j]<<",";
-	  }
-	}
+        for (size_t j = 1; j < Cov_mat[i].size(); j++) {
+          if (j == (Cov_mat[i].size() - 1)) {
+            outfile << Cov_mat[i][j];
+          } else {
+            outfile << Cov_mat[i][j] << ",";
+          }
+        }
       }
 
-      outfile<<endl;
+      outfile << endl;
     }
   }
 
@@ -121,18 +144,18 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
   return;
 }
 
-bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) {
-  int c_chr=snpInfo1.chr.compare(snpInfo2.chr);
-  long int c_bp=snpInfo1.base_position-snpInfo2.base_position;
+bool CompareSNPinfo(const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) {
+  int c_chr = snpInfo1.chr.compare(snpInfo2.chr);
+  long int c_bp = snpInfo1.base_position - snpInfo2.base_position;
 
-  if(c_chr<0) {
+  if (c_chr < 0) {
     return true;
-  } else if (c_chr>0) {
+  } else if (c_chr > 0) {
     return false;
   } else {
-    if (c_bp<0) {
+    if (c_bp < 0) {
       return true;
-    } else if (c_bp>0) {
+    } else if (c_bp > 0) {
       return false;
     } else {
       return true;
@@ -140,64 +163,73 @@ bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) {
   }
 }
 
-
 // Do not sort SNPs (because gzip files do not support random access)
 // then calculate n_nb, the number of neighbours, for each SNP.
-void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort) {
-  size_t t2=0, n_nb=0;
-  for (size_t t=0; t<indicator_snp.size(); ++t) {
-    if (indicator_snp[t]==0) {continue;}
-
-    if (snpInfo_sort[t].chr=="-9" ||
-	(snpInfo_sort[t].cM==-9 && window_cm!=0) ||
-	(snpInfo_sort[t].base_position==-9 && window_bp!=0) ) {
-      snpInfo_sort[t].n_nb=0; continue;
+void VARCOV::CalcNB(vector<SNPINFO> &snpInfo_sort) {
+  size_t t2 = 0, n_nb = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    if (snpInfo_sort[t].chr == "-9" ||
+        (snpInfo_sort[t].cM == -9 && window_cm != 0) ||
+        (snpInfo_sort[t].base_position == -9 && window_bp != 0)) {
+      snpInfo_sort[t].n_nb = 0;
+      continue;
     }
 
-    if (t==indicator_snp.size()-1) {snpInfo_sort[t].n_nb=0; continue;}
+    if (t == indicator_snp.size() - 1) {
+      snpInfo_sort[t].n_nb = 0;
+      continue;
+    }
 
-    t2=t+1; n_nb=0;
+    t2 = t + 1;
+    n_nb = 0;
 
-    while (t2<indicator_snp.size() &&
-	   snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
-	   indicator_snp[t2]==0) {
+    while (t2 < indicator_snp.size() &&
+           snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
+           indicator_snp[t2] == 0) {
       t2++;
     }
 
-    while (t2<indicator_snp.size() &&
-	   snpInfo_sort[t2].chr==snpInfo_sort[t].chr &&
-	   (snpInfo_sort[t2].cM-snpInfo_sort[t].cM<window_cm ||
-	    window_cm==0) &&
-	   (snpInfo_sort[t2].base_position-snpInfo_sort[t].base_position <
-	    window_bp || window_bp==0) && (n_nb<window_ns|| window_ns==0)) {
-      t2++; n_nb++;
-      while (t2<indicator_snp.size() &&
-	     snpInfo_sort[t2].chr==snpInfo_sort[t].chr &&
-	     indicator_snp[t2]==0) {
-	t2++;
+    while (t2 < indicator_snp.size() &&
+           snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
+           (snpInfo_sort[t2].cM - snpInfo_sort[t].cM < window_cm ||
+            window_cm == 0) &&
+           (snpInfo_sort[t2].base_position - snpInfo_sort[t].base_position <
+                window_bp ||
+            window_bp == 0) &&
+           (n_nb < window_ns || window_ns == 0)) {
+      t2++;
+      n_nb++;
+      while (t2 < indicator_snp.size() &&
+             snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
+             indicator_snp[t2] == 0) {
+        t2++;
       }
     }
 
-    snpInfo_sort[t].n_nb=n_nb;
+    snpInfo_sort[t].n_nb = n_nb;
   }
 
   return;
 }
 
 // Vector double is centered to have mean 0.
-void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) {
+void Calc_Cor(vector<vector<double>> &X_mat, vector<double> &cov_vec) {
   cov_vec.clear();
 
   double v1, v2, r;
-  vector<double> x_vec=X_mat[0];
+  vector<double> x_vec = X_mat[0];
 
   lapack_ddot(x_vec, x_vec, v1);
-  cov_vec.push_back(v1/(double)x_vec.size() );
+  cov_vec.push_back(v1 / (double)x_vec.size());
 
-  for (size_t i=1; i<X_mat.size(); i++) {
+  for (size_t i = 1; i < X_mat.size(); i++) {
     lapack_ddot(X_mat[i], x_vec, r);
     lapack_ddot(X_mat[i], X_mat[i], v2);
-    r/=sqrt(v1*v2);
+    r /= sqrt(v1 * v2);
 
     cov_vec.push_back(r);
   }
@@ -214,10 +246,10 @@ void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) {
 // window_size (which can vary if cM was used) read bimbam mean
 // genotype file and calculate the covariance matrix for neighboring
 // SNPs output values at 10000-SNP-interval.
-void VARCOV::AnalyzeBimbam () {
-  igzstream infile (file_geno.c_str(), igzstream::in);
+void VARCOV::AnalyzeBimbam() {
+  igzstream infile(file_geno.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error reading genotype file:"<<file_geno<<endl;
+    cout << "error reading genotype file:" << file_geno << endl;
     return;
   }
 
@@ -225,58 +257,64 @@ void VARCOV::AnalyzeBimbam () {
   vector<SNPINFO> snpInfo_sub;
   CalcNB(snpInfo);
 
-  size_t ni_test=0;
-  for (size_t i=0; i<indicator_idv.size(); i++) {
-    ni_test+=indicator_idv[i];
+  size_t ni_test = 0;
+  for (size_t i = 0; i < indicator_idv.size(); i++) {
+    ni_test += indicator_idv[i];
   }
 
-  gsl_vector *geno=gsl_vector_alloc (ni_test);
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
   double geno_mean;
 
   vector<double> x_vec, cov_vec;
-  vector<vector<double> > X_mat, Cov_mat;
+  vector<vector<double>> X_mat, Cov_mat;
 
-  for (size_t i=0; i<ni_test; i++) {
+  for (size_t i = 0; i < ni_test; i++) {
     x_vec.push_back(0);
   }
 
-  WriteCov (0, snpInfo_sub, Cov_mat);
+  WriteCov(0, snpInfo_sub, Cov_mat);
 
-  size_t t2=0, inc;
-  int n_nb=0;
+  size_t t2 = 0, inc;
+  int n_nb = 0;
 
-  for (size_t t=0; t<indicator_snp.size(); ++t) {
-    if (t%d_pace==0 || t==(indicator_snp.size()-1))
-      {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
-    if (indicator_snp[t]==0) {continue;}
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % d_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
 
-    if (X_mat.size()==0) {
-      n_nb=snpInfo[t].n_nb+1;
+    if (X_mat.size() == 0) {
+      n_nb = snpInfo[t].n_nb + 1;
     } else {
-      n_nb=snpInfo[t].n_nb-n_nb+1;
+      n_nb = snpInfo[t].n_nb - n_nb + 1;
     }
 
-    for (int i=0; i<n_nb; i++) {
-      if (X_mat.size()==0) {t2=t;}
+    for (int i = 0; i < n_nb; i++) {
+      if (X_mat.size() == 0) {
+        t2 = t;
+      }
 
       // Read a line of the snp is filtered out.
-      inc=0;
-      while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
-	t2++; inc++;
+      inc = 0;
+      while (t2 < indicator_snp.size() && indicator_snp[t2] == 0) {
+        t2++;
+        inc++;
       }
 
-      Bimbam_ReadOneSNP (inc, indicator_idv, infile, geno, geno_mean);
-      gsl_vector_add_constant (geno, -1.0*geno_mean);
+      Bimbam_ReadOneSNP(inc, indicator_idv, infile, geno, geno_mean);
+      gsl_vector_add_constant(geno, -1.0 * geno_mean);
 
-      for (size_t j=0; j<geno->size; j++) {
-	x_vec[j]=gsl_vector_get(geno, j);
+      for (size_t j = 0; j < geno->size; j++) {
+        x_vec[j] = gsl_vector_get(geno, j);
       }
       X_mat.push_back(x_vec);
 
       t2++;
     }
 
-    n_nb=snpInfo[t].n_nb;
+    n_nb = snpInfo[t].n_nb;
 
     Calc_Cor(X_mat, cov_vec);
     Cov_mat.push_back(cov_vec);
@@ -285,15 +323,15 @@ void VARCOV::AnalyzeBimbam () {
     X_mat.erase(X_mat.begin());
 
     // Write out var/cov values.
-    if (Cov_mat.size()==10000) {
-      WriteCov (1, snpInfo_sub, Cov_mat);
+    if (Cov_mat.size() == 10000) {
+      WriteCov(1, snpInfo_sub, Cov_mat);
       Cov_mat.clear();
       snpInfo_sub.clear();
     }
   }
 
-  if (Cov_mat.size()!=0) {
-    WriteCov (1, snpInfo_sub, Cov_mat);
+  if (Cov_mat.size() != 0) {
+    WriteCov(1, snpInfo_sub, Cov_mat);
     Cov_mat.clear();
     snpInfo_sub.clear();
   }
@@ -306,68 +344,76 @@ void VARCOV::AnalyzeBimbam () {
   return;
 }
 
-void VARCOV::AnalyzePlink () {
-  string file_bed=file_bfile+".bed";
-  ifstream infile (file_bed.c_str(), ios::binary);
-  if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+void VARCOV::AnalyzePlink() {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
 
   // Calculate the number of right-hand-side neighbours for each SNP.
   vector<SNPINFO> snpInfo_sub;
   CalcNB(snpInfo);
 
-  size_t ni_test=0;
-  for (size_t i=0; i<indicator_idv.size(); i++) {
-    ni_test+=indicator_idv[i];
+  size_t ni_test = 0;
+  for (size_t i = 0; i < indicator_idv.size(); i++) {
+    ni_test += indicator_idv[i];
   }
 
-  gsl_vector *geno=gsl_vector_alloc (ni_test);
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
   double geno_mean;
 
   vector<double> x_vec, cov_vec;
-  vector<vector<double> > X_mat, Cov_mat;
+  vector<vector<double>> X_mat, Cov_mat;
 
-  for (size_t i=0; i<ni_test; i++) {
+  for (size_t i = 0; i < ni_test; i++) {
     x_vec.push_back(0);
   }
 
-  WriteCov (0, snpInfo_sub, Cov_mat);
+  WriteCov(0, snpInfo_sub, Cov_mat);
 
-  size_t t2=0, inc;
-  int n_nb=0;
+  size_t t2 = 0, inc;
+  int n_nb = 0;
 
-  for (size_t t=0; t<indicator_snp.size(); ++t) {
-    if (t%d_pace==0 || t==(indicator_snp.size()-1))
-      {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
-    if (indicator_snp[t]==0) {continue;}
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % d_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
 
-    if (X_mat.size()==0) {
-      n_nb=snpInfo[t].n_nb+1;
+    if (X_mat.size() == 0) {
+      n_nb = snpInfo[t].n_nb + 1;
     } else {
-      n_nb=snpInfo[t].n_nb-n_nb+1;
+      n_nb = snpInfo[t].n_nb - n_nb + 1;
     }
 
-    for (int i=0; i<n_nb; i++) {
-      if (X_mat.size()==0) {t2=t;}
+    for (int i = 0; i < n_nb; i++) {
+      if (X_mat.size() == 0) {
+        t2 = t;
+      }
 
       // Read a line if the SNP is filtered out.
-      inc=0;
-      while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
-	t2++;
-	inc++;
+      inc = 0;
+      while (t2 < indicator_snp.size() && indicator_snp[t2] == 0) {
+        t2++;
+        inc++;
       }
 
-      Plink_ReadOneSNP (t2, indicator_idv, infile, geno, geno_mean);
-      gsl_vector_add_constant (geno, -1.0*geno_mean);
+      Plink_ReadOneSNP(t2, indicator_idv, infile, geno, geno_mean);
+      gsl_vector_add_constant(geno, -1.0 * geno_mean);
 
-      for (size_t j=0; j<geno->size; j++) {
-	x_vec[j]=gsl_vector_get(geno, j);
+      for (size_t j = 0; j < geno->size; j++) {
+        x_vec[j] = gsl_vector_get(geno, j);
       }
       X_mat.push_back(x_vec);
 
       t2++;
     }
 
-    n_nb=snpInfo[t].n_nb;
+    n_nb = snpInfo[t].n_nb;
 
     Calc_Cor(X_mat, cov_vec);
     Cov_mat.push_back(cov_vec);
@@ -376,15 +422,15 @@ void VARCOV::AnalyzePlink () {
     X_mat.erase(X_mat.begin());
 
     // Write out var/cov values.
-    if (Cov_mat.size()==10000) {
-      WriteCov (1, snpInfo_sub, Cov_mat);
+    if (Cov_mat.size() == 10000) {
+      WriteCov(1, snpInfo_sub, Cov_mat);
       Cov_mat.clear();
       snpInfo_sub.clear();
     }
   }
 
-  if (Cov_mat.size()!=0) {
-    WriteCov (1, snpInfo_sub, Cov_mat);
+  if (Cov_mat.size() != 0) {
+    WriteCov(1, snpInfo_sub, Cov_mat);
     Cov_mat.clear();
     snpInfo_sub.clear();
   }
diff --git a/src/varcov.h b/src/varcov.h
index 4a1eb3a..47b4f9d 100644
--- a/src/varcov.h
+++ b/src/varcov.h
@@ -19,45 +19,43 @@
 #ifndef __VARCOV_H__
 #define __VARCOV_H__
 
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
 #include "io.h"
+#include "param.h"
 
 using namespace std;
 
 class VARCOV {
 
 public:
-	// IO-related parameters.
-	string file_out;
-	string path_out;
-	string file_geno;
-	string file_bfile;
-	int d_pace;
-
-	vector<int> indicator_idv;
-	vector<int> indicator_snp;
-
-	vector<SNPINFO> snpInfo;
-
-	double time_opt;
-
-	// Class-specific parameters.
-	double window_cm;
-	size_t window_bp;
-	size_t window_ns;
-
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-	void CalcNB (vector<SNPINFO> &snpInfo_sort);
-	void WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
-		       const vector<vector<double> > &Cov_mat);
-	void AnalyzeBimbam ();
-	void AnalyzePlink ();
+  // IO-related parameters.
+  string file_out;
+  string path_out;
+  string file_geno;
+  string file_bfile;
+  int d_pace;
+
+  vector<int> indicator_idv;
+  vector<int> indicator_snp;
+
+  vector<SNPINFO> snpInfo;
+
+  double time_opt;
+
+  // Class-specific parameters.
+  double window_cm;
+  size_t window_bp;
+  size_t window_ns;
+
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+  void CalcNB(vector<SNPINFO> &snpInfo_sort);
+  void WriteCov(const int flag, const vector<SNPINFO> &snpInfo_sub,
+                const vector<vector<double>> &Cov_mat);
+  void AnalyzeBimbam();
+  void AnalyzePlink();
 };
 
 #endif
-
-
diff --git a/src/vc.cpp b/src/vc.cpp
index e8ccece..b5f36c0 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -16,216 +16,216 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <bitset>
 #include <cmath>
+#include <cstring>
+#include <iomanip>
 #include <iostream>
+#include <map>
+#include <set>
 #include <stdio.h>
 #include <stdlib.h>
-#include <bitset>
-#include <vector>
-#include <set>
-#include <map>
 #include <string>
-#include <cstring>
+#include <vector>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
 
 #include "gsl/gsl_cdf.h"
-#include "gsl/gsl_multiroots.h"
 #include "gsl/gsl_min.h"
+#include "gsl/gsl_multiroots.h"
 
 #include "Eigen/Dense"
 
-#include "param.h"
-#include "io.h"
-#include "lapack.h"
 #include "eigenlib.h"
 #include "gzstream.h"
-#include "mathfunc.h"
+#include "io.h"
+#include "lapack.h"
 #include "lmm.h"
+#include "mathfunc.h"
+#include "param.h"
 #include "vc.h"
 
 using namespace std;
 using namespace Eigen;
 
 // In this file, X, Y are already transformed (i.e. UtX and UtY).
-void VC::CopyFromParam (PARAM &cPar) {
-  a_mode=cPar.a_mode;
+void VC::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
 
-  file_cat=cPar.file_cat;
-  file_beta=cPar.file_beta;
-  file_cor=cPar.file_cor;
+  file_cat = cPar.file_cat;
+  file_beta = cPar.file_beta;
+  file_cor = cPar.file_cor;
 
-  setSnps=cPar.setSnps;
+  setSnps = cPar.setSnps;
 
-  file_out=cPar.file_out;
-  path_out=cPar.path_out;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
 
-  time_UtX=0.0;
-  time_opt=0.0;
+  time_UtX = 0.0;
+  time_opt = 0.0;
 
-  v_traceG=cPar.v_traceG;
+  v_traceG = cPar.v_traceG;
 
-  ni_total=cPar.ni_total;
-  ns_total=cPar.ns_total;
-  ns_test=cPar.ns_test;
+  ni_total = cPar.ni_total;
+  ns_total = cPar.ns_total;
+  ns_test = cPar.ns_test;
 
-  crt=cPar.crt;
-  window_cm=cPar.window_cm;
-  window_bp=cPar.window_bp;
-  window_ns=cPar.window_ns;
+  crt = cPar.crt;
+  window_cm = cPar.window_cm;
+  window_bp = cPar.window_bp;
+  window_ns = cPar.window_ns;
 
-  n_vc=cPar.n_vc;
+  n_vc = cPar.n_vc;
 
   return;
 }
 
-void VC::CopyToParam (PARAM &cPar) {
-	cPar.time_UtX=time_UtX;
-	cPar.time_opt=time_opt;
+void VC::CopyToParam(PARAM &cPar) {
+  cPar.time_UtX = time_UtX;
+  cPar.time_opt = time_opt;
 
-	cPar.v_pve=v_pve;
-	cPar.v_se_pve=v_se_pve;
-	cPar.v_sigma2=v_sigma2;
-	cPar.v_se_sigma2=v_se_sigma2;
-	cPar.pve_total=pve_total;
-	cPar.se_pve_total=se_pve_total;
-	cPar.v_traceG=v_traceG;
+  cPar.v_pve = v_pve;
+  cPar.v_se_pve = v_se_pve;
+  cPar.v_sigma2 = v_sigma2;
+  cPar.v_se_sigma2 = v_se_sigma2;
+  cPar.pve_total = pve_total;
+  cPar.se_pve_total = se_pve_total;
+  cPar.v_traceG = v_traceG;
 
-	cPar.v_beta=v_beta;
-	cPar.v_se_beta=v_se_beta;
+  cPar.v_beta = v_beta;
+  cPar.v_se_beta = v_se_beta;
 
-	cPar.ni_total=ni_total;
-	cPar.ns_total=ns_total;
-	cPar.ns_test=ns_test;
+  cPar.ni_total = ni_total;
+  cPar.ns_total = ns_total;
+  cPar.ns_test = ns_test;
 
-	cPar.n_vc=n_vc;
+  cPar.n_vc = n_vc;
 
-	return;
+  return;
 }
 
-void VC::WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec,
-		       const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
-		       const gsl_matrix *Svar_mat) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".qvec.txt";
-
-	ofstream outfile_q (file_str.c_str(), ofstream::out);
-	if (!outfile_q) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	for (size_t i=0; i<s_vec->size; i++) {
-	  outfile_q<<gsl_vector_get(s_vec, i)<<endl;
-	}
-	for (size_t i=0; i<q_vec->size; i++) {
-	  outfile_q<<gsl_vector_get(q_vec, i)<<endl;
-	}
-	for (size_t i=0; i<qvar_vec->size; i++) {
-	  outfile_q<<gsl_vector_get(qvar_vec, i)<<endl;
-	}
-
-	outfile_q.clear();
-	outfile_q.close();
-
-	file_str=path_out+"/"+file_out;
-	file_str+=".smat.txt";
-
-	ofstream outfile_s (file_str.c_str(), ofstream::out);
-	if (!outfile_s) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	for (size_t i=0; i<S_mat->size1; i++) {
-	  for (size_t j=0; j<S_mat->size2; j++) {
-	    outfile_s<<gsl_matrix_get(S_mat, i, j)<<"\t";
-	  }
-	  outfile_s<<endl;
-	}
-	for (size_t i=0; i<Svar_mat->size1; i++) {
-	  for (size_t j=0; j<Svar_mat->size2; j++) {
-	    outfile_s<<gsl_matrix_get(Svar_mat, i, j)<<"\t";
-	  }
-	  outfile_s<<endl;
-	}
-
-	outfile_s.clear();
-	outfile_s.close();
-
-	return;
+void VC::WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec,
+                      const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
+                      const gsl_matrix *Svar_mat) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".qvec.txt";
+
+  ofstream outfile_q(file_str.c_str(), ofstream::out);
+  if (!outfile_q) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  for (size_t i = 0; i < s_vec->size; i++) {
+    outfile_q << gsl_vector_get(s_vec, i) << endl;
+  }
+  for (size_t i = 0; i < q_vec->size; i++) {
+    outfile_q << gsl_vector_get(q_vec, i) << endl;
+  }
+  for (size_t i = 0; i < qvar_vec->size; i++) {
+    outfile_q << gsl_vector_get(qvar_vec, i) << endl;
+  }
+
+  outfile_q.clear();
+  outfile_q.close();
+
+  file_str = path_out + "/" + file_out;
+  file_str += ".smat.txt";
+
+  ofstream outfile_s(file_str.c_str(), ofstream::out);
+  if (!outfile_s) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  for (size_t i = 0; i < S_mat->size1; i++) {
+    for (size_t j = 0; j < S_mat->size2; j++) {
+      outfile_s << gsl_matrix_get(S_mat, i, j) << "\t";
+    }
+    outfile_s << endl;
+  }
+  for (size_t i = 0; i < Svar_mat->size1; i++) {
+    for (size_t j = 0; j < Svar_mat->size2; j++) {
+      outfile_s << gsl_matrix_get(Svar_mat, i, j) << "\t";
+    }
+    outfile_s << endl;
+  }
+
+  outfile_s.clear();
+  outfile_s.close();
+
+  return;
 }
 
-void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
-  size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1, n_cvt=(p->W)->size2;
+void UpdateParam(const gsl_vector *log_sigma2, VC_PARAM *p) {
+  size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1, n_cvt = (p->W)->size2;
 
-  gsl_matrix *K_temp=gsl_matrix_alloc(n1, n1);
-  gsl_matrix *HiW=gsl_matrix_alloc(n1, n_cvt);
-  gsl_matrix *WtHiW=gsl_matrix_alloc(n_cvt, n_cvt);
-  gsl_matrix *WtHiWi=gsl_matrix_alloc(n_cvt, n_cvt);
-  gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1);
+  gsl_matrix *K_temp = gsl_matrix_alloc(n1, n1);
+  gsl_matrix *HiW = gsl_matrix_alloc(n1, n_cvt);
+  gsl_matrix *WtHiW = gsl_matrix_alloc(n_cvt, n_cvt);
+  gsl_matrix *WtHiWi = gsl_matrix_alloc(n_cvt, n_cvt);
+  gsl_matrix *WtHiWiWtHi = gsl_matrix_alloc(n_cvt, n1);
 
   double sigma2;
 
   // Calculate H = \sum_i^{k+1} \sigma_i^2 K_i.
-  gsl_matrix_set_zero (p->P);
-  for (size_t i=0; i<n_vc+1; i++) {
-    if (i==n_vc) {
-      gsl_matrix_set_identity (K_temp);
+  gsl_matrix_set_zero(p->P);
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    if (i == n_vc) {
+      gsl_matrix_set_identity(K_temp);
     } else {
-      gsl_matrix_const_view K_sub=
-	gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
-      gsl_matrix_memcpy (K_temp, &K_sub.matrix);
+      gsl_matrix_const_view K_sub =
+          gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
+      gsl_matrix_memcpy(K_temp, &K_sub.matrix);
     }
 
     // When unconstrained, update on sigma2 instead of log_sigma2.
     if (p->noconstrain) {
-      sigma2=gsl_vector_get (log_sigma2, i);
+      sigma2 = gsl_vector_get(log_sigma2, i);
     } else {
-      sigma2=exp(gsl_vector_get (log_sigma2, i) );
+      sigma2 = exp(gsl_vector_get(log_sigma2, i));
     }
     gsl_matrix_scale(K_temp, sigma2);
-    gsl_matrix_add (p->P, K_temp);
+    gsl_matrix_add(p->P, K_temp);
   }
 
   // Calculate H^{-1}.
   eigenlib_invert(p->P);
 
-  eigenlib_dgemm ("N", "N", 1.0, p->P, p->W, 0.0, HiW);
-  eigenlib_dgemm ("T", "N", 1.0, p->W, HiW, 0.0, WtHiW);
+  eigenlib_dgemm("N", "N", 1.0, p->P, p->W, 0.0, HiW);
+  eigenlib_dgemm("T", "N", 1.0, p->W, HiW, 0.0, WtHiW);
 
   eigenlib_invert(WtHiW);
   gsl_matrix_memcpy(WtHiWi, WtHiW);
 
-  eigenlib_dgemm ("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
-  eigenlib_dgemm ("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
+  eigenlib_dgemm("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
+  eigenlib_dgemm("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
 
   // Calculate Py, KPy, PKPy.
   gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
 
   double d;
-  for (size_t i=0; i<n_vc+1; i++) {
-    gsl_vector_view KPy=gsl_matrix_column (p->KPy_mat, i);
-    gsl_vector_view PKPy=gsl_matrix_column (p->PKPy_mat, i);
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    gsl_vector_view KPy = gsl_matrix_column(p->KPy_mat, i);
+    gsl_vector_view PKPy = gsl_matrix_column(p->PKPy_mat, i);
 
-    if (i==n_vc) {
-      gsl_vector_memcpy (&KPy.vector, p->Py);
+    if (i == n_vc) {
+      gsl_vector_memcpy(&KPy.vector, p->Py);
     } else {
-      gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
+      gsl_matrix_const_view K_sub =
+          gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
 
       // Seems to be important to use gsl dgemv here instead of
       // eigenlib_dgemv; otherwise.
-      gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0,
-		     &KPy.vector);
+      gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector);
     }
 
     gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector);
@@ -233,64 +233,64 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
     // When phenotypes are not normalized well, then some values in
     // the following matrix maybe NaN; change that to 0; this seems to
     // only happen when eigenlib_dgemv was used above.
-    for (size_t j=0; j<p->KPy_mat->size1; j++) {
-      d=gsl_matrix_get (p->KPy_mat, j, i);
+    for (size_t j = 0; j < p->KPy_mat->size1; j++) {
+      d = gsl_matrix_get(p->KPy_mat, j, i);
       if (std::isnan(d)) {
-	gsl_matrix_set (p->KPy_mat, j, i, 0);
-	cout<<"nan appears in "<<i<<" "<<j<<endl;
+        gsl_matrix_set(p->KPy_mat, j, i, 0);
+        cout << "nan appears in " << i << " " << j << endl;
       }
-      d=gsl_matrix_get (p->PKPy_mat, j, i);
+      d = gsl_matrix_get(p->PKPy_mat, j, i);
       if (std::isnan(d)) {
-	gsl_matrix_set (p->PKPy_mat, j, i, 0);
-	cout<<"nan appears in "<<i<<" "<<j<<endl;
+        gsl_matrix_set(p->PKPy_mat, j, i, 0);
+        cout << "nan appears in " << i << " " << j << endl;
       }
     }
   }
 
-  gsl_matrix_free (K_temp);
-  gsl_matrix_free (HiW);
-  gsl_matrix_free (WtHiW);
-  gsl_matrix_free (WtHiWi);
-  gsl_matrix_free (WtHiWiWtHi);
+  gsl_matrix_free(K_temp);
+  gsl_matrix_free(HiW);
+  gsl_matrix_free(WtHiW);
+  gsl_matrix_free(WtHiWi);
+  gsl_matrix_free(WtHiWiWtHi);
 
   return;
 }
 
 // Below are functions for AI algorithm.
-int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
-  VC_PARAM *p=(VC_PARAM *) params;
+int LogRL_dev1(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
+  VC_PARAM *p = (VC_PARAM *)params;
 
-  size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1;
+  size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
 
   double tr, d;
 
   // Update parameters.
-  UpdateParam (log_sigma2, p);
+  UpdateParam(log_sigma2, p);
 
   // Calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy.
-  for (size_t i=0; i<n_vc+1; i++) {
-    if (i==n_vc) {
-      tr=0;
-      for (size_t l=0; l<n1; l++) {
-	tr+=gsl_matrix_get (p->P, l, l);
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    if (i == n_vc) {
+      tr = 0;
+      for (size_t l = 0; l < n1; l++) {
+        tr += gsl_matrix_get(p->P, l, l);
       }
     } else {
-      tr=0;
-      for (size_t l=0; l<n1; l++) {
-	gsl_vector_view P_row=gsl_matrix_row (p->P, l);
-	gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l);
-	gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
-	tr+=d;
+      tr = 0;
+      for (size_t l = 0; l < n1; l++) {
+        gsl_vector_view P_row = gsl_matrix_row(p->P, l);
+        gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
+        gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
+        tr += d;
       }
     }
 
-    gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
+    gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
     gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
 
     if (p->noconstrain) {
-      d=(-0.5*tr+0.5*d);
+      d = (-0.5 * tr + 0.5 * d);
     } else {
-      d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i));
+      d = (-0.5 * tr + 0.5 * d) * exp(gsl_vector_get(log_sigma2, i));
     }
 
     gsl_vector_set(dev1, i, d);
@@ -299,324 +299,354 @@ int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
   return GSL_SUCCESS;
 }
 
-int LogRL_dev2 (const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) {
-  VC_PARAM *p=(VC_PARAM *) params;
+int LogRL_dev2(const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) {
+  VC_PARAM *p = (VC_PARAM *)params;
 
-  size_t n_vc=log_sigma2->size-1;
+  size_t n_vc = log_sigma2->size - 1;
 
   double d, sigma2_i, sigma2_j;
 
   // Update parameters.
-  UpdateParam (log_sigma2, p);
+  UpdateParam(log_sigma2, p);
 
   // Calculate dev2 = 0.5(yPKPKPy).
-  for (size_t i=0; i<n_vc+1; i++) {
-    gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
     if (p->noconstrain) {
-      sigma2_i=gsl_vector_get(log_sigma2, i);
+      sigma2_i = gsl_vector_get(log_sigma2, i);
     } else {
-      sigma2_i=exp(gsl_vector_get(log_sigma2, i));
+      sigma2_i = exp(gsl_vector_get(log_sigma2, i));
     }
 
-    for (size_t j=i; j<n_vc+1; j++) {
-      gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j);
+    for (size_t j = i; j < n_vc + 1; j++) {
+      gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
 
       gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
       if (p->noconstrain) {
-	sigma2_j=gsl_vector_get(log_sigma2, j);
-	d*=-0.5;
+        sigma2_j = gsl_vector_get(log_sigma2, j);
+        d *= -0.5;
       } else {
-	sigma2_j=exp(gsl_vector_get(log_sigma2, j));
-	d*=-0.5*sigma2_i*sigma2_j;
+        sigma2_j = exp(gsl_vector_get(log_sigma2, j));
+        d *= -0.5 * sigma2_i * sigma2_j;
       }
 
       gsl_matrix_set(dev2, i, j, d);
-      if (j!=i) {gsl_matrix_set(dev2, j, i, d);}
+      if (j != i) {
+        gsl_matrix_set(dev2, j, i, d);
+      }
     }
   }
 
-  gsl_matrix_memcpy (p->Hessian, dev2);
+  gsl_matrix_memcpy(p->Hessian, dev2);
   return GSL_SUCCESS;
 }
 
-int LogRL_dev12 (const gsl_vector *log_sigma2, void *params,
-		 gsl_vector *dev1, gsl_matrix *dev2) {
-  VC_PARAM *p=(VC_PARAM *) params;
+int LogRL_dev12(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1,
+                gsl_matrix *dev2) {
+  VC_PARAM *p = (VC_PARAM *)params;
 
-  size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1;
+  size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
 
   double tr, d, sigma2_i, sigma2_j;
 
   // Update parameters.
-  UpdateParam (log_sigma2, p);
+  UpdateParam(log_sigma2, p);
 
-  for (size_t i=0; i<n_vc+1; i++) {
-    if (i==n_vc) {
-      tr=0;
-      for (size_t l=0; l<n1; l++) {
-	tr+=gsl_matrix_get (p->P, l, l);
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    if (i == n_vc) {
+      tr = 0;
+      for (size_t l = 0; l < n1; l++) {
+        tr += gsl_matrix_get(p->P, l, l);
       }
     } else {
-      tr=0;
-      for (size_t l=0; l<n1; l++) {
-	gsl_vector_view P_row=gsl_matrix_row (p->P, l);
-	gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l);
-	gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
-	tr+=d;
+      tr = 0;
+      for (size_t l = 0; l < n1; l++) {
+        gsl_vector_view P_row = gsl_matrix_row(p->P, l);
+        gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
+        gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
+        tr += d;
       }
     }
 
-    gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
+    gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
     gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
 
     if (p->noconstrain) {
-      sigma2_i=gsl_vector_get(log_sigma2, i);
-      d=(-0.5*tr+0.5*d);
+      sigma2_i = gsl_vector_get(log_sigma2, i);
+      d = (-0.5 * tr + 0.5 * d);
     } else {
-      sigma2_i=exp(gsl_vector_get(log_sigma2, i));
-      d=(-0.5*tr+0.5*d)*sigma2_i;
+      sigma2_i = exp(gsl_vector_get(log_sigma2, i));
+      d = (-0.5 * tr + 0.5 * d) * sigma2_i;
     }
 
     gsl_vector_set(dev1, i, d);
 
-    for (size_t j=i; j<n_vc+1; j++) {
-      gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j);
+    for (size_t j = i; j < n_vc + 1; j++) {
+      gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
       gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
 
       if (p->noconstrain) {
-	sigma2_j=gsl_vector_get(log_sigma2, j);
-	d*=-0.5;
+        sigma2_j = gsl_vector_get(log_sigma2, j);
+        d *= -0.5;
       } else {
-	sigma2_j=exp(gsl_vector_get(log_sigma2, j));
-	d*=-0.5*sigma2_i*sigma2_j;
+        sigma2_j = exp(gsl_vector_get(log_sigma2, j));
+        d *= -0.5 * sigma2_i * sigma2_j;
       }
 
       gsl_matrix_set(dev2, i, j, d);
-      if (j!=i) {gsl_matrix_set(dev2, j, i, d);}
+      if (j != i) {
+        gsl_matrix_set(dev2, j, i, d);
+      }
     }
-
   }
 
-  gsl_matrix_memcpy (p->Hessian, dev2);
+  gsl_matrix_memcpy(p->Hessian, dev2);
 
   return GSL_SUCCESS;
 }
 
 // Read header to determine which column contains which item.
-bool ReadHeader_vc (const string &line, HEADER &header) {
-  string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID",
-		   "rsid","RSID"};
-  set<string> rs_set(rs_ptr, rs_ptr+10);
-  string chr_ptr[]={"chr","CHR"};
-  set<string> chr_set(chr_ptr, chr_ptr+2);
-  string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION",
-		    "bp", "BP"};
-  set<string> pos_set(pos_ptr, pos_ptr+8);
-  string cm_ptr[]={"cm","CM"};
-  set<string> cm_set(cm_ptr, cm_ptr+2);
-  string a1_ptr[]={"a1","A1","allele1","ALLELE1"};
-  set<string> a1_set(a1_ptr, a1_ptr+4);
-  string a0_ptr[]={"a0","A0","allele0","ALLELE0"};
-  set<string> a0_set(a0_ptr, a0_ptr+4);
-
-  string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
-  set<string> z_set(z_ptr, z_ptr+6);
-  string beta_ptr[]={"beta","BETA","b","B"};
-  set<string> beta_set(beta_ptr, beta_ptr+4);
-  string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
-  set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
-  string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
-  set<string> chisq_set(chisq_ptr, chisq_ptr+4);
-  string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
-  set<string> p_set(p_ptr, p_ptr+6);
-
-  string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
-  set<string> n_set(n_ptr, n_ptr+6);
-  string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
-  set<string> nmis_set(nmis_ptr, nmis_ptr+6);
-  string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
-  set<string> nobs_set(nobs_ptr, nobs_ptr+4);
-
-  string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq",
-		   "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"};
-  set<string> af_set(af_ptr, af_ptr+10);
-  string var_ptr[]={"var","VAR"};
-  set<string> var_set(var_ptr, var_ptr+2);
-
-  string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
-  set<string> ws_set(ws_ptr, ws_ptr+4);
-  string cor_ptr[]={"cor","COR","r","R"};
-  set<string> cor_set(cor_ptr, cor_ptr+4);
-
-  header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0;
-  header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0;
-  header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0;
-  header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0;
-  header.cor_col=0; header.coln=0;
+bool ReadHeader_vc(const string &line, HEADER &header) {
+  string rs_ptr[] = {"rs",   "RS",    "snp",   "SNP",  "snps",
+                     "SNPS", "snpid", "SNPID", "rsid", "RSID"};
+  set<string> rs_set(rs_ptr, rs_ptr + 10);
+  string chr_ptr[] = {"chr", "CHR"};
+  set<string> chr_set(chr_ptr, chr_ptr + 2);
+  string pos_ptr[] = {
+      "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"};
+  set<string> pos_set(pos_ptr, pos_ptr + 8);
+  string cm_ptr[] = {"cm", "CM"};
+  set<string> cm_set(cm_ptr, cm_ptr + 2);
+  string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1"};
+  set<string> a1_set(a1_ptr, a1_ptr + 4);
+  string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0"};
+  set<string> a0_set(a0_ptr, a0_ptr + 4);
+
+  string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"};
+  set<string> z_set(z_ptr, z_ptr + 6);
+  string beta_ptr[] = {"beta", "BETA", "b", "B"};
+  set<string> beta_set(beta_ptr, beta_ptr + 4);
+  string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"};
+  set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4);
+  string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"};
+  set<string> chisq_set(chisq_ptr, chisq_ptr + 4);
+  string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"};
+  set<string> p_set(p_ptr, p_ptr + 6);
+
+  string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"};
+  set<string> n_set(n_ptr, n_ptr + 6);
+  string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"};
+  set<string> nmis_set(nmis_ptr, nmis_ptr + 6);
+  string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"};
+  set<string> nobs_set(nobs_ptr, nobs_ptr + 4);
+
+  string af_ptr[] = {"af",
+                     "AF",
+                     "maf",
+                     "MAF",
+                     "f",
+                     "F",
+                     "allele_freq",
+                     "ALLELE_FREQ",
+                     "allele_frequency",
+                     "ALLELE_FREQUENCY"};
+  set<string> af_set(af_ptr, af_ptr + 10);
+  string var_ptr[] = {"var", "VAR"};
+  set<string> var_set(var_ptr, var_ptr + 2);
+
+  string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"};
+  set<string> ws_set(ws_ptr, ws_ptr + 4);
+  string cor_ptr[] = {"cor", "COR", "r", "R"};
+  set<string> cor_set(cor_ptr, cor_ptr + 4);
+
+  header.rs_col = 0;
+  header.chr_col = 0;
+  header.pos_col = 0;
+  header.a1_col = 0;
+  header.a0_col = 0;
+  header.z_col = 0;
+  header.beta_col = 0;
+  header.sebeta_col = 0;
+  header.chisq_col = 0;
+  header.p_col = 0;
+  header.n_col = 0;
+  header.nmis_col = 0;
+  header.nobs_col = 0;
+  header.af_col = 0;
+  header.var_col = 0;
+  header.ws_col = 0;
+  header.cor_col = 0;
+  header.coln = 0;
 
   char *ch_ptr;
   string type;
-  size_t n_error=0;
-
-  ch_ptr=strtok ((char *)line.c_str(), " , \t");
-  while (ch_ptr!=NULL) {
-    type=ch_ptr;
-    if (rs_set.count(type)!=0) {
-      if (header.rs_col==0) {
-	header.rs_col=header.coln+1;
+  size_t n_error = 0;
+
+  ch_ptr = strtok((char *)line.c_str(), " , \t");
+  while (ch_ptr != NULL) {
+    type = ch_ptr;
+    if (rs_set.count(type) != 0) {
+      if (header.rs_col == 0) {
+        header.rs_col = header.coln + 1;
       } else {
-	cout<<"error! more than two rs columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two rs columns in the file." << endl;
+        n_error++;
       }
-    } else if (chr_set.count(type)!=0) {
-      if (header.chr_col==0) {
-	header.chr_col=header.coln+1;
+    } else if (chr_set.count(type) != 0) {
+      if (header.chr_col == 0) {
+        header.chr_col = header.coln + 1;
       } else {
-	cout<<"error! more than two chr columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two chr columns in the file." << endl;
+        n_error++;
       }
-    } else if (pos_set.count(type)!=0) {
-      if (header.pos_col==0) {
-	header.pos_col=header.coln+1;
+    } else if (pos_set.count(type) != 0) {
+      if (header.pos_col == 0) {
+        header.pos_col = header.coln + 1;
       } else {
-	cout<<"error! more than two pos columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two pos columns in the file." << endl;
+        n_error++;
       }
-    } else if (cm_set.count(type)!=0) {
-      if (header.cm_col==0) {
-	header.cm_col=header.coln+1;
+    } else if (cm_set.count(type) != 0) {
+      if (header.cm_col == 0) {
+        header.cm_col = header.coln + 1;
       } else {
-	cout<<"error! more than two cm columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two cm columns in the file." << endl;
+        n_error++;
       }
-    } else if (a1_set.count(type)!=0) {
-      if (header.a1_col==0) {
-	header.a1_col=header.coln+1;
+    } else if (a1_set.count(type) != 0) {
+      if (header.a1_col == 0) {
+        header.a1_col = header.coln + 1;
       } else {
-	cout<<"error! more than two allele1 columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two allele1 columns in the file." << endl;
+        n_error++;
       }
-    } else if (a0_set.count(type)!=0) {
-      if (header.a0_col==0) {
-	header.a0_col=header.coln+1;
+    } else if (a0_set.count(type) != 0) {
+      if (header.a0_col == 0) {
+        header.a0_col = header.coln + 1;
       } else {
-	cout<<"error! more than two allele0 columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two allele0 columns in the file." << endl;
+        n_error++;
       }
-    } else if (z_set.count(type)!=0) {
-      if (header.z_col==0) {
-	header.z_col=header.coln+1;
+    } else if (z_set.count(type) != 0) {
+      if (header.z_col == 0) {
+        header.z_col = header.coln + 1;
       } else {
-	cout<<"error! more than two z columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two z columns in the file." << endl;
+        n_error++;
       }
-    } else if (beta_set.count(type)!=0) {
-      if (header.beta_col==0) {
-	header.beta_col=header.coln+1;
+    } else if (beta_set.count(type) != 0) {
+      if (header.beta_col == 0) {
+        header.beta_col = header.coln + 1;
       } else {
-	cout<<"error! more than two beta columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two beta columns in the file." << endl;
+        n_error++;
       }
-    } else if (sebeta_set.count(type)!=0) {
-      if (header.sebeta_col==0) {
-	header.sebeta_col=header.coln+1;
+    } else if (sebeta_set.count(type) != 0) {
+      if (header.sebeta_col == 0) {
+        header.sebeta_col = header.coln + 1;
       } else {
-	cout<<"error! more than two se_beta columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two se_beta columns in the file." << endl;
+        n_error++;
       }
-    } else if (chisq_set.count(type)!=0) {
-      if (header.chisq_col==0) {
-	header.chisq_col=header.coln+1;
+    } else if (chisq_set.count(type) != 0) {
+      if (header.chisq_col == 0) {
+        header.chisq_col = header.coln + 1;
       } else {
-	cout<<"error! more than two z columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two z columns in the file." << endl;
+        n_error++;
       }
-    } else if (p_set.count(type)!=0) {
-      if (header.p_col==0) {
-	header.p_col=header.coln+1;
+    } else if (p_set.count(type) != 0) {
+      if (header.p_col == 0) {
+        header.p_col = header.coln + 1;
       } else {
-	cout<<"error! more than two p columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two p columns in the file." << endl;
+        n_error++;
       }
-    } else if (n_set.count(type)!=0) {
-      if (header.n_col==0) {
-	header.n_col=header.coln+1;
+    } else if (n_set.count(type) != 0) {
+      if (header.n_col == 0) {
+        header.n_col = header.coln + 1;
       } else {
-	cout<<"error! more than two n_total columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two n_total columns in the file." << endl;
+        n_error++;
       }
-    } else if (nmis_set.count(type)!=0) {
-      if (header.nmis_col==0) {
-	header.nmis_col=header.coln+1;
+    } else if (nmis_set.count(type) != 0) {
+      if (header.nmis_col == 0) {
+        header.nmis_col = header.coln + 1;
       } else {
-	cout<<"error! more than two n_mis columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two n_mis columns in the file." << endl;
+        n_error++;
       }
-    } else if (nobs_set.count(type)!=0) {
-      if (header.nobs_col==0) {
-	header.nobs_col=header.coln+1;
+    } else if (nobs_set.count(type) != 0) {
+      if (header.nobs_col == 0) {
+        header.nobs_col = header.coln + 1;
       } else {
-	cout<<"error! more than two n_obs columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two n_obs columns in the file." << endl;
+        n_error++;
       }
-    } else if (ws_set.count(type)!=0) {
-      if (header.ws_col==0) {
-	header.ws_col=header.coln+1;
+    } else if (ws_set.count(type) != 0) {
+      if (header.ws_col == 0) {
+        header.ws_col = header.coln + 1;
       } else {
-	cout<<"error! more than two window_size columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two window_size columns in the file." << endl;
+        n_error++;
       }
-    } else if (af_set.count(type)!=0) {
-      if (header.af_col==0) {
-	header.af_col=header.coln+1;
+    } else if (af_set.count(type) != 0) {
+      if (header.af_col == 0) {
+        header.af_col = header.coln + 1;
       } else {
-	cout<<"error! more than two af columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two af columns in the file." << endl;
+        n_error++;
       }
-    } else if (cor_set.count(type)!=0) {
-      if (header.cor_col==0) {
-	header.cor_col=header.coln+1;
+    } else if (cor_set.count(type) != 0) {
+      if (header.cor_col == 0) {
+        header.cor_col = header.coln + 1;
       } else {
-	cout<<"error! more than two cor columns in the file."<<endl;
-	n_error++;
+        cout << "error! more than two cor columns in the file." << endl;
+        n_error++;
       }
-    } else {}
+    } else {
+    }
 
-    ch_ptr=strtok (NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
     header.coln++;
   }
 
-  if (header.cor_col!=0 && header.cor_col!=header.coln) {
-    cout<<"error! the cor column should be the last column."<<endl;
+  if (header.cor_col != 0 && header.cor_col != header.coln) {
+    cout << "error! the cor column should be the last column." << endl;
     n_error++;
   }
 
-  if (header.rs_col==0) {
-    if (header.chr_col!=0 && header.pos_col!=0) {
-      cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
+  if (header.rs_col == 0) {
+    if (header.chr_col != 0 && header.pos_col != 0) {
+      cout << "missing an rs column. rs id will be replaced by chr:pos" << endl;
     } else {
-      cout<<"error! missing an rs column."<<endl; n_error++;
+      cout << "error! missing an rs column." << endl;
+      n_error++;
     }
   }
 
-  if (n_error==0) {return true;} else {return false;}
+  if (n_error == 0) {
+    return true;
+  } else {
+    return false;
+  }
 }
 
 // Read cov file the first time, record mapRS2in, mapRS2var (in case
 // var is not provided in the z file), store vec_n and vec_rs.
-void ReadFile_cor (const string &file_cor, const set<string> &setSnps,
-		   vector<string> &vec_rs, vector<size_t> &vec_n,
-		   vector<double> &vec_cm, vector<double> &vec_bp,
-		   map<string, size_t> &mapRS2in, map<string,
-		   double> &mapRS2var) {
+void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
+                  vector<string> &vec_rs, vector<size_t> &vec_n,
+                  vector<double> &vec_cm, vector<double> &vec_bp,
+                  map<string, size_t> &mapRS2in,
+                  map<string, double> &mapRS2var) {
   vec_rs.clear();
   vec_n.clear();
   mapRS2in.clear();
   mapRS2var.clear();
 
-  igzstream infile (file_cor.c_str(), igzstream::in);
+  igzstream infile(file_cor.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open cov file: "<<file_cor<<endl;
+    cout << "error! fail to open cov file: " << file_cor << endl;
     return;
   }
 
@@ -624,88 +654,124 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps,
   char *ch_ptr;
 
   string rs, chr, a1, a0, pos, cm;
-  double af=0, var_x=0, d_pos, d_cm;
-  size_t n_total=0, n_mis=0, n_obs=0, ni_total=0;
-  size_t ns_test=0, ns_total=0;
+  double af = 0, var_x = 0, d_pos, d_cm;
+  size_t n_total = 0, n_mis = 0, n_obs = 0, ni_total = 0;
+  size_t ns_test = 0, ns_total = 0;
 
   HEADER header;
 
   // Header.
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader_vc(line, header);
 
-  if (header.n_col==0 ) {
-    if (header.nobs_col==0 && header.nmis_col==0) {
-      cout<<"error! missing sample size in the cor file."<<endl;
+  if (header.n_col == 0) {
+    if (header.nobs_col == 0 && header.nmis_col == 0) {
+      cout << "error! missing sample size in the cor file." << endl;
     } else {
-      cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+      cout << "total sample size will be replaced by obs/mis sample size."
+           << endl;
     }
   }
 
   while (!safeGetline(infile, line).eof()) {
 
-    //do not read cor values this time; upto col_n-1.
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
-    n_total=0; n_mis=0; n_obs=0; af=0; var_x=0; d_cm=0; d_pos=0;
-    for (size_t i=0; i<header.coln-1; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
-      if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
-      if (header.pos_col!=0 && header.pos_col==i+1) {
-	pos=ch_ptr; d_pos=atof(ch_ptr);
+    // do not read cor values this time; upto col_n-1.
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+    n_total = 0;
+    n_mis = 0;
+    n_obs = 0;
+    af = 0;
+    var_x = 0;
+    d_cm = 0;
+    d_pos = 0;
+    for (size_t i = 0; i < header.coln - 1; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      }
+      if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      }
+      if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+        d_pos = atof(ch_ptr);
       }
-      if (header.cm_col!=0 && header.cm_col==i+1) {
-	cm=ch_ptr; d_cm=atof(ch_ptr);
+      if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+        d_cm = atof(ch_ptr);
+      }
+      if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      }
+      if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
       }
-      if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
-      if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
 
-      if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
-      if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
-      if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+      if (header.n_col != 0 && header.n_col == i + 1) {
+        n_total = atoi(ch_ptr);
+      }
+      if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+        n_mis = atoi(ch_ptr);
+      }
+      if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+        n_obs = atoi(ch_ptr);
+      }
 
-      if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
-      if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+      if (header.af_col != 0 && header.af_col == i + 1) {
+        af = atof(ch_ptr);
+      }
+      if (header.var_col != 0 && header.var_col == i + 1) {
+        var_x = atof(ch_ptr);
+      }
 
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
 
-    if (header.rs_col==0) {
-      rs=chr+":"+pos;
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
     }
 
-    if (header.n_col==0) {
-      n_total=n_mis+n_obs;
+    if (header.n_col == 0) {
+      n_total = n_mis + n_obs;
     }
 
     // Record rs, n.
     vec_rs.push_back(rs);
     vec_n.push_back(n_total);
-    if (d_cm>0) {vec_cm.push_back(d_cm);} else {vec_cm.push_back(d_cm);}
-    if (d_pos>0) {vec_bp.push_back(d_pos);} else {vec_bp.push_back(d_pos);}
+    if (d_cm > 0) {
+      vec_cm.push_back(d_cm);
+    } else {
+      vec_cm.push_back(d_cm);
+    }
+    if (d_pos > 0) {
+      vec_bp.push_back(d_pos);
+    } else {
+      vec_bp.push_back(d_pos);
+    }
 
     // Record mapRS2in and mapRS2var.
-    if (setSnps.size()==0 || setSnps.count(rs)!=0) {
-      if (mapRS2in.count(rs)==0) {
-	mapRS2in[rs]=1;
+    if (setSnps.size() == 0 || setSnps.count(rs) != 0) {
+      if (mapRS2in.count(rs) == 0) {
+        mapRS2in[rs] = 1;
 
-	if (header.var_col!=0) {
-	  mapRS2var[rs]=var_x;
-	} else if (header.af_col!=0) {
-	  var_x=2.0*af*(1.0-af);
-	  mapRS2var[rs]=var_x;
-	} else {}
+        if (header.var_col != 0) {
+          mapRS2var[rs] = var_x;
+        } else if (header.af_col != 0) {
+          var_x = 2.0 * af * (1.0 - af);
+          mapRS2var[rs] = var_x;
+        } else {
+        }
 
-	ns_test++;
+        ns_test++;
 
       } else {
-	cout<<"error! more than one snp has the same id "<<rs<<
-	  " in cor file?"<<endl;
+        cout << "error! more than one snp has the same id " << rs
+             << " in cor file?" << endl;
       }
     }
 
     // Record max pos.
-    ni_total=max(ni_total, n_total);
+    ni_total = max(ni_total, n_total);
     ns_total++;
   }
 
@@ -717,19 +783,18 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps,
 
 // Read beta file, store mapRS2var if var is provided here, calculate
 // q and var_y.
-void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
-		    const map<string, size_t> &mapRS2cat,
-		    map<string, size_t> &mapRS2in,
-		    map<string, double> &mapRS2var,
-		    map<string, size_t> &mapRS2nsamp,
-		    gsl_vector *q_vec, gsl_vector *qvar_vec,
-		    gsl_vector *s_vec, size_t &ni_total,
-		    size_t &ns_total) {
+void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
+                   const map<string, size_t> &mapRS2cat,
+                   map<string, size_t> &mapRS2in,
+                   map<string, double> &mapRS2var,
+                   map<string, size_t> &mapRS2nsamp, gsl_vector *q_vec,
+                   gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total,
+                   size_t &ns_total) {
   mapRS2nsamp.clear();
 
-  igzstream infile (file_beta.c_str(), igzstream::in);
+  igzstream infile(file_beta.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open beta file: "<<file_beta<<endl;
+    cout << "error! fail to open beta file: " << file_beta << endl;
     return;
   }
 
@@ -738,13 +803,15 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
   string type;
 
   string rs, chr, a1, a0, pos, cm;
-  double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
-  size_t n_total=0, n_mis=0, n_obs=0;
-  size_t ns_test=0;
-  ns_total=0; ni_total=0;
+  double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0,
+         af = 0, var_x = 0;
+  size_t n_total = 0, n_mis = 0, n_obs = 0;
+  size_t ns_test = 0;
+  ns_total = 0;
+  ni_total = 0;
 
   vector<double> vec_q, vec_qvar, vec_s;
-  for (size_t i=0; i<q_vec->size; i++) {
+  for (size_t i = 0; i < q_vec->size; i++) {
     vec_q.push_back(0.0);
     vec_qvar.push_back(0.0);
     vec_s.push_back(0.0);
@@ -753,122 +820,166 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
   // Read header.
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader_vc(line, header);
 
-  if (header.n_col==0 ) {
-    if (header.nobs_col==0 && header.nmis_col==0) {
-      cout<<"error! missing sample size in the beta file."<<endl;
+  if (header.n_col == 0) {
+    if (header.nobs_col == 0 && header.nmis_col == 0) {
+      cout << "error! missing sample size in the beta file." << endl;
     } else {
-      cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+      cout << "total sample size will be replaced by obs/mis sample size."
+           << endl;
     }
   }
 
-  if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) &&
-      header.chisq_col==0 && header.p_col==0) {
-    cout<<"error! missing z scores in the beta file."<<endl;
+  if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) &&
+      header.chisq_col == 0 && header.p_col == 0) {
+    cout << "error! missing z scores in the beta file." << endl;
   }
 
-  if (header.af_col==0 && header.var_col==0 && mapRS2var.size()==0) {
-    cout<<"error! missing allele frequency in the beta file."<<endl;
+  if (header.af_col == 0 && header.var_col == 0 && mapRS2var.size() == 0) {
+    cout << "error! missing allele frequency in the beta file." << endl;
   }
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
-    z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
-    n_total=0; n_mis=0; n_obs=0; af=0; var_x=0;
-    for (size_t i=0; i<header.coln; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
-      if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
-      if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
-      if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
-      if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
-      if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+    z = 0;
+    beta = 0;
+    se_beta = 0;
+    chisq = 0;
+    pvalue = 0;
+    n_total = 0;
+    n_mis = 0;
+    n_obs = 0;
+    af = 0;
+    var_x = 0;
+    for (size_t i = 0; i < header.coln; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      }
+      if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      }
+      if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+      }
+      if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+      }
+      if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      }
+      if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
+      }
 
-      if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
-      if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
-      if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
-	se_beta=atof(ch_ptr);
+      if (header.z_col != 0 && header.z_col == i + 1) {
+        z = atof(ch_ptr);
+      }
+      if (header.beta_col != 0 && header.beta_col == i + 1) {
+        beta = atof(ch_ptr);
+      }
+      if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
+        se_beta = atof(ch_ptr);
+      }
+      if (header.chisq_col != 0 && header.chisq_col == i + 1) {
+        chisq = atof(ch_ptr);
+      }
+      if (header.p_col != 0 && header.p_col == i + 1) {
+        pvalue = atof(ch_ptr);
       }
-      if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
-      if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
 
-      if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
-      if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
-      if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+      if (header.n_col != 0 && header.n_col == i + 1) {
+        n_total = atoi(ch_ptr);
+      }
+      if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+        n_mis = atoi(ch_ptr);
+      }
+      if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+        n_obs = atoi(ch_ptr);
+      }
 
-      if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
-      if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+      if (header.af_col != 0 && header.af_col == i + 1) {
+        af = atof(ch_ptr);
+      }
+      if (header.var_col != 0 && header.var_col == i + 1) {
+        var_x = atof(ch_ptr);
+      }
 
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
 
-    if (header.rs_col==0) {
-      rs=chr+":"+pos;
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
     }
 
-    if (header.n_col==0) {
-      n_total=n_mis+n_obs;
+    if (header.n_col == 0) {
+      n_total = n_mis + n_obs;
     }
 
     // Both z values and beta/se_beta have directions, while
     // chisq/pvalue do not.
-    if (header.z_col!=0) {
-      zsquare=z*z;
-    } else if (header.beta_col!=0 && header.sebeta_col!=0) {
-      z=beta/se_beta;
-      zsquare=z*z;
-    } else if (header.chisq_col!=0) {
-      zsquare=chisq;
-    } else if (header.p_col!=0) {
-      zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
-    } else {zsquare=0;}
+    if (header.z_col != 0) {
+      zsquare = z * z;
+    } else if (header.beta_col != 0 && header.sebeta_col != 0) {
+      z = beta / se_beta;
+      zsquare = z * z;
+    } else if (header.chisq_col != 0) {
+      zsquare = chisq;
+    } else if (header.p_col != 0) {
+      zsquare = gsl_cdf_chisq_Qinv(pvalue, 1);
+    } else {
+      zsquare = 0;
+    }
 
     // If the snp is also present in cor file, then do calculations.
-    if ((header.var_col!=0 || header.af_col!=0 || mapRS2var.count(rs)!=0) &&
-	mapRS2in.count(rs)!=0 &&
-	(mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
-      if (mapRS2in.at(rs)>1) {
-	cout<<"error! more than one snp has the same id "<<rs<<
-	  " in beta file?"<<endl;
-	break;
+    if ((header.var_col != 0 || header.af_col != 0 ||
+         mapRS2var.count(rs) != 0) &&
+        mapRS2in.count(rs) != 0 &&
+        (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
+      if (mapRS2in.at(rs) > 1) {
+        cout << "error! more than one snp has the same id " << rs
+             << " in beta file?" << endl;
+        break;
       }
 
-      if (header.var_col==0) {
-	if (header.af_col!=0) {
-	  var_x=2.0*af*(1.0-af);
-	} else {
-	  var_x=mapRS2var.at(rs);
-	}
+      if (header.var_col == 0) {
+        if (header.af_col != 0) {
+          var_x = 2.0 * af * (1.0 - af);
+        } else {
+          var_x = mapRS2var.at(rs);
+        }
       }
 
-      if (flag_priorscale) {var_x=1;}
+      if (flag_priorscale) {
+        var_x = 1;
+      }
 
       mapRS2in[rs]++;
-      mapRS2var[rs]=var_x;
-      mapRS2nsamp[rs]=n_total;
-
-      if (mapRS2cat.size()!=0) {
-	vec_q[mapRS2cat.at(rs) ]+=(zsquare-1.0)*var_x/(double)n_total;
-	vec_s[mapRS2cat.at(rs) ]+=var_x;
-	vec_qvar[mapRS2cat.at(rs) ]+=
-	  var_x*var_x/((double)n_total*(double)n_total);
+      mapRS2var[rs] = var_x;
+      mapRS2nsamp[rs] = n_total;
+
+      if (mapRS2cat.size() != 0) {
+        vec_q[mapRS2cat.at(rs)] += (zsquare - 1.0) * var_x / (double)n_total;
+        vec_s[mapRS2cat.at(rs)] += var_x;
+        vec_qvar[mapRS2cat.at(rs)] +=
+            var_x * var_x / ((double)n_total * (double)n_total);
       } else {
-	vec_q[0]+=(zsquare-1.0)*var_x/(double)n_total;
-	vec_s[0]+=var_x;
-	vec_qvar[0]+=var_x*var_x/((double)n_total*(double)n_total);
+        vec_q[0] += (zsquare - 1.0) * var_x / (double)n_total;
+        vec_s[0] += var_x;
+        vec_qvar[0] += var_x * var_x / ((double)n_total * (double)n_total);
       }
 
-      ni_total=max(ni_total, n_total);
+      ni_total = max(ni_total, n_total);
       ns_test++;
     }
 
     ns_total++;
   }
 
-  for (size_t i=0; i<q_vec->size; i++) {
+  for (size_t i = 0; i < q_vec->size; i++) {
     gsl_vector_set(q_vec, i, vec_q[i]);
-    gsl_vector_set(qvar_vec, i, 2.0*vec_qvar[i]);
+    gsl_vector_set(qvar_vec, i, 2.0 * vec_qvar[i]);
     gsl_vector_set(s_vec, i, vec_s[i]);
   }
 
@@ -882,21 +993,20 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
 // Look for rs, n_mis+n_obs, var, window_size, cov.
 // If window_cm/bp/ns is provided, then use these max values to
 // calibrate estimates.
-void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
-		   const vector<size_t> &vec_n, const vector<double> &vec_cm,
-		   const vector<double> &vec_bp,
-		   const map<string, size_t> &mapRS2cat,
-		   const map<string, size_t> &mapRS2in,
-		   const map<string, double> &mapRS2var,
-		   const map<string, size_t> &mapRS2nsamp,
-		   const size_t crt, const double &window_cm,
-		   const double &window_bp, const double &window_ns,
-		   gsl_matrix *S_mat, gsl_matrix *Svar_mat,
-		   gsl_vector *qvar_vec, size_t &ni_total,
-		   size_t &ns_total, size_t &ns_test, size_t &ns_pair) {
-  igzstream infile (file_cor.c_str(), igzstream::in);
+void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
+                  const vector<size_t> &vec_n, const vector<double> &vec_cm,
+                  const vector<double> &vec_bp,
+                  const map<string, size_t> &mapRS2cat,
+                  const map<string, size_t> &mapRS2in,
+                  const map<string, double> &mapRS2var,
+                  const map<string, size_t> &mapRS2nsamp, const size_t crt,
+                  const double &window_cm, const double &window_bp,
+                  const double &window_ns, gsl_matrix *S_mat,
+                  gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total,
+                  size_t &ns_total, size_t &ns_test, size_t &ns_pair) {
+  igzstream infile(file_cor.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open cov file: "<<file_cor<<endl;
+    cout << "error! fail to open cov file: " << file_cor << endl;
     return;
   }
 
@@ -905,172 +1015,192 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
 
   string rs1, rs2;
   double d1, d2, d3, cor, var1, var2;
-  size_t n_nb, nsamp1, nsamp2, n12, bin_size=10, bin;
+  size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin;
 
-  vector<vector<double> > mat_S, mat_Svar, mat_tmp;
+  vector<vector<double>> mat_S, mat_Svar, mat_tmp;
   vector<double> vec_qvar, vec_tmp;
-  vector<vector<vector<double> > > mat3d_Sbin;
+  vector<vector<vector<double>>> mat3d_Sbin;
 
-  for (size_t i=0; i<S_mat->size1; i++) {
+  for (size_t i = 0; i < S_mat->size1; i++) {
     vec_qvar.push_back(0.0);
   }
 
-  for (size_t i=0; i<S_mat->size1; i++) {
+  for (size_t i = 0; i < S_mat->size1; i++) {
     mat_S.push_back(vec_qvar);
     mat_Svar.push_back(vec_qvar);
   }
 
-  for (size_t k=0; k<bin_size; k++) {
+  for (size_t k = 0; k < bin_size; k++) {
     vec_tmp.push_back(0.0);
   }
-  for (size_t i=0; i<S_mat->size1; i++) {
+  for (size_t i = 0; i < S_mat->size1; i++) {
     mat_tmp.push_back(vec_tmp);
   }
-  for (size_t i=0; i<S_mat->size1; i++) {
+  for (size_t i = 0; i < S_mat->size1; i++) {
     mat3d_Sbin.push_back(mat_tmp);
   }
 
   string rs, chr, a1, a0, type, pos, cm;
-  size_t n_total=0, n_mis=0, n_obs=0;
+  size_t n_total = 0, n_mis = 0, n_obs = 0;
   double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm;
-  ns_test=0; ns_total=0; ns_pair=0; ni_total=0;
+  ns_test = 0;
+  ns_total = 0;
+  ns_pair = 0;
+  ni_total = 0;
 
   // Header.
   HEADER header;
 
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader_vc(line, header);
 
   while (!safeGetline(infile, line).eof()) {
 
     // Do not read cor values this time; upto col_n-1.
-    d_pos1=0; d_cm1=0;
-    ch_ptr=strtok ((char *)line.c_str(), " , \t");
-    for (size_t i=0; i<header.coln-1; i++) {
-      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
-      if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
-      if (header.pos_col!=0 && header.pos_col==i+1) {
-	pos=ch_ptr;
-	d_pos1=atof(ch_ptr);
+    d_pos1 = 0;
+    d_cm1 = 0;
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    for (size_t i = 0; i < header.coln - 1; i++) {
+      if (header.rs_col != 0 && header.rs_col == i + 1) {
+        rs = ch_ptr;
+      }
+      if (header.chr_col != 0 && header.chr_col == i + 1) {
+        chr = ch_ptr;
+      }
+      if (header.pos_col != 0 && header.pos_col == i + 1) {
+        pos = ch_ptr;
+        d_pos1 = atof(ch_ptr);
+      }
+      if (header.cm_col != 0 && header.cm_col == i + 1) {
+        cm = ch_ptr;
+        d_cm1 = atof(ch_ptr);
       }
-      if (header.cm_col!=0 && header.cm_col==i+1) {
-	cm=ch_ptr;
-	d_cm1=atof(ch_ptr);
+      if (header.a1_col != 0 && header.a1_col == i + 1) {
+        a1 = ch_ptr;
+      }
+      if (header.a0_col != 0 && header.a0_col == i + 1) {
+        a0 = ch_ptr;
       }
-      if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
-      if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
 
-      if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
-      if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
-      if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+      if (header.n_col != 0 && header.n_col == i + 1) {
+        n_total = atoi(ch_ptr);
+      }
+      if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+        n_mis = atoi(ch_ptr);
+      }
+      if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+        n_obs = atoi(ch_ptr);
+      }
 
-      ch_ptr=strtok (NULL, " , \t");
+      ch_ptr = strtok(NULL, " , \t");
     }
 
-    if (header.rs_col==0) {
-      rs=chr+":"+pos;
+    if (header.rs_col == 0) {
+      rs = chr + ":" + pos;
     }
 
-    if (header.n_col==0) {
-      n_total=n_mis+n_obs;
+    if (header.n_col == 0) {
+      n_total = n_mis + n_obs;
     }
 
-    rs1=rs;
-
-    if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs1)!=0) &&
-	 mapRS2in.count(rs1)!=0 && mapRS2in.at(rs1)==2) {
-      var1=mapRS2var.at(rs1);
-      nsamp1=mapRS2nsamp.at(rs1);
-      d2=var1*var1;
-
-      if (mapRS2cat.size()!=0) {
-	mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+=
-	  (1-1.0/(double)vec_n[ns_total])*d2;
-	mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+=
-	  d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]);
-	if (crt==1) {
-	  mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ][0]+=
-	    (1-1.0/(double)vec_n[ns_total])*d2;
-	}
+    rs1 = rs;
+
+    if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs1) != 0) &&
+        mapRS2in.count(rs1) != 0 && mapRS2in.at(rs1) == 2) {
+      var1 = mapRS2var.at(rs1);
+      nsamp1 = mapRS2nsamp.at(rs1);
+      d2 = var1 * var1;
+
+      if (mapRS2cat.size() != 0) {
+        mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
+            (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+        mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
+            d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
+        if (crt == 1) {
+          mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)][0] +=
+              (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+        }
       } else {
-	mat_S[0][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
-	mat_Svar[0][0]+=
-	  d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]);
-	if (crt==1) {
-	  mat3d_Sbin[0][0][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
-	}
-      }
-
-      n_nb=0;
-      while(ch_ptr!=NULL) {
-	type=ch_ptr;
-	if (type.compare("NA")!=0 && type.compare("na")!=0 &&
-	    type.compare("nan")!=0 && type.compare("-nan")!=0) {
-	  cor=atof(ch_ptr);
-	  rs2=vec_rs[ns_total+n_nb+1];
-	  d_pos2=vec_bp[ns_total+n_nb+1];
-	  d_cm2=vec_cm[ns_total+n_nb+1];
-	  d_pos=abs(d_pos2-d_pos1);
-	  d_cm=abs(d_cm2-d_cm1);
-
-	  if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs2)!=0) &&
-	       mapRS2in.count(rs2)!=0 && mapRS2in.at(rs2)==2) {
-	    var2=mapRS2var.at(rs2);
-	    nsamp2=mapRS2nsamp.at(rs2);
-	    d1=cor*cor-1.0/(double)min(vec_n[ns_total],
-				       vec_n[ns_total+n_nb+1]);
-	    d2=var1*var2;
-	    d3=cor*cor/((double)nsamp1*(double)nsamp2);
-	    n12=min(vec_n[ns_total], vec_n[ns_total+n_nb+1]);
-
-	    // Compute bin.
-	    if (crt==1) {
-	      if (window_cm!=0 && d_cm1!=0 && d_cm2!=0) {
-		bin=min( (int)floor(d_cm/window_cm*bin_size), (int)bin_size);
-	      } else if (window_bp!=0 && d_pos1!=0 && d_pos2!=0) {
-		bin=min( (int)floor(d_pos/window_bp*bin_size), (int)bin_size);
-	      } else if (window_ns!=0) {
-		bin=min( (int)floor(((double)n_nb+1)/window_ns*bin_size),
-			 (int)bin_size);
-	      }
-	    }
-
-	    if (mapRS2cat.size()!=0) {
-	      if (mapRS2cat.at(rs1)==mapRS2cat.at(rs2)) {
-		vec_qvar[mapRS2cat.at(rs1)]+=2*d3*d2;
-		mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=2*d1*d2;
-		mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=
-		  2*d2*d2/((double)n12*(double)n12);
-		if (crt==1) {
-		  mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+=
-		    2*d1*d2;
-		}
-	      } else {
-		mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=d1*d2;
-		mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=
-		  d2*d2/((double)n12*(double)n12);
-		if (crt==1) {
-		  mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+=
-		    d1*d2;
-		}
-	      }
-	    } else {
-	      vec_qvar[0]+=2*d3*d2;
-	      mat_S[0][0]+=2*d1*d2;
-	      mat_Svar[0][0]+=2*d2*d2/((double)n12*(double)n12);
-
-	      if (crt==1) {
-		mat3d_Sbin[0][0][bin]+=2*d1*d2;
-	      }
-	    }
-	    ns_pair++;
-	  }
-	}
-
-	ch_ptr=strtok (NULL, " , \t");
-	n_nb++;
-      }
-      ni_total=max(ni_total, n_total);
+        mat_S[0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+        mat_Svar[0][0] +=
+            d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
+        if (crt == 1) {
+          mat3d_Sbin[0][0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+        }
+      }
+
+      n_nb = 0;
+      while (ch_ptr != NULL) {
+        type = ch_ptr;
+        if (type.compare("NA") != 0 && type.compare("na") != 0 &&
+            type.compare("nan") != 0 && type.compare("-nan") != 0) {
+          cor = atof(ch_ptr);
+          rs2 = vec_rs[ns_total + n_nb + 1];
+          d_pos2 = vec_bp[ns_total + n_nb + 1];
+          d_cm2 = vec_cm[ns_total + n_nb + 1];
+          d_pos = abs(d_pos2 - d_pos1);
+          d_cm = abs(d_cm2 - d_cm1);
+
+          if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs2) != 0) &&
+              mapRS2in.count(rs2) != 0 && mapRS2in.at(rs2) == 2) {
+            var2 = mapRS2var.at(rs2);
+            nsamp2 = mapRS2nsamp.at(rs2);
+            d1 = cor * cor -
+                 1.0 / (double)min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
+            d2 = var1 * var2;
+            d3 = cor * cor / ((double)nsamp1 * (double)nsamp2);
+            n12 = min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
+
+            // Compute bin.
+            if (crt == 1) {
+              if (window_cm != 0 && d_cm1 != 0 && d_cm2 != 0) {
+                bin =
+                    min((int)floor(d_cm / window_cm * bin_size), (int)bin_size);
+              } else if (window_bp != 0 && d_pos1 != 0 && d_pos2 != 0) {
+                bin = min((int)floor(d_pos / window_bp * bin_size),
+                          (int)bin_size);
+              } else if (window_ns != 0) {
+                bin = min((int)floor(((double)n_nb + 1) / window_ns * bin_size),
+                          (int)bin_size);
+              }
+            }
+
+            if (mapRS2cat.size() != 0) {
+              if (mapRS2cat.at(rs1) == mapRS2cat.at(rs2)) {
+                vec_qvar[mapRS2cat.at(rs1)] += 2 * d3 * d2;
+                mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += 2 * d1 * d2;
+                mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
+                    2 * d2 * d2 / ((double)n12 * (double)n12);
+                if (crt == 1) {
+                  mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
+                      2 * d1 * d2;
+                }
+              } else {
+                mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += d1 * d2;
+                mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
+                    d2 * d2 / ((double)n12 * (double)n12);
+                if (crt == 1) {
+                  mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
+                      d1 * d2;
+                }
+              }
+            } else {
+              vec_qvar[0] += 2 * d3 * d2;
+              mat_S[0][0] += 2 * d1 * d2;
+              mat_Svar[0][0] += 2 * d2 * d2 / ((double)n12 * (double)n12);
+
+              if (crt == 1) {
+                mat3d_Sbin[0][0][bin] += 2 * d1 * d2;
+              }
+            }
+            ns_pair++;
+          }
+        }
+
+        ch_ptr = strtok(NULL, " , \t");
+        n_nb++;
+      }
+      ni_total = max(ni_total, n_total);
       ns_test++;
     }
 
@@ -1081,70 +1211,83 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
   // x=seq(0.5,bin_size-0.5,by=1) and then compute a correlation
   // factor as a percentage.
   double a, b, x, y, n, var_y, var_x, mean_y, mean_x, cov_xy, crt_factor;
-  if (crt==1) {
-    for (size_t i=0; i<S_mat->size1; i++) {
-      for (size_t j=i; j<S_mat->size2; j++) {
-
-	// Correct mat_S.
-	n=0; var_y=0; var_x=0; mean_y=0; mean_x=0; cov_xy=0;
-	for (size_t k=0; k<bin_size; k++) {
-	  if (j==i) {
-	    y=mat3d_Sbin[i][j][k];
-	  } else {
-	    y=mat3d_Sbin[i][j][k]+mat3d_Sbin[j][i][k];
-	  }
-	  x=k+0.5;
-	  cout<<y<<", ";
-	  if (y>0) {
-	    y=1/sqrt(y);
-	    mean_x+=x; mean_y+=y; var_x+=x*x; var_y+=y*y; cov_xy+=x*y;
-	    n++;
-	  }
-	}
-	cout<<endl;
-
-	if (n>=5) {
-	  mean_x/=n; mean_y/=n; var_x/=n; var_y/=n; cov_xy/=n;
-	  var_x-=mean_x*mean_x; var_y-=mean_y*mean_y; cov_xy-=mean_x*mean_y;
-	  b=cov_xy/var_x;
-	  a=mean_y-b*mean_x;
-	  crt_factor=a/(b*(bin_size+0.5))+1;
-	  if (i==j) {
-	    mat_S[i][j]*=crt_factor;
-	  } else {
-	    mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor;
-	  }
-	  cout<<crt_factor<<endl;
-
-	  // Correct qvar.
-	  if (i==j) {
-	    vec_qvar[i]*=crt_factor;
-	  }
-	}
+  if (crt == 1) {
+    for (size_t i = 0; i < S_mat->size1; i++) {
+      for (size_t j = i; j < S_mat->size2; j++) {
+
+        // Correct mat_S.
+        n = 0;
+        var_y = 0;
+        var_x = 0;
+        mean_y = 0;
+        mean_x = 0;
+        cov_xy = 0;
+        for (size_t k = 0; k < bin_size; k++) {
+          if (j == i) {
+            y = mat3d_Sbin[i][j][k];
+          } else {
+            y = mat3d_Sbin[i][j][k] + mat3d_Sbin[j][i][k];
+          }
+          x = k + 0.5;
+          cout << y << ", ";
+          if (y > 0) {
+            y = 1 / sqrt(y);
+            mean_x += x;
+            mean_y += y;
+            var_x += x * x;
+            var_y += y * y;
+            cov_xy += x * y;
+            n++;
+          }
+        }
+        cout << endl;
+
+        if (n >= 5) {
+          mean_x /= n;
+          mean_y /= n;
+          var_x /= n;
+          var_y /= n;
+          cov_xy /= n;
+          var_x -= mean_x * mean_x;
+          var_y -= mean_y * mean_y;
+          cov_xy -= mean_x * mean_y;
+          b = cov_xy / var_x;
+          a = mean_y - b * mean_x;
+          crt_factor = a / (b * (bin_size + 0.5)) + 1;
+          if (i == j) {
+            mat_S[i][j] *= crt_factor;
+          } else {
+            mat_S[i][j] *= crt_factor;
+            mat_S[j][i] *= crt_factor;
+          }
+          cout << crt_factor << endl;
+
+          // Correct qvar.
+          if (i == j) {
+            vec_qvar[i] *= crt_factor;
+          }
+        }
       }
     }
   }
 
   // Save to gsl_vector and gsl_matrix: qvar_vec, S_mat, Svar_mat.
-  for (size_t i=0; i<S_mat->size1; i++) {
-    d1=gsl_vector_get(qvar_vec, i)+2*vec_qvar[i];
+  for (size_t i = 0; i < S_mat->size1; i++) {
+    d1 = gsl_vector_get(qvar_vec, i) + 2 * vec_qvar[i];
     gsl_vector_set(qvar_vec, i, d1);
-    for (size_t j=0; j<S_mat->size2; j++) {
-      if (i==j) {
-	gsl_matrix_set(S_mat, i, j, mat_S[i][i]);
-	gsl_matrix_set(Svar_mat, i, j,
-		       2.0*mat_Svar[i][i]*ns_test*ns_test/(2.0*ns_pair) );
+    for (size_t j = 0; j < S_mat->size2; j++) {
+      if (i == j) {
+        gsl_matrix_set(S_mat, i, j, mat_S[i][i]);
+        gsl_matrix_set(Svar_mat, i, j, 2.0 * mat_Svar[i][i] * ns_test *
+                                           ns_test / (2.0 * ns_pair));
       } else {
-	gsl_matrix_set(S_mat, i, j, mat_S[i][j]+mat_S[j][i]);
-	gsl_matrix_set(Svar_mat, i, j,
-		       2.0*(mat_Svar[i][j]+mat_Svar[j][i])*
-		       ns_test*ns_test/(2.0*ns_pair) );
+        gsl_matrix_set(S_mat, i, j, mat_S[i][j] + mat_S[j][i]);
+        gsl_matrix_set(Svar_mat, i, j, 2.0 * (mat_Svar[i][j] + mat_Svar[j][i]) *
+                                           ns_test * ns_test / (2.0 * ns_pair));
       }
     }
   }
 
-
-
   infile.clear();
   infile.close();
 
@@ -1157,170 +1300,175 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
 // compute the variance for S, use a set of genotypes, phenotypes, and
 // individual ids, and snp category label.
 void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
-	      const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
-	      const gsl_vector *s_vec, const double df,
-	      vector<double> &v_pve, vector<double> &v_se_pve,
-	      double &pve_total, double &se_pve_total,
-	      vector<double> &v_sigma2, vector<double> &v_se_sigma2,
-	      vector<double> &v_enrich, vector<double> &v_se_enrich) {
-  size_t n_vc=S_mat->size1;
-
-  gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
-
-  gsl_vector *pve=gsl_vector_alloc (n_vc);
-  gsl_vector *pve_plus=gsl_vector_alloc (n_vc+1);
-  gsl_vector *tmp=gsl_vector_alloc (n_vc+1);
-  gsl_vector *sigma2persnp=gsl_vector_alloc (n_vc);
-  gsl_vector *enrich=gsl_vector_alloc (n_vc);
-  gsl_vector *se_pve=gsl_vector_alloc (n_vc);
-  gsl_vector *se_sigma2persnp=gsl_vector_alloc (n_vc);
-  gsl_vector *se_enrich=gsl_vector_alloc (n_vc);
+              const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
+              const gsl_vector *s_vec, const double df, vector<double> &v_pve,
+              vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+              vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+              vector<double> &v_enrich, vector<double> &v_se_enrich) {
+  size_t n_vc = S_mat->size1;
+
+  gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
+
+  gsl_vector *pve = gsl_vector_alloc(n_vc);
+  gsl_vector *pve_plus = gsl_vector_alloc(n_vc + 1);
+  gsl_vector *tmp = gsl_vector_alloc(n_vc + 1);
+  gsl_vector *sigma2persnp = gsl_vector_alloc(n_vc);
+  gsl_vector *enrich = gsl_vector_alloc(n_vc);
+  gsl_vector *se_pve = gsl_vector_alloc(n_vc);
+  gsl_vector *se_sigma2persnp = gsl_vector_alloc(n_vc);
+  gsl_vector *se_enrich = gsl_vector_alloc(n_vc);
 
   double d;
 
   // Calculate S^{-1}q.
-  gsl_matrix_memcpy (tmp_mat, S_mat);
+  gsl_matrix_memcpy(tmp_mat, S_mat);
   int sig;
-  gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
-  LUDecomp (tmp_mat, pmt, &sig);
-  LUInvert (tmp_mat, pmt, Si_mat);
+  gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+  LUDecomp(tmp_mat, pmt, &sig);
+  LUInvert(tmp_mat, pmt, Si_mat);
 
   // Calculate sigma2snp and pve.
-  gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
   gsl_vector_memcpy(sigma2persnp, pve);
   gsl_vector_div(sigma2persnp, s_vec);
 
   // Get qvar_mat.
-  gsl_matrix_memcpy (qvar_mat, Vq);
-  gsl_matrix_scale (qvar_mat, 1.0/(df*df));
+  gsl_matrix_memcpy(qvar_mat, Vq);
+  gsl_matrix_scale(qvar_mat, 1.0 / (df * df));
 
   // Calculate variance for these estimates.
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=i; j<n_vc; j++) {
-      d=gsl_matrix_get(Svar_mat, i, j);
-      d*=gsl_vector_get(pve, i)*gsl_vector_get(pve, j);
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = i; j < n_vc; j++) {
+      d = gsl_matrix_get(Svar_mat, i, j);
+      d *= gsl_vector_get(pve, i) * gsl_vector_get(pve, j);
 
-      d+=gsl_matrix_get(qvar_mat, i, j);
+      d += gsl_matrix_get(qvar_mat, i, j);
       gsl_matrix_set(Var_mat, i, j, d);
-      if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);}
+      if (i != j) {
+        gsl_matrix_set(Var_mat, j, i, d);
+      }
     }
   }
 
-  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat,
-		 0.0, tmp_mat);
-  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat,
-		 0.0, Var_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
+                 tmp_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
+                 Var_mat);
 
-  for (size_t i=0; i<n_vc; i++) {
-    d=sqrt(gsl_matrix_get(Var_mat, i, i));
+  for (size_t i = 0; i < n_vc; i++) {
+    d = sqrt(gsl_matrix_get(Var_mat, i, i));
     gsl_vector_set(se_pve, i, d);
-    d/=gsl_vector_get(s_vec, i);
+    d /= gsl_vector_get(s_vec, i);
     gsl_vector_set(se_sigma2persnp, i, d);
   }
 
   // Compute pve_total, se_pve_total.
-  pve_total=0; se_pve_total=0;
-  for (size_t i=0; i<n_vc; i++) {
-    pve_total+=gsl_vector_get(pve, i);
+  pve_total = 0;
+  se_pve_total = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    pve_total += gsl_vector_get(pve, i);
 
-    for (size_t j=0; j<n_vc; j++) {
-      se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+    for (size_t j = 0; j < n_vc; j++) {
+      se_pve_total += gsl_matrix_get(Var_mat, i, j);
     }
   }
-  se_pve_total=sqrt(se_pve_total);
+  se_pve_total = sqrt(se_pve_total);
 
   // Compute enrichment and its variance.
-  double s_pve=0, s_snp=0;
-  for (size_t i=0; i<n_vc; i++) {
-    s_pve+=gsl_vector_get(pve, i);
-    s_snp+=gsl_vector_get(s_vec, i);
+  double s_pve = 0, s_snp = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    s_pve += gsl_vector_get(pve, i);
+    s_snp += gsl_vector_get(s_vec, i);
   }
-  gsl_vector_memcpy (enrich, sigma2persnp);
-  gsl_vector_scale (enrich, s_snp/s_pve);
+  gsl_vector_memcpy(enrich, sigma2persnp);
+  gsl_vector_scale(enrich, s_snp / s_pve);
 
   gsl_matrix_set_identity(tmp_mat);
 
   double d1;
-  for (size_t i=0; i<n_vc; i++) {
-    d=gsl_vector_get(pve, i)/s_pve;
-    d1=gsl_vector_get(s_vec, i);
-    for (size_t j=0; j<n_vc; j++) {
-      if (i==j) {
-	gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve);
+  for (size_t i = 0; i < n_vc; i++) {
+    d = gsl_vector_get(pve, i) / s_pve;
+    d1 = gsl_vector_get(s_vec, i);
+    for (size_t j = 0; j < n_vc; j++) {
+      if (i == j) {
+        gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
       } else {
-	gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve);
+        gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
       }
     }
   }
   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
-		 tmp_mat1);
+                 tmp_mat1);
   gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
-		 VarEnrich_mat);
+                 VarEnrich_mat);
 
-  for (size_t i=0; i<n_vc; i++) {
-    d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
+  for (size_t i = 0; i < n_vc; i++) {
+    d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
     gsl_vector_set(se_enrich, i, d);
   }
 
-  cout<<"pve = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<gsl_vector_get(pve, i)<<" ";
+  cout << "pve = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << gsl_vector_get(pve, i) << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(pve) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<gsl_vector_get(se_pve, i)<<" ";
+  cout << "se(pve) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << gsl_vector_get(se_pve, i) << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"sigma2 per snp = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<gsl_vector_get(sigma2persnp, i)<<" ";
+  cout << "sigma2 per snp = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << gsl_vector_get(sigma2persnp, i) << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(sigma2 per snp) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<gsl_vector_get(se_sigma2persnp, i)<<" ";
+  cout << "se(sigma2 per snp) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << gsl_vector_get(se_sigma2persnp, i) << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"enrichment = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<gsl_vector_get(enrich, i)<<" ";
+  cout << "enrichment = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << gsl_vector_get(enrich, i) << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(enrichment) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<gsl_vector_get(se_enrich, i)<<" ";
+  cout << "se(enrichment) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << gsl_vector_get(se_enrich, i) << " ";
   }
-  cout<<endl;
+  cout << endl;
 
   // Save data.
-  v_pve.clear(); v_se_pve.clear();
-  v_sigma2.clear(); v_se_sigma2.clear();
-  v_enrich.clear(); v_se_enrich.clear();
-  for (size_t i=0; i<n_vc; i++) {
-    d=gsl_vector_get(pve, i);
+  v_pve.clear();
+  v_se_pve.clear();
+  v_sigma2.clear();
+  v_se_sigma2.clear();
+  v_enrich.clear();
+  v_se_enrich.clear();
+  for (size_t i = 0; i < n_vc; i++) {
+    d = gsl_vector_get(pve, i);
     v_pve.push_back(d);
-    d=gsl_vector_get(se_pve, i);
+    d = gsl_vector_get(se_pve, i);
     v_se_pve.push_back(d);
 
-    d=gsl_vector_get(sigma2persnp, i);
+    d = gsl_vector_get(sigma2persnp, i);
     v_sigma2.push_back(d);
-    d=gsl_vector_get(se_sigma2persnp, i);
+    d = gsl_vector_get(se_sigma2persnp, i);
     v_se_sigma2.push_back(d);
 
-    d=gsl_vector_get(enrich, i);
+    d = gsl_vector_get(enrich, i);
     v_enrich.push_back(d);
-    d=gsl_vector_get(se_enrich, i);
+    d = gsl_vector_get(se_enrich, i);
     v_se_enrich.push_back(d);
   }
 
@@ -1345,196 +1493,206 @@ void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
 }
 
 // Ks are not scaled.
-void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W,
-		   const gsl_vector *y) {
-  size_t n1=K->size1, n2=K->size2;
-  size_t n_vc=n2/n1;
+void VC::CalcVChe(const gsl_matrix *K, const gsl_matrix *W,
+                  const gsl_vector *y) {
+  size_t n1 = K->size1, n2 = K->size2;
+  size_t n_vc = n2 / n1;
 
-  double r=(double)n1/(double)(n1 - W->size2);
+  double r = (double)n1 / (double)(n1 - W->size2);
   double var_y, var_y_new;
   double d, tr, s, v;
   vector<double> traceG_new;
 
   // New matrices/vectors.
-  gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2);
-  gsl_vector *y_scale=gsl_vector_alloc (n1);
-  gsl_matrix *Kry=gsl_matrix_alloc (n1, n_vc);
-  gsl_matrix *yKrKKry=gsl_matrix_alloc (n_vc, n_vc*(n_vc+1) );
-  gsl_vector *KKry=gsl_vector_alloc (n1);
+  gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
+  gsl_vector *y_scale = gsl_vector_alloc(n1);
+  gsl_matrix *Kry = gsl_matrix_alloc(n1, n_vc);
+  gsl_matrix *yKrKKry = gsl_matrix_alloc(n_vc, n_vc * (n_vc + 1));
+  gsl_vector *KKry = gsl_vector_alloc(n1);
 
   // Old matrices/vectors.
-  gsl_vector *pve=gsl_vector_alloc (n_vc);
-  gsl_vector *se_pve=gsl_vector_alloc (n_vc);
-  gsl_vector *q_vec=gsl_vector_alloc (n_vc);
-  gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_vector *pve = gsl_vector_alloc(n_vc);
+  gsl_vector *se_pve = gsl_vector_alloc(n_vc);
+  gsl_vector *q_vec = gsl_vector_alloc(n_vc);
+  gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
 
   // Center and scale K by W.
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     gsl_matrix_view Kscale_sub =
-      gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+        gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
     gsl_matrix_const_view K_sub =
-      gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1);
-    gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix);
+        gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
+    gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
 
-    CenterMatrix (&Kscale_sub.matrix, W);
-    d=ScaleMatrix (&Kscale_sub.matrix);
+    CenterMatrix(&Kscale_sub.matrix, W);
+    d = ScaleMatrix(&Kscale_sub.matrix);
     traceG_new.push_back(d);
   }
 
   // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1).
-  gsl_vector_memcpy (y_scale, y);
-  CenterVector (y_scale, W);
+  gsl_vector_memcpy(y_scale, y);
+  CenterVector(y_scale, W);
 
-  var_y=VectorVar (y);
-  var_y_new=VectorVar (y_scale);
+  var_y = VectorVar(y);
+  var_y_new = VectorVar(y_scale);
 
-  StandardizeVector (y_scale);
+  StandardizeVector(y_scale);
 
   // Compute Kry, which is used for confidence interval; also compute
   // q_vec (*n^2).
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     gsl_matrix_const_view Kscale_sub =
-      gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
-    gsl_vector_view Kry_col=gsl_matrix_column (Kry, i);
+        gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
+    gsl_vector_view Kry_col = gsl_matrix_column(Kry, i);
 
-    gsl_vector_memcpy (&Kry_col.vector, y_scale);
-    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0*r,
-		   &Kry_col.vector);
+    gsl_vector_memcpy(&Kry_col.vector, y_scale);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0 * r,
+                   &Kry_col.vector);
 
-    gsl_blas_ddot (&Kry_col.vector, y_scale, &d);
+    gsl_blas_ddot(&Kry_col.vector, y_scale, &d);
     gsl_vector_set(q_vec, i, d);
   }
 
   // Compute yKrKKry, which is used later for confidence interval.
-  for (size_t i=0; i<n_vc; i++) {
-    gsl_vector_const_view Kry_coli=gsl_matrix_const_column (Kry, i);
-    for (size_t j=i; j<n_vc; j++) {
-      gsl_vector_const_view Kry_colj=gsl_matrix_const_column (Kry, j);
-      for (size_t l=0; l<n_vc; l++) {
-	gsl_matrix_const_view Kscale_sub =
-	  gsl_matrix_const_submatrix (K_scale, 0, n1*l, n1, n1);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &Kscale_sub.matrix,
-			&Kry_coli.vector, 0.0, KKry);
-	gsl_blas_ddot (&Kry_colj.vector, KKry, &d);
-	gsl_matrix_set(yKrKKry, i, l*n_vc+j, d);
-	if (i!=j) {gsl_matrix_set(yKrKKry, j, l*n_vc+i, d);}
+  for (size_t i = 0; i < n_vc; i++) {
+    gsl_vector_const_view Kry_coli = gsl_matrix_const_column(Kry, i);
+    for (size_t j = i; j < n_vc; j++) {
+      gsl_vector_const_view Kry_colj = gsl_matrix_const_column(Kry, j);
+      for (size_t l = 0; l < n_vc; l++) {
+        gsl_matrix_const_view Kscale_sub =
+            gsl_matrix_const_submatrix(K_scale, 0, n1 * l, n1, n1);
+        gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, &Kry_coli.vector,
+                       0.0, KKry);
+        gsl_blas_ddot(&Kry_colj.vector, KKry, &d);
+        gsl_matrix_set(yKrKKry, i, l * n_vc + j, d);
+        if (i != j) {
+          gsl_matrix_set(yKrKKry, j, l * n_vc + i, d);
+        }
+      }
+      gsl_blas_ddot(&Kry_coli.vector, &Kry_colj.vector, &d);
+      gsl_matrix_set(yKrKKry, i, n_vc * n_vc + j, d);
+      if (i != j) {
+        gsl_matrix_set(yKrKKry, j, n_vc * n_vc + i, d);
       }
-      gsl_blas_ddot (&Kry_coli.vector, &Kry_colj.vector, &d);
-      gsl_matrix_set(yKrKKry, i, n_vc*n_vc+j, d);
-      if (i!=j) {gsl_matrix_set(yKrKKry, j, n_vc*n_vc+i, d);}
     }
   }
 
   // Compute Sij (*n^2).
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=i; j<n_vc; j++) {
-      tr=0;
-      for (size_t l=0; l<n1; l++) {
-	gsl_vector_const_view Ki_col=gsl_matrix_const_column (K_scale, i*n1+l);
-	gsl_vector_const_view Kj_col=gsl_matrix_const_column (K_scale, j*n1+l);
-	gsl_blas_ddot (&Ki_col.vector, &Kj_col.vector, &d);
-	tr+=d;
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = i; j < n_vc; j++) {
+      tr = 0;
+      for (size_t l = 0; l < n1; l++) {
+        gsl_vector_const_view Ki_col =
+            gsl_matrix_const_column(K_scale, i * n1 + l);
+        gsl_vector_const_view Kj_col =
+            gsl_matrix_const_column(K_scale, j * n1 + l);
+        gsl_blas_ddot(&Ki_col.vector, &Kj_col.vector, &d);
+        tr += d;
       }
 
-      tr=tr-r*(double)n1;
-      gsl_matrix_set (S_mat, i, j, tr);
-      if (i!=j) {gsl_matrix_set (S_mat, j, i, tr);}
+      tr = tr - r * (double)n1;
+      gsl_matrix_set(S_mat, i, j, tr);
+      if (i != j) {
+        gsl_matrix_set(S_mat, j, i, tr);
+      }
     }
   }
 
   // Compute S^{-1}q.
   int sig;
-  gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
-  LUDecomp (S_mat, pmt, &sig);
-  LUInvert (S_mat, pmt, Si_mat);
+  gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+  LUDecomp(S_mat, pmt, &sig);
+  LUInvert(S_mat, pmt, Si_mat);
 
   // Compute pve (on the transformed scale).
-  gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
 
   // Compute q_var (*n^4).
-  gsl_matrix_set_zero (qvar_mat);
-  s=1;
-  for (size_t i=0; i<n_vc; i++) {
-    d=gsl_vector_get(pve, i);
-    gsl_matrix_view yKrKKry_sub=
-      gsl_matrix_submatrix(yKrKKry, 0, i*n_vc, n_vc, n_vc);
-    gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix);
+  gsl_matrix_set_zero(qvar_mat);
+  s = 1;
+  for (size_t i = 0; i < n_vc; i++) {
+    d = gsl_vector_get(pve, i);
+    gsl_matrix_view yKrKKry_sub =
+        gsl_matrix_submatrix(yKrKKry, 0, i * n_vc, n_vc, n_vc);
+    gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
     gsl_matrix_scale(tmp_mat, d);
-    gsl_matrix_add (qvar_mat, tmp_mat);
-    s-=d;
+    gsl_matrix_add(qvar_mat, tmp_mat);
+    s -= d;
   }
-  gsl_matrix_view yKrKKry_sub=gsl_matrix_submatrix(yKrKKry, 0, n_vc*n_vc,
-						   n_vc, n_vc);
-  gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix);
+  gsl_matrix_view yKrKKry_sub =
+      gsl_matrix_submatrix(yKrKKry, 0, n_vc * n_vc, n_vc, n_vc);
+  gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
   gsl_matrix_scale(tmp_mat, s);
-  gsl_matrix_add (qvar_mat, tmp_mat);
+  gsl_matrix_add(qvar_mat, tmp_mat);
 
   gsl_matrix_scale(qvar_mat, 2.0);
 
   // Compute S^{-1}var_qS^{-1}.
-  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat,
-		 0.0, tmp_mat);
-  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat,
-		 0.0, Var_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat, 0.0,
+                 tmp_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
+                 Var_mat);
 
   // Transform pve back to the original scale and save data.
-  v_pve.clear(); v_se_pve.clear();
-  v_sigma2.clear(); v_se_sigma2.clear();
-
-  s=1.0, v=0, pve_total=0, se_pve_total=0;
-  for (size_t i=0; i<n_vc; i++) {
-    d=gsl_vector_get (pve, i);
-    v_sigma2.push_back(d*var_y_new/traceG_new[i]);
-    v_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y));
-    s-=d;
-    pve_total+=d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y);
-
-    d=sqrt(gsl_matrix_get (Var_mat, i, i));
-    v_se_sigma2.push_back(d*var_y_new/traceG_new[i]);
-    v_se_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y));
-
-    for (size_t j=0; j<n_vc; j++) {
-      v+=gsl_matrix_get(Var_mat, i, j);
-      se_pve_total+=gsl_matrix_get(Var_mat, i, j)*
-	(var_y_new/traceG_new[i])*(v_traceG[i]/var_y)*
-	(var_y_new/traceG_new[j])*(v_traceG[j]/var_y);
+  v_pve.clear();
+  v_se_pve.clear();
+  v_sigma2.clear();
+  v_se_sigma2.clear();
+
+  s = 1.0, v = 0, pve_total = 0, se_pve_total = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    d = gsl_vector_get(pve, i);
+    v_sigma2.push_back(d * var_y_new / traceG_new[i]);
+    v_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
+    s -= d;
+    pve_total += d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y);
+
+    d = sqrt(gsl_matrix_get(Var_mat, i, i));
+    v_se_sigma2.push_back(d * var_y_new / traceG_new[i]);
+    v_se_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
+
+    for (size_t j = 0; j < n_vc; j++) {
+      v += gsl_matrix_get(Var_mat, i, j);
+      se_pve_total += gsl_matrix_get(Var_mat, i, j) *
+                      (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y) *
+                      (var_y_new / traceG_new[j]) * (v_traceG[j] / var_y);
     }
   }
-  v_sigma2.push_back(s*r*var_y_new);
-  v_se_sigma2.push_back(sqrt(v)*r*var_y_new);
-  se_pve_total=sqrt(se_pve_total);
+  v_sigma2.push_back(s * r * var_y_new);
+  v_se_sigma2.push_back(sqrt(v) * r * var_y_new);
+  se_pve_total = sqrt(se_pve_total);
 
-  cout<<"sigma2 = ";
-  for (size_t i=0; i<n_vc+1; i++) {
-    cout<<v_sigma2[i]<<" ";
+  cout << "sigma2 = ";
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    cout << v_sigma2[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(sigma2) = ";
-  for (size_t i=0; i<n_vc+1; i++) {
-    cout<<v_se_sigma2[i]<<" ";
+  cout << "se(sigma2) = ";
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    cout << v_se_sigma2[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"pve = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_pve[i]<<" ";
+  cout << "pve = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_pve[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(pve) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_se_pve[i]<<" ";
+  cout << "se(pve) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_se_pve[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  if (n_vc>1) {
-    cout<<"total pve = "<<pve_total<<endl;
-    cout<<"se(total pve) = "<<se_pve_total<<endl;
+  if (n_vc > 1) {
+    cout << "total pve = " << pve_total << endl;
+    cout << "se(total pve) = " << se_pve_total << endl;
   }
 
   gsl_permutation_free(pmt);
@@ -1558,188 +1716,195 @@ void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W,
 }
 
 // REML for log(sigma2) based on the AI algorithm.
-void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K,
-		     const gsl_matrix *W, const gsl_vector *y) {
-  size_t n1=K->size1, n2=K->size2;
-  size_t n_vc=n2/n1;
-  gsl_vector *log_sigma2=gsl_vector_alloc (n_vc+1);
+void VC::CalcVCreml(bool noconstrain, const gsl_matrix *K, const gsl_matrix *W,
+                    const gsl_vector *y) {
+  size_t n1 = K->size1, n2 = K->size2;
+  size_t n_vc = n2 / n1;
+  gsl_vector *log_sigma2 = gsl_vector_alloc(n_vc + 1);
   double d, s;
 
   // Set up params.
-  gsl_matrix *P=gsl_matrix_alloc (n1, n1);
-  gsl_vector *Py=gsl_vector_alloc (n1);
-  gsl_matrix *KPy_mat=gsl_matrix_alloc (n1, n_vc+1);
-  gsl_matrix *PKPy_mat=gsl_matrix_alloc (n1, n_vc+1);
-  gsl_vector *dev1=gsl_vector_alloc (n_vc+1);
-  gsl_matrix *dev2=gsl_matrix_alloc (n_vc+1, n_vc+1);
-  gsl_matrix *Hessian=gsl_matrix_alloc (n_vc+1, n_vc+1);
-  VC_PARAM params={K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain};
+  gsl_matrix *P = gsl_matrix_alloc(n1, n1);
+  gsl_vector *Py = gsl_vector_alloc(n1);
+  gsl_matrix *KPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
+  gsl_matrix *PKPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
+  gsl_vector *dev1 = gsl_vector_alloc(n_vc + 1);
+  gsl_matrix *dev2 = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
+  gsl_matrix *Hessian = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
+  VC_PARAM params = {K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain};
 
   // Initialize sigma2/log_sigma2.
-  CalcVChe (K, W, y);
+  CalcVChe(K, W, y);
 
-  gsl_blas_ddot (y, y, &s);
-  s/=(double)n1;
-  for (size_t i=0; i<n_vc+1; i++) {
+  gsl_blas_ddot(y, y, &s);
+  s /= (double)n1;
+  for (size_t i = 0; i < n_vc + 1; i++) {
     if (noconstrain) {
-      d=v_sigma2[i];
+      d = v_sigma2[i];
     } else {
-      if (v_sigma2[i]<=0) {d=log(0.1);} else {d=log(v_sigma2[i]);}
+      if (v_sigma2[i] <= 0) {
+        d = log(0.1);
+      } else {
+        d = log(v_sigma2[i]);
+      }
     }
-    gsl_vector_set (log_sigma2, i, d);
+    gsl_vector_set(log_sigma2, i, d);
   }
 
-  cout<<"iteration "<<0<<endl;
-  cout<<"sigma2 = ";
-  for (size_t i=0; i<n_vc+1; i++) {
+  cout << "iteration " << 0 << endl;
+  cout << "sigma2 = ";
+  for (size_t i = 0; i < n_vc + 1; i++) {
     if (noconstrain) {
-      cout<<gsl_vector_get(log_sigma2, i)<<" ";
+      cout << gsl_vector_get(log_sigma2, i) << " ";
     } else {
-      cout<<exp(gsl_vector_get(log_sigma2, i))<<" ";
+      cout << exp(gsl_vector_get(log_sigma2, i)) << " ";
     }
   }
-  cout<<endl;
+  cout << endl;
 
   // Set up fdf.
   gsl_multiroot_function_fdf FDF;
-  FDF.n=n_vc+1;
-  FDF.params=&params;
-  FDF.f=&LogRL_dev1;
-  FDF.df=&LogRL_dev2;
-  FDF.fdf=&LogRL_dev12;
+  FDF.n = n_vc + 1;
+  FDF.params = &params;
+  FDF.f = &LogRL_dev1;
+  FDF.df = &LogRL_dev2;
+  FDF.fdf = &LogRL_dev12;
 
   // Set up solver.
   int status;
-  int iter=0, max_iter=100;
+  int iter = 0, max_iter = 100;
 
   const gsl_multiroot_fdfsolver_type *T_fdf;
   gsl_multiroot_fdfsolver *s_fdf;
-  T_fdf=gsl_multiroot_fdfsolver_hybridsj;
-  s_fdf=gsl_multiroot_fdfsolver_alloc (T_fdf, n_vc+1);
+  T_fdf = gsl_multiroot_fdfsolver_hybridsj;
+  s_fdf = gsl_multiroot_fdfsolver_alloc(T_fdf, n_vc + 1);
 
-  gsl_multiroot_fdfsolver_set (s_fdf, &FDF, log_sigma2);
+  gsl_multiroot_fdfsolver_set(s_fdf, &FDF, log_sigma2);
 
   do {
     iter++;
-    status=gsl_multiroot_fdfsolver_iterate (s_fdf);
+    status = gsl_multiroot_fdfsolver_iterate(s_fdf);
 
-    if (status) break;
+    if (status)
+      break;
 
-    cout<<"iteration "<<iter<<endl;
-    cout<<"sigma2 = ";
-    for (size_t i=0; i<n_vc+1; i++) {
+    cout << "iteration " << iter << endl;
+    cout << "sigma2 = ";
+    for (size_t i = 0; i < n_vc + 1; i++) {
       if (noconstrain) {
-	cout<<gsl_vector_get(s_fdf->x, i)<<" ";
+        cout << gsl_vector_get(s_fdf->x, i) << " ";
       } else {
-	cout<<exp(gsl_vector_get(s_fdf->x, i))<<" ";
+        cout << exp(gsl_vector_get(s_fdf->x, i)) << " ";
       }
     }
-    cout<<endl;
-    status=gsl_multiroot_test_residual (s_fdf->f, 1e-3);
-  }
-  while (status==GSL_CONTINUE && iter<max_iter);
+    cout << endl;
+    status = gsl_multiroot_test_residual(s_fdf->f, 1e-3);
+  } while (status == GSL_CONTINUE && iter < max_iter);
 
   // Obtain Hessian and Hessian inverse.
-  int sig=LogRL_dev12 (s_fdf->x, &params, dev1, dev2);
+  int sig = LogRL_dev12(s_fdf->x, &params, dev1, dev2);
 
-  gsl_permutation * pmt=gsl_permutation_alloc (n_vc+1);
-  LUDecomp (dev2, pmt, &sig);
-  LUInvert (dev2, pmt, Hessian);
+  gsl_permutation *pmt = gsl_permutation_alloc(n_vc + 1);
+  LUDecomp(dev2, pmt, &sig);
+  LUInvert(dev2, pmt, Hessian);
   gsl_permutation_free(pmt);
 
   // Save sigma2 and se_sigma2.
-  v_sigma2.clear(); v_se_sigma2.clear();
-  for (size_t i=0; i<n_vc+1; i++) {
+  v_sigma2.clear();
+  v_se_sigma2.clear();
+  for (size_t i = 0; i < n_vc + 1; i++) {
     if (noconstrain) {
-      d=gsl_vector_get(s_fdf->x, i);
+      d = gsl_vector_get(s_fdf->x, i);
     } else {
-      d=exp(gsl_vector_get(s_fdf->x, i));
+      d = exp(gsl_vector_get(s_fdf->x, i));
     }
     v_sigma2.push_back(d);
 
     if (noconstrain) {
-      d=-1.0*gsl_matrix_get(Hessian, i, i);
+      d = -1.0 * gsl_matrix_get(Hessian, i, i);
     } else {
-      d=-1.0*d*d*gsl_matrix_get(Hessian, i, i);
+      d = -1.0 * d * d * gsl_matrix_get(Hessian, i, i);
     }
     v_se_sigma2.push_back(sqrt(d));
   }
 
-  s=0;
-  for (size_t i=0; i<n_vc; i++) {
-    s+=v_traceG[i]*v_sigma2[i];
+  s = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    s += v_traceG[i] * v_sigma2[i];
   }
-  s+=v_sigma2[n_vc];
+  s += v_sigma2[n_vc];
 
   // Compute pve.
-  v_pve.clear(); pve_total=0;
-  for (size_t i=0; i<n_vc; i++) {
-    d=v_traceG[i]*v_sigma2[i]/s;
+  v_pve.clear();
+  pve_total = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    d = v_traceG[i] * v_sigma2[i] / s;
     v_pve.push_back(d);
-    pve_total+=d;
+    pve_total += d;
   }
 
   // Compute se_pve; k=n_vc+1: total.
   double d1, d2;
-  v_se_pve.clear(); se_pve_total=0;
-  for (size_t k=0; k<n_vc+1; k++) {
-    d=0;
-    for (size_t i=0; i<n_vc+1; i++) {
+  v_se_pve.clear();
+  se_pve_total = 0;
+  for (size_t k = 0; k < n_vc + 1; k++) {
+    d = 0;
+    for (size_t i = 0; i < n_vc + 1; i++) {
       if (noconstrain) {
-	d1=gsl_vector_get(s_fdf->x, i);
-	d1=1;
+        d1 = gsl_vector_get(s_fdf->x, i);
+        d1 = 1;
       } else {
-	d1=exp(gsl_vector_get(s_fdf->x, i));
+        d1 = exp(gsl_vector_get(s_fdf->x, i));
       }
 
-      if (k<n_vc) {
-	if (i==k) {
-	  d1*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s);
-	} else if (i==n_vc) {
-	  d1*=-1*v_traceG[k]*v_sigma2[k]/(s*s);
-	} else {
-	  d1*=-1*v_traceG[i]*v_traceG[k]*v_sigma2[k]/(s*s);
-	}
+      if (k < n_vc) {
+        if (i == k) {
+          d1 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
+        } else if (i == n_vc) {
+          d1 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
+        } else {
+          d1 *= -1 * v_traceG[i] * v_traceG[k] * v_sigma2[k] / (s * s);
+        }
       } else {
-	if (i==k) {
-	  d1*=-1*(s-v_sigma2[n_vc])/(s*s);
-	} else {
-	  d1*=v_traceG[i]*v_sigma2[n_vc]/(s*s);
-	}
-      }
-
-      for (size_t j=0; j<n_vc+1; j++) {
-	if (noconstrain) {
-	  d2=gsl_vector_get(s_fdf->x, j);
-	  d2=1;
-	} else {
-	  d2=exp(gsl_vector_get(s_fdf->x, j));
-	}
-
-	if (k<n_vc) {
-	  if (j==k) {
-	    d2*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s);
-	  } else if (j==n_vc) {
-	    d2*=-1*v_traceG[k]*v_sigma2[k]/(s*s);
-	  } else {
-	    d2*=-1*v_traceG[j]*v_traceG[k]*v_sigma2[k]/(s*s);
-	  }
-	} else {
-	  if (j==k) {
-	    d2*=-1*(s-v_sigma2[n_vc])/(s*s);
-	  } else {
-	    d2*=v_traceG[j]*v_sigma2[n_vc]/(s*s);
-	  }
-	}
-
-	d+=-1.0*d1*d2*gsl_matrix_get(Hessian, i, j);
-      }
-    }
-
-    if (k<n_vc) {
-      v_se_pve.push_back(sqrt(d) );
+        if (i == k) {
+          d1 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
+        } else {
+          d1 *= v_traceG[i] * v_sigma2[n_vc] / (s * s);
+        }
+      }
+
+      for (size_t j = 0; j < n_vc + 1; j++) {
+        if (noconstrain) {
+          d2 = gsl_vector_get(s_fdf->x, j);
+          d2 = 1;
+        } else {
+          d2 = exp(gsl_vector_get(s_fdf->x, j));
+        }
+
+        if (k < n_vc) {
+          if (j == k) {
+            d2 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
+          } else if (j == n_vc) {
+            d2 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
+          } else {
+            d2 *= -1 * v_traceG[j] * v_traceG[k] * v_sigma2[k] / (s * s);
+          }
+        } else {
+          if (j == k) {
+            d2 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
+          } else {
+            d2 *= v_traceG[j] * v_sigma2[n_vc] / (s * s);
+          }
+        }
+
+        d += -1.0 * d1 * d2 * gsl_matrix_get(Hessian, i, j);
+      }
+    }
+
+    if (k < n_vc) {
+      v_se_pve.push_back(sqrt(d));
     } else {
-      se_pve_total=sqrt(d);
+      se_pve_total = sqrt(d);
     }
   }
 
@@ -1758,252 +1923,265 @@ void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K,
 }
 
 // Ks are not scaled.
-void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W,
-		    const gsl_vector *y) {
-  size_t n1=K->size1, n2=K->size2;
-  size_t n_vc=n2/n1;
+void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W,
+                   const gsl_vector *y) {
+  size_t n1 = K->size1, n2 = K->size2;
+  size_t n_vc = n2 / n1;
 
   double d, y2_sum, tau_inv, se_tau_inv;
 
   // New matrices/vectors.
-  gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2);
-  gsl_vector *y_scale=gsl_vector_alloc (n1);
-  gsl_vector *y2=gsl_vector_alloc (n1);
-  gsl_vector *n1_vec=gsl_vector_alloc (n1);
-  gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc);
-  gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc);
-  gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1);
-  gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1);
+  gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
+  gsl_vector *y_scale = gsl_vector_alloc(n1);
+  gsl_vector *y2 = gsl_vector_alloc(n1);
+  gsl_vector *n1_vec = gsl_vector_alloc(n1);
+  gsl_matrix *Ay = gsl_matrix_alloc(n1, n_vc);
+  gsl_matrix *K2 = gsl_matrix_alloc(n1, n_vc * n_vc);
+  gsl_matrix *K_tmp = gsl_matrix_alloc(n1, n1);
+  gsl_matrix *V_mat = gsl_matrix_alloc(n1, n1);
 
   // Old matrices/vectors.
-  gsl_vector *pve=gsl_vector_alloc (n_vc);
-  gsl_vector *se_pve=gsl_vector_alloc (n_vc);
-  gsl_vector *q_vec=gsl_vector_alloc (n_vc);
-  gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_vector *pve = gsl_vector_alloc(n_vc);
+  gsl_vector *se_pve = gsl_vector_alloc(n_vc);
+  gsl_vector *q_vec = gsl_vector_alloc(n_vc);
+  gsl_matrix *S1 = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *S2 = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *J_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
 
   int sig;
-  gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+  gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
 
   // Center and scale K by W, and standardize K further so that all
   // diagonal elements are 1
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     gsl_matrix_view Kscale_sub =
-      gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+        gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
     gsl_matrix_const_view K_sub =
-      gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1);
-    gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix);
+        gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
+    gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
 
-    CenterMatrix (&Kscale_sub.matrix, W);
-    StandardizeMatrix (&Kscale_sub.matrix);
+    CenterMatrix(&Kscale_sub.matrix, W);
+    StandardizeMatrix(&Kscale_sub.matrix);
   }
 
   // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1)
-  gsl_vector_memcpy (y_scale, y);
-  CenterVector (y_scale, W);
+  gsl_vector_memcpy(y_scale, y);
+  CenterVector(y_scale, W);
 
   // Compute y^2 and sum(y^2), which is also the variance of y*n1.
-  gsl_vector_memcpy (y2, y_scale);
-  gsl_vector_mul (y2, y_scale);
+  gsl_vector_memcpy(y2, y_scale);
+  gsl_vector_mul(y2, y_scale);
 
-  y2_sum=0;
-  for (size_t i=0; i<y2->size; i++) {
-    y2_sum+=gsl_vector_get(y2, i);
+  y2_sum = 0;
+  for (size_t i = 0; i < y2->size; i++) {
+    y2_sum += gsl_vector_get(y2, i);
   }
 
   // Compute the n_vc size q vector.
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     gsl_matrix_const_view Kscale_sub =
-      gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+        gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
 
-    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale,
-		   0.0, n1_vec);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, n1_vec);
 
-    gsl_blas_ddot (n1_vec, y_scale, &d);
-    gsl_vector_set(q_vec, i, d-y2_sum);
+    gsl_blas_ddot(n1_vec, y_scale, &d);
+    gsl_vector_set(q_vec, i, d - y2_sum);
   }
 
   // Compute the n_vc by n_vc S1 and S2 matrix (and eventually
   // S=S1-\tau^{-1}S2).
-  for (size_t i=0; i<n_vc; i++) {
+  for (size_t i = 0; i < n_vc; i++) {
     gsl_matrix_const_view Kscale_sub1 =
-      gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+        gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
 
-    for (size_t j=i; j<n_vc; j++) {
+    for (size_t j = i; j < n_vc; j++) {
       gsl_matrix_const_view Kscale_sub2 =
-	gsl_matrix_const_submatrix (K_scale, 0, n1*j, n1, n1);
+          gsl_matrix_const_submatrix(K_scale, 0, n1 * j, n1, n1);
 
-      gsl_matrix_memcpy (K_tmp, &Kscale_sub1.matrix);
-      gsl_matrix_mul_elements (K_tmp, &Kscale_sub2.matrix);
+      gsl_matrix_memcpy(K_tmp, &Kscale_sub1.matrix);
+      gsl_matrix_mul_elements(K_tmp, &Kscale_sub2.matrix);
 
       gsl_vector_set_zero(n1_vec);
-      for (size_t t=0; t<K_tmp->size1; t++) {
-	gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t);
-	gsl_vector_add (n1_vec, &Ktmp_col.vector);
+      for (size_t t = 0; t < K_tmp->size1; t++) {
+        gsl_vector_view Ktmp_col = gsl_matrix_column(K_tmp, t);
+        gsl_vector_add(n1_vec, &Ktmp_col.vector);
       }
-      gsl_vector_add_constant (n1_vec, -1.0);
+      gsl_vector_add_constant(n1_vec, -1.0);
 
       // Compute S1.
-      gsl_blas_ddot (n1_vec, y2, &d);
-      gsl_matrix_set (S1, i, j, 2*d);
-      if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);}
+      gsl_blas_ddot(n1_vec, y2, &d);
+      gsl_matrix_set(S1, i, j, 2 * d);
+      if (i != j) {
+        gsl_matrix_set(S1, j, i, 2 * d);
+      }
 
       // Compute S2.
-      d=0;
-      for (size_t t=0; t<n1_vec->size; t++) {
-	d+=gsl_vector_get (n1_vec, t);
+      d = 0;
+      for (size_t t = 0; t < n1_vec->size; t++) {
+        d += gsl_vector_get(n1_vec, t);
+      }
+      gsl_matrix_set(S2, i, j, d);
+      if (i != j) {
+        gsl_matrix_set(S2, j, i, d);
       }
-      gsl_matrix_set (S2, i, j, d);
-      if (i!=j) {gsl_matrix_set (S2, j, i, d);}
 
       // Save information to compute J.
-      gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j);
-      gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i);
+      gsl_vector_view K2col1 = gsl_matrix_column(K2, n_vc * i + j);
+      gsl_vector_view K2col2 = gsl_matrix_column(K2, n_vc * j + i);
 
       gsl_vector_memcpy(&K2col1.vector, n1_vec);
-      if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);}
+      if (i != j) {
+        gsl_vector_memcpy(&K2col2.vector, n1_vec);
+      }
     }
   }
 
   // Iterate to solve tau and h's.
-  size_t it=0;
-  double s=1;
-  while (abs(s)>1e-3 && it<100) {
+  size_t it = 0;
+  double s = 1;
+  while (abs(s) > 1e-3 && it < 100) {
 
     // Update tau_inv.
-    gsl_blas_ddot (q_vec, pve, &d);
-    if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;}
-    tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1));
-    if (it>0) {s/=tau_inv;}
+    gsl_blas_ddot(q_vec, pve, &d);
+    if (it > 0) {
+      s = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)) - tau_inv;
+    }
+    tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1));
+    if (it > 0) {
+      s /= tau_inv;
+    }
 
     // Update S.
-    gsl_matrix_memcpy (S_mat, S2);
-    gsl_matrix_scale (S_mat, -1*tau_inv);
-    gsl_matrix_add (S_mat, S1);
+    gsl_matrix_memcpy(S_mat, S2);
+    gsl_matrix_scale(S_mat, -1 * tau_inv);
+    gsl_matrix_add(S_mat, S1);
 
     // Update h=S^{-1}q.
     int sig;
-    gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
-    LUDecomp (S_mat, pmt, &sig);
-    LUInvert (S_mat, pmt, Si_mat);
-    gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+    gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+    LUDecomp(S_mat, pmt, &sig);
+    LUInvert(S_mat, pmt, Si_mat);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
 
     it++;
   }
 
   // Compute V matrix and A matrix (K_scale is destroyed, so need to
   // compute V first).
-  gsl_matrix_set_zero (V_mat);
-  for (size_t i=0; i<n_vc; i++) {
+  gsl_matrix_set_zero(V_mat);
+  for (size_t i = 0; i < n_vc; i++) {
     gsl_matrix_view Kscale_sub =
-      gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+        gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
 
     // Compute V.
-    gsl_matrix_memcpy (K_tmp, &Kscale_sub.matrix);
-    gsl_matrix_scale (K_tmp, gsl_vector_get(pve, i));
-    gsl_matrix_add (V_mat, K_tmp);
+    gsl_matrix_memcpy(K_tmp, &Kscale_sub.matrix);
+    gsl_matrix_scale(K_tmp, gsl_vector_get(pve, i));
+    gsl_matrix_add(V_mat, K_tmp);
 
     // Compute A; the corresponding Kscale is destroyed.
     gsl_matrix_const_view K2_sub =
-      gsl_matrix_const_submatrix (K2, 0, n_vc*i, n1, n_vc);
-    gsl_blas_dgemv (CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
+        gsl_matrix_const_submatrix(K2, 0, n_vc * i, n1, n_vc);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
 
-    for (size_t t=0; t<n1; t++) {
-      gsl_matrix_set (K_scale, t, n1*i+t, gsl_vector_get(n1_vec, t) );
+    for (size_t t = 0; t < n1; t++) {
+      gsl_matrix_set(K_scale, t, n1 * i + t, gsl_vector_get(n1_vec, t));
     }
 
     // Compute Ay.
-    gsl_vector_view Ay_col=gsl_matrix_column (Ay, i);
-    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale,
-		   0.0, &Ay_col.vector);
+    gsl_vector_view Ay_col = gsl_matrix_column(Ay, i);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0,
+                   &Ay_col.vector);
   }
-  gsl_matrix_scale (V_mat, tau_inv);
+  gsl_matrix_scale(V_mat, tau_inv);
 
   // Compute J matrix.
-  for (size_t i=0; i<n_vc; i++) {
-    gsl_vector_view Ay_col1=gsl_matrix_column (Ay, i);
+  for (size_t i = 0; i < n_vc; i++) {
+    gsl_vector_view Ay_col1 = gsl_matrix_column(Ay, i);
     gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, &Ay_col1.vector, 0.0, n1_vec);
 
-    for (size_t j=i; j<n_vc; j++) {
-      gsl_vector_view Ay_col2=gsl_matrix_column (Ay, j);
+    for (size_t j = i; j < n_vc; j++) {
+      gsl_vector_view Ay_col2 = gsl_matrix_column(Ay, j);
 
-      gsl_blas_ddot (&Ay_col2.vector, n1_vec, &d);
-      gsl_matrix_set (J_mat, i, j, 2.0*d);
-      if (i!=j) {gsl_matrix_set (J_mat, j, i, 2.0*d);}
+      gsl_blas_ddot(&Ay_col2.vector, n1_vec, &d);
+      gsl_matrix_set(J_mat, i, j, 2.0 * d);
+      if (i != j) {
+        gsl_matrix_set(J_mat, j, i, 2.0 * d);
+      }
     }
   }
 
   // Compute H^{-1}JH^{-1} as V(\hat h), where H=S2*tau_inv; this is
   // stored in Var_mat.
-  gsl_matrix_memcpy (S_mat, S2);
-  gsl_matrix_scale (S_mat, tau_inv);
+  gsl_matrix_memcpy(S_mat, S2);
+  gsl_matrix_scale(S_mat, tau_inv);
 
-  LUDecomp (S_mat, pmt, &sig);
-  LUInvert (S_mat, pmt, Si_mat);
+  LUDecomp(S_mat, pmt, &sig);
+  LUInvert(S_mat, pmt, Si_mat);
 
   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, J_mat, 0.0, S_mat);
   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, S_mat, Si_mat, 0.0, Var_mat);
 
   // Compute variance for tau_inv.
   gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec);
-  gsl_blas_ddot (y_scale, n1_vec, &d);
-  se_tau_inv=sqrt(2*d)/(double)n1;
+  gsl_blas_ddot(y_scale, n1_vec, &d);
+  se_tau_inv = sqrt(2 * d) / (double)n1;
 
   // Transform pve back to the original scale and save data.
-  v_pve.clear(); v_se_pve.clear();
-  v_sigma2.clear(); v_se_sigma2.clear();
+  v_pve.clear();
+  v_se_pve.clear();
+  v_sigma2.clear();
+  v_se_sigma2.clear();
 
-  pve_total=0, se_pve_total=0;
-  for (size_t i=0; i<n_vc; i++) {
-    d=gsl_vector_get (pve, i);
-    pve_total+=d;
+  pve_total = 0, se_pve_total = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    d = gsl_vector_get(pve, i);
+    pve_total += d;
 
     v_pve.push_back(d);
-    v_sigma2.push_back(d*tau_inv/v_traceG[i] );
+    v_sigma2.push_back(d * tau_inv / v_traceG[i]);
 
-    d=sqrt(gsl_matrix_get (Var_mat, i, i));
+    d = sqrt(gsl_matrix_get(Var_mat, i, i));
     v_se_pve.push_back(d);
-    v_se_sigma2.push_back(d*tau_inv/v_traceG[i]);
+    v_se_sigma2.push_back(d * tau_inv / v_traceG[i]);
 
-    for (size_t j=0; j<n_vc; j++) {
-      se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+    for (size_t j = 0; j < n_vc; j++) {
+      se_pve_total += gsl_matrix_get(Var_mat, i, j);
     }
   }
-  v_sigma2.push_back( (1-pve_total)*tau_inv );
-  v_se_sigma2.push_back(sqrt(se_pve_total)*tau_inv );
-  se_pve_total=sqrt(se_pve_total);
+  v_sigma2.push_back((1 - pve_total) * tau_inv);
+  v_se_sigma2.push_back(sqrt(se_pve_total) * tau_inv);
+  se_pve_total = sqrt(se_pve_total);
 
-  cout<<"sigma2 = ";
-  for (size_t i=0; i<n_vc+1; i++) {
-    cout<<v_sigma2[i]<<" ";
+  cout << "sigma2 = ";
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    cout << v_sigma2[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(sigma2) = ";
-  for (size_t i=0; i<n_vc+1; i++) {
-    cout<<v_se_sigma2[i]<<" ";
+  cout << "se(sigma2) = ";
+  for (size_t i = 0; i < n_vc + 1; i++) {
+    cout << v_se_sigma2[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"pve = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_pve[i]<<" ";
+  cout << "pve = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_pve[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(pve) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_se_pve[i]<<" ";
+  cout << "se(pve) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_se_pve[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  if (n_vc>1) {
-    cout<<"total pve = "<<pve_total<<endl;
-    cout<<"se(total pve) = "<<se_pve_total<<endl;
+  if (n_vc > 1) {
+    cout << "total pve = " << pve_total << endl;
+    cout << "se(total pve) = " << se_pve_total << endl;
   }
 
   gsl_permutation_free(pmt);
@@ -2031,234 +2209,248 @@ void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W,
 }
 
 // Read bimbam mean genotype file and compute XWz.
-bool BimbamXwz (const string &file_geno, const int display_pace,
-		vector<int> &indicator_idv, vector<int> &indicator_snp,
-		const vector<size_t> &vec_cat, const gsl_vector *w,
-		const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	size_t n_miss;
-	double d, geno_mean, geno_var;
-
-	size_t ni_test=XWz->size1;
-	gsl_vector *geno=gsl_vector_alloc (ni_test);
-	gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
-	gsl_vector *wz=gsl_vector_alloc (w->size);
-	gsl_vector_memcpy (wz, z);
-	gsl_vector_mul(wz, w);
-
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		geno_mean=0.0; n_miss=0; geno_var=0.0;
-		gsl_vector_set_all(geno_miss, 0);
-
-		size_t j=0;
-		for (size_t i=0; i<indicator_idv.size(); ++i) {
-		  if (indicator_idv[i]==0) {continue;}
-			ch_ptr=strtok (NULL, " , \t");
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set(geno_miss, i, 0);
-			  n_miss++;
-			} else {
-				d=atof(ch_ptr);
-				gsl_vector_set (geno, j, d);
-				gsl_vector_set (geno_miss, j, 1);
-				geno_mean+=d;
-				geno_var+=d*d;
-			}
-			j++;
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_test;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_test; ++i) {
-			if (gsl_vector_get (geno_miss, i)==0) {
-			  gsl_vector_set(geno, i, geno_mean);
-			}
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		gsl_vector_view XWz_col=
-		  gsl_matrix_column(XWz, vec_cat[ns_test]);
-		d=gsl_vector_get (wz, ns_test);
-		gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector);
-
-		ns_test++;
-	}
-
-	cout<<endl;
-
-	gsl_vector_free (geno);
-	gsl_vector_free (geno_miss);
-	gsl_vector_free (wz);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool BimbamXwz(const string &file_geno, const int display_pace,
+               vector<int> &indicator_idv, vector<int> &indicator_snp,
+               const vector<size_t> &vec_cat, const gsl_vector *w,
+               const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  size_t n_miss;
+  double d, geno_mean, geno_var;
+
+  size_t ni_test = XWz->size1;
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+  gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+  gsl_vector *wz = gsl_vector_alloc(w->size);
+  gsl_vector_memcpy(wz, z);
+  gsl_vector_mul(wz, w);
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    geno_mean = 0.0;
+    n_miss = 0;
+    geno_var = 0.0;
+    gsl_vector_set_all(geno_miss, 0);
+
+    size_t j = 0;
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+      ch_ptr = strtok(NULL, " , \t");
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(geno_miss, i, 0);
+        n_miss++;
+      } else {
+        d = atof(ch_ptr);
+        gsl_vector_set(geno, j, d);
+        gsl_vector_set(geno_miss, j, 1);
+        geno_mean += d;
+        geno_var += d * d;
+      }
+      j++;
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_test;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(geno_miss, i) == 0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
+    d = gsl_vector_get(wz, ns_test);
+    gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
+
+    ns_test++;
+  }
+
+  cout << endl;
+
+  gsl_vector_free(geno);
+  gsl_vector_free(geno_miss);
+  gsl_vector_free(wz);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read PLINK bed file and compute XWz.
-bool PlinkXwz (const string &file_bed, const int display_pace,
-	       vector<int> &indicator_idv, vector<int> &indicator_snp,
-	       const vector<size_t> &vec_cat, const gsl_vector *w,
-	       const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return false;
-	}
-
-	char ch[1];
-	bitset<8> b;
-
-	size_t n_miss, ci_total, ci_test;
-	double d, geno_mean, geno_var;
-
-	size_t ni_test=XWz->size1;
-	size_t ni_total=indicator_idv.size();
-	gsl_vector *geno=gsl_vector_alloc (ni_test);
-	gsl_vector *wz=gsl_vector_alloc (w->size);
-	gsl_vector_memcpy (wz, z);
-	gsl_vector_mul(wz, w);
-
-	int n_bit;
-
-	// Calculate n_bit and c, the number of bit for each snp.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		geno_mean=0.0;	n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-				if ((i==(n_bit-1)) && ci_total==ni_total) {
-				  break;
-				}
-				if (indicator_idv[ci_total]==0) {
-				  ci_total++;
-				  continue;
-				}
-
-				if (b[2*j]==0) {
-					if (b[2*j+1]==0) {
-					  gsl_vector_set(geno, ci_test, 2.0);
-					  geno_mean+=2.0; geno_var+=4.0;
-					}
-					else {
-					  gsl_vector_set(geno, ci_test, 1.0);
-					  geno_mean+=1.0; geno_var+=1.0;
-					}
-				}
-				else {
-					if (b[2*j+1]==1) {
-					  gsl_vector_set(geno, ci_test, 0.0);
-					}
-					else {
-					  gsl_vector_set(geno, ci_test, -9.0);
-					  n_miss++;
-					}
-				}
-
-				ci_test++;
-				ci_total++;
-			}
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_test;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_test; ++i) {
-			d=gsl_vector_get(geno,i);
-			if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		gsl_vector_view XWz_col=
-		  gsl_matrix_column(XWz, vec_cat[ns_test]);
-		d=gsl_vector_get (wz, ns_test);
-		gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector);
-
-		ns_test++;
-    }
-	cout<<endl;
-
-	gsl_vector_free (geno);
-	gsl_vector_free (wz);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool PlinkXwz(const string &file_bed, const int display_pace,
+              vector<int> &indicator_idv, vector<int> &indicator_snp,
+              const vector<size_t> &vec_cat, const gsl_vector *w,
+              const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false;
+  }
+
+  char ch[1];
+  bitset<8> b;
+
+  size_t n_miss, ci_total, ci_test;
+  double d, geno_mean, geno_var;
+
+  size_t ni_test = XWz->size1;
+  size_t ni_total = indicator_idv.size();
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+  gsl_vector *wz = gsl_vector_alloc(w->size);
+  gsl_vector_memcpy(wz, z);
+  gsl_vector_mul(wz, w);
+
+  int n_bit;
+
+  // Calculate n_bit and c, the number of bit for each snp.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Print the first three magic numbers.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    geno_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    geno_var = 0.0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(geno, ci_test, 2.0);
+            geno_mean += 2.0;
+            geno_var += 4.0;
+          } else {
+            gsl_vector_set(geno, ci_test, 1.0);
+            geno_mean += 1.0;
+            geno_var += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(geno, ci_test, 0.0);
+          } else {
+            gsl_vector_set(geno, ci_test, -9.0);
+            n_miss++;
+          }
+        }
+
+        ci_test++;
+        ci_total++;
+      }
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_test;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      d = gsl_vector_get(geno, i);
+      if (d == -9.0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
+    d = gsl_vector_get(wz, ns_test);
+    gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
+
+    ns_test++;
+  }
+  cout << endl;
+
+  gsl_vector_free(geno);
+  gsl_vector_free(wz);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read multiple genotype files and compute XWz.
-bool MFILEXwz (const size_t mfile_mode, const string &file_mfile,
-	       const int display_pace, vector<int> &indicator_idv,
-	       vector<vector<int> > &mindicator_snp,
-	       const vector<size_t> &vec_cat, const gsl_vector *w,
-	       const gsl_vector *z, gsl_matrix *XWz) {
+bool MFILEXwz(const size_t mfile_mode, const string &file_mfile,
+              const int display_pace, vector<int> &indicator_idv,
+              vector<vector<int>> &mindicator_snp,
+              const vector<size_t> &vec_cat, const gsl_vector *w,
+              const gsl_vector *z, gsl_matrix *XWz) {
   gsl_matrix_set_zero(XWz);
 
-  igzstream infile (file_mfile.c_str(), igzstream::in);
+  igzstream infile(file_mfile.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open mfile file: "<<file_mfile<<endl;
+    cout << "error! fail to open mfile file: " << file_mfile << endl;
     return false;
   }
 
   string file_name;
-  size_t l=0, ns_test=0;
+  size_t l = 0, ns_test = 0;
 
   while (!safeGetline(infile, file_name).eof()) {
-    if (mfile_mode==1) {
-      file_name+=".bed";
-      PlinkXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
-		vec_cat, w, z, ns_test, XWz);
+    if (mfile_mode == 1) {
+      file_name += ".bed";
+      PlinkXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
+               vec_cat, w, z, ns_test, XWz);
     } else {
-      BimbamXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
-		 vec_cat, w, z, ns_test, XWz);
+      BimbamXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
+                vec_cat, w, z, ns_test, XWz);
     }
 
     l++;
@@ -2271,228 +2463,241 @@ bool MFILEXwz (const size_t mfile_mode, const string &file_mfile,
 }
 
 // Read bimbam mean genotype file and compute X_i^TX_jWz.
-bool BimbamXtXwz (const string &file_geno, const int display_pace,
-		  vector<int> &indicator_idv, vector<int> &indicator_snp,
-		  const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return false;
-	}
-
-	string line;
-	char *ch_ptr;
-
-	size_t n_miss;
-	double d, geno_mean, geno_var;
-
-	size_t ni_test=XWz->size1;
-	gsl_vector *geno=gsl_vector_alloc (ni_test);
-	gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
-
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
-		  ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);
-		}
-		if (indicator_snp[t]==0) {continue;}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		geno_mean=0.0; n_miss=0; geno_var=0.0;
-		gsl_vector_set_all(geno_miss, 0);
-
-		size_t j=0;
-		for (size_t i=0; i<indicator_idv.size(); ++i) {
-		  if (indicator_idv[i]==0) {continue;}
-			ch_ptr=strtok (NULL, " , \t");
-			if (strcmp(ch_ptr, "NA")==0) {
-			  gsl_vector_set(geno_miss, i, 0);
-			  n_miss++;
-			}
-			else {
-				d=atof(ch_ptr);
-				gsl_vector_set (geno, j, d);
-				gsl_vector_set (geno_miss, j, 1);
-				geno_mean+=d;
-				geno_var+=d*d;
-			}
-			j++;
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_test;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_test; ++i) {
-			if (gsl_vector_get (geno_miss, i)==0) {
-			  gsl_vector_set(geno, i, geno_mean);
-			}
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		for (size_t i=0; i<XWz->size2; i++) {
-		  gsl_vector_const_view XWz_col=
-		    gsl_matrix_const_column(XWz, i);
-		  gsl_blas_ddot (geno, &XWz_col.vector, &d);
-		  gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var));
-		}
-
-		ns_test++;
-	}
-
-	cout<<endl;
-
-	gsl_vector_free (geno);
-	gsl_vector_free (geno_miss);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool BimbamXtXwz(const string &file_geno, const int display_pace,
+                 vector<int> &indicator_idv, vector<int> &indicator_snp,
+                 const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  size_t n_miss;
+  double d, geno_mean, geno_var;
+
+  size_t ni_test = XWz->size1;
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+  gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    geno_mean = 0.0;
+    n_miss = 0;
+    geno_var = 0.0;
+    gsl_vector_set_all(geno_miss, 0);
+
+    size_t j = 0;
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+      ch_ptr = strtok(NULL, " , \t");
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(geno_miss, i, 0);
+        n_miss++;
+      } else {
+        d = atof(ch_ptr);
+        gsl_vector_set(geno, j, d);
+        gsl_vector_set(geno_miss, j, 1);
+        geno_mean += d;
+        geno_var += d * d;
+      }
+      j++;
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_test;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(geno_miss, i) == 0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    for (size_t i = 0; i < XWz->size2; i++) {
+      gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
+      gsl_blas_ddot(geno, &XWz_col.vector, &d);
+      gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
+    }
+
+    ns_test++;
+  }
+
+  cout << endl;
+
+  gsl_vector_free(geno);
+  gsl_vector_free(geno_miss);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read PLINK bed file and compute XWz.
-bool PlinkXtXwz (const string &file_bed, const int display_pace,
-		 vector<int> &indicator_idv, vector<int> &indicator_snp,
-		 const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return false;
-	}
-
-	char ch[1];
-	bitset<8> b;
-
-	size_t n_miss, ci_total, ci_test;
-	double d, geno_mean, geno_var;
-
-	size_t ni_test=XWz->size1;
-	size_t ni_total=indicator_idv.size();
-	gsl_vector *geno=gsl_vector_alloc (ni_test);
-
-	int n_bit;
-
-	// Calculate n_bit and c, the number of bit for each snp.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
-		if (indicator_snp[t]==0) {continue;}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		geno_mean=0.0;	n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
-		for (int i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0;
-			for (size_t j=0; j<4; ++j) {
-				if ((i==(n_bit-1)) && ci_total==ni_total) {
-				  break;
-				}
-				if (indicator_idv[ci_total]==0) {
-				  ci_total++;
-				  continue;
-				}
-
-				if (b[2*j]==0) {
-				  if (b[2*j+1]==0) {
-				    gsl_vector_set(geno, ci_test, 2.0);
-				    geno_mean+=2.0;
-				    geno_var+=4.0;
-				  }
-				  else {
-				    gsl_vector_set(geno, ci_test, 1.0);
-				    geno_mean+=1.0;
-				    geno_var+=1.0;
-				  }
-				}
-				else {
-					if (b[2*j+1]==1) {
-					  gsl_vector_set(geno, ci_test, 0.0);
-					}
-					else {
-					  gsl_vector_set(geno, ci_test, -9.0);
-					  n_miss++;
-					}
-				}
-
-				ci_test++;
-				ci_total++;
-			}
-		}
-
-		geno_mean/=(double)(ni_test-n_miss);
-		geno_var+=geno_mean*geno_mean*(double)n_miss;
-		geno_var/=(double)ni_test;
-		geno_var-=geno_mean*geno_mean;
-
-		for (size_t i=0; i<ni_test; ++i) {
-			d=gsl_vector_get(geno,i);
-			if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
-		}
-
-		gsl_vector_add_constant (geno, -1.0*geno_mean);
-
-		for (size_t i=0; i<XWz->size2; i++) {
-		  gsl_vector_const_view XWz_col=
-		    gsl_matrix_const_column(XWz, i);
-		  gsl_blas_ddot (geno, &XWz_col.vector, &d);
-		  gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var));
-		}
-
-		ns_test++;
-	}
-	cout<<endl;
-
-	gsl_vector_free (geno);
-
-	infile.close();
-	infile.clear();
-
-	return true;
+bool PlinkXtXwz(const string &file_bed, const int display_pace,
+                vector<int> &indicator_idv, vector<int> &indicator_snp,
+                const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false;
+  }
+
+  char ch[1];
+  bitset<8> b;
+
+  size_t n_miss, ci_total, ci_test;
+  double d, geno_mean, geno_var;
+
+  size_t ni_test = XWz->size1;
+  size_t ni_total = indicator_idv.size();
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+
+  int n_bit;
+
+  // Calculate n_bit and c, the number of bit for each snp.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Print the first three magic numbers.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    geno_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    geno_var = 0.0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0;
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(geno, ci_test, 2.0);
+            geno_mean += 2.0;
+            geno_var += 4.0;
+          } else {
+            gsl_vector_set(geno, ci_test, 1.0);
+            geno_mean += 1.0;
+            geno_var += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(geno, ci_test, 0.0);
+          } else {
+            gsl_vector_set(geno, ci_test, -9.0);
+            n_miss++;
+          }
+        }
+
+        ci_test++;
+        ci_total++;
+      }
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_test;
+    geno_var -= geno_mean * geno_mean;
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      d = gsl_vector_get(geno, i);
+      if (d == -9.0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    for (size_t i = 0; i < XWz->size2; i++) {
+      gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
+      gsl_blas_ddot(geno, &XWz_col.vector, &d);
+      gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
+    }
+
+    ns_test++;
+  }
+  cout << endl;
+
+  gsl_vector_free(geno);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
 
 // Read multiple genotype files and compute XWz.
-bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile,
-		 const int display_pace, vector<int> &indicator_idv,
-		 vector<vector<int> > &mindicator_snp, const gsl_matrix *XWz,
-		 gsl_matrix *XtXWz) {
+bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile,
+                const int display_pace, vector<int> &indicator_idv,
+                vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz,
+                gsl_matrix *XtXWz) {
   gsl_matrix_set_zero(XtXWz);
 
-  igzstream infile (file_mfile.c_str(), igzstream::in);
+  igzstream infile(file_mfile.c_str(), igzstream::in);
   if (!infile) {
-    cout<<"error! fail to open mfile file: "<<file_mfile<<endl;
+    cout << "error! fail to open mfile file: " << file_mfile << endl;
     return false;
   }
 
   string file_name;
-  size_t l=0, ns_test=0;
+  size_t l = 0, ns_test = 0;
 
   while (!safeGetline(infile, file_name).eof()) {
-    if (mfile_mode==1) {
-      file_name+=".bed";
-      PlinkXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
-		  XWz, ns_test, XtXWz);
+    if (mfile_mode == 1) {
+      file_name += ".bed";
+      PlinkXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], XWz,
+                 ns_test, XtXWz);
     } else {
-      BimbamXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
-		   XWz, ns_test, XtXWz);
+      BimbamXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
+                  XWz, ns_test, XtXWz);
     }
 
     l++;
@@ -2506,217 +2711,225 @@ bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile,
 
 // Compute confidence intervals from summary statistics.
 void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz,
-	      const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
-	      const gsl_matrix *Svar_mat, const gsl_vector *w,
-	      const gsl_vector *z, const gsl_vector *s_vec,
-	      const vector<size_t> &vec_cat, const vector<double> &v_pve,
-	      vector<double> &v_se_pve, double &pve_total,
-	      double &se_pve_total, vector<double> &v_sigma2,
-	      vector<double> &v_se_sigma2, vector<double> &v_enrich,
-	      vector<double> &v_se_enrich) {
-  size_t n_vc=XWz->size2, ns_test=w->size, ni_test=XWz->size1;
+              const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
+              const gsl_matrix *Svar_mat, const gsl_vector *w,
+              const gsl_vector *z, const gsl_vector *s_vec,
+              const vector<size_t> &vec_cat, const vector<double> &v_pve,
+              vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+              vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+              vector<double> &v_enrich, vector<double> &v_se_enrich) {
+  size_t n_vc = XWz->size2, ns_test = w->size, ni_test = XWz->size1;
 
   // Set up matrices.
-  gsl_vector *w_pve=gsl_vector_alloc (ns_test);
-  gsl_vector *wz=gsl_vector_alloc (ns_test);
-  gsl_vector *zwz=gsl_vector_alloc (n_vc);
-  gsl_vector *zz=gsl_vector_alloc (n_vc);
-  gsl_vector *Xz_pve=gsl_vector_alloc (ni_test);
-  gsl_vector *WXtXWz=gsl_vector_alloc (ns_test);
-
-  gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc);
-  gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_vector *w_pve = gsl_vector_alloc(ns_test);
+  gsl_vector *wz = gsl_vector_alloc(ns_test);
+  gsl_vector *zwz = gsl_vector_alloc(n_vc);
+  gsl_vector *zz = gsl_vector_alloc(n_vc);
+  gsl_vector *Xz_pve = gsl_vector_alloc(ni_test);
+  gsl_vector *WXtXWz = gsl_vector_alloc(ns_test);
+
+  gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
+  gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
 
   double d, s0, s1, s, s_pve, s_snp;
 
   // Compute wz and zwz.
-  gsl_vector_memcpy (wz, z);
-  gsl_vector_mul (wz, w);
+  gsl_vector_memcpy(wz, z);
+  gsl_vector_mul(wz, w);
 
-  gsl_vector_set_zero (zwz);
-  gsl_vector_set_zero (zz);
-  for (size_t i=0; i<w->size; i++) {
-    d=gsl_vector_get (wz, i)*gsl_vector_get (z, i);
-    d+=gsl_vector_get (zwz, vec_cat[i]);
-    gsl_vector_set (zwz, vec_cat[i], d);
+  gsl_vector_set_zero(zwz);
+  gsl_vector_set_zero(zz);
+  for (size_t i = 0; i < w->size; i++) {
+    d = gsl_vector_get(wz, i) * gsl_vector_get(z, i);
+    d += gsl_vector_get(zwz, vec_cat[i]);
+    gsl_vector_set(zwz, vec_cat[i], d);
 
-    d=gsl_vector_get (z, i)*gsl_vector_get (z, i);
-    d+=gsl_vector_get (zz, vec_cat[i]);
-    gsl_vector_set (zz, vec_cat[i], d);
+    d = gsl_vector_get(z, i) * gsl_vector_get(z, i);
+    d += gsl_vector_get(zz, vec_cat[i]);
+    gsl_vector_set(zz, vec_cat[i], d);
   }
 
   // Compute wz, ve and Xz_pve.
-  gsl_vector_set_zero (Xz_pve); s_pve=0; s_snp=0;
-  for (size_t i=0; i<n_vc; i++) {
-    s_pve+=v_pve[i];
-    s_snp+=gsl_vector_get(s_vec, i);
+  gsl_vector_set_zero(Xz_pve);
+  s_pve = 0;
+  s_snp = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    s_pve += v_pve[i];
+    s_snp += gsl_vector_get(s_vec, i);
 
-    gsl_vector_const_view Xz_col=gsl_matrix_const_column (Xz, i);
-    gsl_blas_daxpy (v_pve[i]/gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve);
+    gsl_vector_const_view Xz_col = gsl_matrix_const_column(Xz, i);
+    gsl_blas_daxpy(v_pve[i] / gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve);
   }
 
   // Set up wpve vector.
-  for (size_t i=0; i<w->size; i++) {
-    d=v_pve[vec_cat[i]]/gsl_vector_get(s_vec, vec_cat[i]);
-    gsl_vector_set (w_pve, i, d);
+  for (size_t i = 0; i < w->size; i++) {
+    d = v_pve[vec_cat[i]] / gsl_vector_get(s_vec, vec_cat[i]);
+    gsl_vector_set(w_pve, i, d);
   }
 
   // Compute Vq (in qvar_mat).
-  s0=1-s_pve;
-  for (size_t i=0; i<n_vc; i++) {
-    s0+=gsl_vector_get (zz, i)*v_pve[i]/gsl_vector_get(s_vec, i);
+  s0 = 1 - s_pve;
+  for (size_t i = 0; i < n_vc; i++) {
+    s0 += gsl_vector_get(zz, i) * v_pve[i] / gsl_vector_get(s_vec, i);
   }
 
-  for (size_t i=0; i<n_vc; i++) {
-    s1=s0;
-    s1-=gsl_vector_get (zwz, i)*(1-s_pve)/gsl_vector_get(s_vec, i);
+  for (size_t i = 0; i < n_vc; i++) {
+    s1 = s0;
+    s1 -= gsl_vector_get(zwz, i) * (1 - s_pve) / gsl_vector_get(s_vec, i);
 
-    gsl_vector_const_view XWz_col1=gsl_matrix_const_column (XWz, i);
-    gsl_vector_const_view XtXWz_col1=gsl_matrix_const_column (XtXWz, i);
+    gsl_vector_const_view XWz_col1 = gsl_matrix_const_column(XWz, i);
+    gsl_vector_const_view XtXWz_col1 = gsl_matrix_const_column(XtXWz, i);
 
-    gsl_vector_memcpy (WXtXWz, &XtXWz_col1.vector);
-    gsl_vector_mul (WXtXWz, w_pve);
+    gsl_vector_memcpy(WXtXWz, &XtXWz_col1.vector);
+    gsl_vector_mul(WXtXWz, w_pve);
 
-    gsl_blas_ddot (Xz_pve, &XWz_col1.vector, &d);
-    s1-=d/gsl_vector_get(s_vec, i);
+    gsl_blas_ddot(Xz_pve, &XWz_col1.vector, &d);
+    s1 -= d / gsl_vector_get(s_vec, i);
 
-    for (size_t j=0; j<n_vc; j++) {
-      s=s1;
+    for (size_t j = 0; j < n_vc; j++) {
+      s = s1;
 
-      s-=gsl_vector_get (zwz, j)*(1-s_pve)/gsl_vector_get(s_vec, j);
+      s -= gsl_vector_get(zwz, j) * (1 - s_pve) / gsl_vector_get(s_vec, j);
 
-      gsl_vector_const_view XWz_col2=gsl_matrix_const_column (XWz, j);
-      gsl_vector_const_view XtXWz_col2=gsl_matrix_const_column (XtXWz, j);
+      gsl_vector_const_view XWz_col2 = gsl_matrix_const_column(XWz, j);
+      gsl_vector_const_view XtXWz_col2 = gsl_matrix_const_column(XtXWz, j);
 
-      gsl_blas_ddot (WXtXWz, &XtXWz_col2.vector, &d);
-      s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j));
+      gsl_blas_ddot(WXtXWz, &XtXWz_col2.vector, &d);
+      s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j));
 
-      gsl_blas_ddot (&XWz_col1.vector, &XWz_col2.vector, &d);
-      s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j))*(1-s_pve);
+      gsl_blas_ddot(&XWz_col1.vector, &XWz_col2.vector, &d);
+      s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j)) *
+           (1 - s_pve);
 
-      gsl_blas_ddot (Xz_pve, &XWz_col2.vector, &d);
-      s-=d/gsl_vector_get(s_vec, j);
+      gsl_blas_ddot(Xz_pve, &XWz_col2.vector, &d);
+      s -= d / gsl_vector_get(s_vec, j);
 
-      gsl_matrix_set (qvar_mat, i, j, s);
+      gsl_matrix_set(qvar_mat, i, j, s);
     }
   }
 
-  d=(double)(ni_test-1);
-  gsl_matrix_scale (qvar_mat, 2.0/(d*d*d));
+  d = (double)(ni_test - 1);
+  gsl_matrix_scale(qvar_mat, 2.0 / (d * d * d));
 
   // Calculate S^{-1}.
-  gsl_matrix_memcpy (tmp_mat, S_mat);
+  gsl_matrix_memcpy(tmp_mat, S_mat);
   int sig;
-  gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
-  LUDecomp (tmp_mat, pmt, &sig);
-  LUInvert (tmp_mat, pmt, Si_mat);
+  gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+  LUDecomp(tmp_mat, pmt, &sig);
+  LUInvert(tmp_mat, pmt, Si_mat);
 
   // Calculate variance for the estimates.
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=i; j<n_vc; j++) {
-      d=gsl_matrix_get(Svar_mat, i, j);
-      d*=v_pve[i]*v_pve[j];
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = i; j < n_vc; j++) {
+      d = gsl_matrix_get(Svar_mat, i, j);
+      d *= v_pve[i] * v_pve[j];
 
-      d+=gsl_matrix_get(qvar_mat, i, j);
+      d += gsl_matrix_get(qvar_mat, i, j);
       gsl_matrix_set(Var_mat, i, j, d);
-      if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);}
+      if (i != j) {
+        gsl_matrix_set(Var_mat, j, i, d);
+      }
     }
   }
 
-  gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Si_mat,Var_mat,0.0,tmp_mat);
-  gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,tmp_mat,Si_mat,0.0,Var_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
+                 tmp_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
+                 Var_mat);
 
   // Compute sigma2 per snp, enrich.
-  v_sigma2.clear(); v_enrich.clear();
-  for (size_t i=0; i<n_vc; i++) {
-    v_sigma2.push_back(v_pve[i]/gsl_vector_get(s_vec, i) );
-    v_enrich.push_back(v_pve[i]/gsl_vector_get(s_vec, i)*s_snp/s_pve);
+  v_sigma2.clear();
+  v_enrich.clear();
+  for (size_t i = 0; i < n_vc; i++) {
+    v_sigma2.push_back(v_pve[i] / gsl_vector_get(s_vec, i));
+    v_enrich.push_back(v_pve[i] / gsl_vector_get(s_vec, i) * s_snp / s_pve);
   }
 
   // Compute se_pve, se_sigma2.
-  for (size_t i=0; i<n_vc; i++) {
-    d=sqrt(gsl_matrix_get(Var_mat, i, i));
+  for (size_t i = 0; i < n_vc; i++) {
+    d = sqrt(gsl_matrix_get(Var_mat, i, i));
     v_se_pve.push_back(d);
-    v_se_sigma2.push_back(d/gsl_vector_get(s_vec, i));
+    v_se_sigma2.push_back(d / gsl_vector_get(s_vec, i));
   }
 
   // Compute pve_total, se_pve_total.
-  pve_total=0;
-  for (size_t i=0; i<n_vc; i++) {
-    pve_total+=v_pve[i];
+  pve_total = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    pve_total += v_pve[i];
   }
 
-  se_pve_total=0;
-  for (size_t i=0; i<n_vc; i++) {
-    for (size_t j=0; j<n_vc; j++) {
-      se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+  se_pve_total = 0;
+  for (size_t i = 0; i < n_vc; i++) {
+    for (size_t j = 0; j < n_vc; j++) {
+      se_pve_total += gsl_matrix_get(Var_mat, i, j);
     }
   }
-  se_pve_total=sqrt(se_pve_total);
+  se_pve_total = sqrt(se_pve_total);
 
   // Compute se_enrich.
   gsl_matrix_set_identity(tmp_mat);
 
   double d1;
-  for (size_t i=0; i<n_vc; i++) {
-    d=v_pve[i]/s_pve;
-    d1=gsl_vector_get(s_vec, i);
-    for (size_t j=0; j<n_vc; j++) {
-      if (i==j) {
-	gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve);
+  for (size_t i = 0; i < n_vc; i++) {
+    d = v_pve[i] / s_pve;
+    d1 = gsl_vector_get(s_vec, i);
+    for (size_t j = 0; j < n_vc; j++) {
+      if (i == j) {
+        gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
       } else {
-	gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve);
+        gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
       }
     }
   }
-  gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,tmp_mat,Var_mat,0.0,tmp_mat1);
-  gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,tmp_mat1,tmp_mat,0.0,
-		 VarEnrich_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
+                 tmp_mat1);
+  gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
+                 VarEnrich_mat);
 
-  for (size_t i=0; i<n_vc; i++) {
-    d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
+  for (size_t i = 0; i < n_vc; i++) {
+    d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
     v_se_enrich.push_back(d);
   }
 
-  cout<<"pve = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_pve[i]<<" ";
+  cout << "pve = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_pve[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(pve) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_se_pve[i]<<" ";
+  cout << "se(pve) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_se_pve[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"sigma2 per snp = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_sigma2[i]<<" ";
+  cout << "sigma2 per snp = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_sigma2[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(sigma2 per snp) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_se_sigma2[i]<<" ";
+  cout << "se(sigma2 per snp) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_se_sigma2[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"enrichment = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_enrich[i]<<" ";
+  cout << "enrichment = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_enrich[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
-  cout<<"se(enrichment) = ";
-  for (size_t i=0; i<n_vc; i++) {
-    cout<<v_se_enrich[i]<<" ";
+  cout << "se(enrichment) = ";
+  for (size_t i = 0; i < n_vc; i++) {
+    cout << v_se_enrich[i] << " ";
   }
-  cout<<endl;
+  cout << endl;
 
   // Delete matrices.
   gsl_matrix_free(Si_mat);
diff --git a/src/vc.h b/src/vc.h
index 43c6979..c6f66b4 100644
--- a/src/vc.h
+++ b/src/vc.h
@@ -19,25 +19,25 @@
 #ifndef __VC_H__
 #define __VC_H__
 
-#include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
 #include "io.h"
+#include "param.h"
 
 using namespace std;
 
 class VC_PARAM {
 
 public:
-	const gsl_matrix *K;
-	const gsl_matrix *W;
-	const gsl_vector *y;
-	gsl_matrix *P;
-	gsl_vector *Py;
-	gsl_matrix *KPy_mat;
-	gsl_matrix *PKPy_mat;
-	gsl_matrix *Hessian;
-	bool noconstrain;
+  const gsl_matrix *K;
+  const gsl_matrix *W;
+  const gsl_vector *y;
+  gsl_matrix *P;
+  gsl_vector *Py;
+  gsl_matrix *KPy_mat;
+  gsl_matrix *PKPy_mat;
+  gsl_matrix *Hessian;
+  bool noconstrain;
 };
 
 class VC {
@@ -45,91 +45,87 @@ class VC {
 public:
   // IO-related parameters
   size_t a_mode;
-	string file_cat;
-	string file_beta;
-	string file_cor;
-	string file_mq;
-	string file_ms;
-
-	string file_out;
-	string path_out;
-
-	set<string> setSnps;
-
-	size_t ni_total_ref, ns_total_ref, ns_pair;
-	size_t ni_total, ns_total, ns_test;
-	size_t n_vc;
-
-	double pve_total, se_pve_total;
-	vector<double> v_sigma2;
-	vector<double> v_se_sigma2;
-	vector<double> v_pve;
-	vector<double> v_se_pve;
-	vector<double> v_traceG;
-	vector<double> v_beta;
-	vector<double> v_se_beta;
-
-	size_t crt;
-	double window_cm, window_bp, window_ns;
-
-	double time_UtX;
-	double time_opt;
-
-	// Main functions.
-	void CopyFromParam (PARAM &cPar);
-	void CopyToParam (PARAM &cPar);
-	void WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec,
-			   const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
-			   const gsl_matrix *Svar_mat);
-	void CalcVChe (const gsl_matrix *K, const gsl_matrix *W,
-		       const gsl_vector *y);
-	void CalcVCreml (const bool noconstrain, const gsl_matrix *K,
-			 const gsl_matrix *W, const gsl_vector *y);
-	void CalcVCacl (const gsl_matrix *K, const gsl_matrix *W,
-			const gsl_vector *y);
+  string file_cat;
+  string file_beta;
+  string file_cor;
+  string file_mq;
+  string file_ms;
+
+  string file_out;
+  string path_out;
+
+  set<string> setSnps;
+
+  size_t ni_total_ref, ns_total_ref, ns_pair;
+  size_t ni_total, ns_total, ns_test;
+  size_t n_vc;
+
+  double pve_total, se_pve_total;
+  vector<double> v_sigma2;
+  vector<double> v_se_sigma2;
+  vector<double> v_pve;
+  vector<double> v_se_pve;
+  vector<double> v_traceG;
+  vector<double> v_beta;
+  vector<double> v_se_beta;
+
+  size_t crt;
+  double window_cm, window_bp, window_ns;
+
+  double time_UtX;
+  double time_opt;
+
+  // Main functions.
+  void CopyFromParam(PARAM &cPar);
+  void CopyToParam(PARAM &cPar);
+  void WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec,
+                    const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
+                    const gsl_matrix *Svar_mat);
+  void CalcVChe(const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y);
+  void CalcVCreml(const bool noconstrain, const gsl_matrix *K,
+                  const gsl_matrix *W, const gsl_vector *y);
+  void CalcVCacl(const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y);
 };
 
 void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
-	      const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
-	      const gsl_vector *s_vec, const double df, vector<double> &v_pve,
-	      vector<double> &v_se_pve, double &pve_total,
-	      double &se_pve_total, vector<double> &v_sigma2,
-	      vector<double> &v_se_sigma2, vector<double> &v_enrich,
-	      vector<double> &v_se_enrich);
-
-bool BimbamXwz (const string &file_geno, const int display_pace,
-		vector<int> &indicator_idv, vector<int> &indicator_snp,
-		const vector<size_t> &vec_cat, const gsl_vector *w,
-		const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
-bool PlinkXwz (const string &file_bed, const int display_pace,
-	       vector<int> &indicator_idv, vector<int> &indicator_snp,
-	       const vector<size_t> &vec_cat, const gsl_vector *w,
-	       const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
-bool MFILEXwz (const size_t mfile_mode, const string &file_mfile,
-	       const int display_pace, vector<int> &indicator_idv,
-	       vector<vector<int> > &mindicator_snp,
-	       const vector<size_t> &vec_cat, const gsl_vector *w,
-	       const gsl_vector *z, gsl_matrix *XWz);
-
-bool BimbamXtXwz (const string &file_geno, const int display_pace,
-		  vector<int> &indicator_idv, vector<int> &indicator_snp,
-		  const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
-bool PlinkXtXwz (const string &file_bed, const int display_pace,
-		 vector<int> &indicator_idv, vector<int> &indicator_snp,
-		 const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
-bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile,
-		 const int display_pace, vector<int> &indicator_idv,
-		 vector<vector<int> > &mindicator_snp, const gsl_matrix *XWz,
-		 gsl_matrix *XtXWz);
+              const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
+              const gsl_vector *s_vec, const double df, vector<double> &v_pve,
+              vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+              vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+              vector<double> &v_enrich, vector<double> &v_se_enrich);
+
+bool BimbamXwz(const string &file_geno, const int display_pace,
+               vector<int> &indicator_idv, vector<int> &indicator_snp,
+               const vector<size_t> &vec_cat, const gsl_vector *w,
+               const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
+bool PlinkXwz(const string &file_bed, const int display_pace,
+              vector<int> &indicator_idv, vector<int> &indicator_snp,
+              const vector<size_t> &vec_cat, const gsl_vector *w,
+              const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
+bool MFILEXwz(const size_t mfile_mode, const string &file_mfile,
+              const int display_pace, vector<int> &indicator_idv,
+              vector<vector<int>> &mindicator_snp,
+              const vector<size_t> &vec_cat, const gsl_vector *w,
+              const gsl_vector *z, gsl_matrix *XWz);
+
+bool BimbamXtXwz(const string &file_geno, const int display_pace,
+                 vector<int> &indicator_idv, vector<int> &indicator_snp,
+                 const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
+bool PlinkXtXwz(const string &file_bed, const int display_pace,
+                vector<int> &indicator_idv, vector<int> &indicator_snp,
+                const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
+bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile,
+                const int display_pace, vector<int> &indicator_idv,
+                vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz,
+                gsl_matrix *XtXWz);
 
 void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz,
-	      const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
-	      const gsl_matrix *Svar_mat, const gsl_vector *w,
-	      const gsl_vector *z, const gsl_vector *s_vec,
-	      const vector<size_t> &vec_cat, const vector<double> &v_pve,
-	      vector<double> &v_se_pve, double &pve_total,
-	      double &se_pve_total, vector<double> &v_sigma2,
-	      vector<double> &v_se_sigma2, vector<double> &v_enrich,
-	      vector<double> &v_se_enrich);
+              const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
+              const gsl_matrix *Svar_mat, const gsl_vector *w,
+              const gsl_vector *z, const gsl_vector *s_vec,
+              const vector<size_t> &vec_cat, const vector<double> &v_pve,
+              vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+              vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+              vector<double> &v_enrich, vector<double> &v_se_enrich);
 
 #endif