about summary refs log tree commit diff
path: root/src/mvlmm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mvlmm.cpp')
-rw-r--r--src/mvlmm.cpp10159
1 files changed, 5101 insertions, 5058 deletions
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 78cd926..eb591ca 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -16,895 +16,914 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <sstream>
 
-#include <iomanip>
+#include <assert.h>
+#include <bitset>
 #include <cmath>
+#include <cstring>
+#include <iomanip>
 #include <iostream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <bitset>
-#include <cstring>
-#include <assert.h>
 
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
-#include "gsl/gsl_roots.h"
-#include "gsl/gsl_min.h"
 #include "gsl/gsl_integration.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
 
-#include "io.h"
-#include "lapack.h"
 #include "eigenlib.h"
 #include "gzstream.h"
+#include "io.h"
+#include "lapack.h"
 #include "lmm.h"
 #include "mvlmm.h"
 
 using namespace std;
 
 // In this file, X, Y are already transformed (i.e. UtX and UtY).
-void MVLMM::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
-
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_oxford=cPar.file_oxford;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
-
-	l_min=cPar.l_min;
-	l_max=cPar.l_max;
-	n_region=cPar.n_region;
-	p_nr=cPar.p_nr;
-	em_iter=cPar.em_iter;
-	nr_iter=cPar.nr_iter;
-	em_prec=cPar.em_prec;
-	nr_prec=cPar.nr_prec;
-	crt=cPar.crt;
-
-	Vg_remle_null=cPar.Vg_remle_null;
-	Ve_remle_null=cPar.Ve_remle_null;
-	Vg_mle_null=cPar.Vg_mle_null;
-	Ve_mle_null=cPar.Ve_mle_null;
-
-	time_UtX=0.0;
-	time_opt=0.0;
-
-	ni_total=cPar.ni_total;
-	ns_total=cPar.ns_total;
-	ni_test=cPar.ni_test;
-	ns_test=cPar.ns_test;
-	n_cvt=cPar.n_cvt;
-
-	n_ph=cPar.n_ph;
-
-	indicator_idv=cPar.indicator_idv;
-	indicator_snp=cPar.indicator_snp;
-	snpInfo=cPar.snpInfo;
-
-	return;
+void MVLMM::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
+  d_pace = cPar.d_pace;
+
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_oxford = cPar.file_oxford;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
+
+  l_min = cPar.l_min;
+  l_max = cPar.l_max;
+  n_region = cPar.n_region;
+  p_nr = cPar.p_nr;
+  em_iter = cPar.em_iter;
+  nr_iter = cPar.nr_iter;
+  em_prec = cPar.em_prec;
+  nr_prec = cPar.nr_prec;
+  crt = cPar.crt;
+
+  Vg_remle_null = cPar.Vg_remle_null;
+  Ve_remle_null = cPar.Ve_remle_null;
+  Vg_mle_null = cPar.Vg_mle_null;
+  Ve_mle_null = cPar.Ve_mle_null;
+
+  time_UtX = 0.0;
+  time_opt = 0.0;
+
+  ni_total = cPar.ni_total;
+  ns_total = cPar.ns_total;
+  ni_test = cPar.ni_test;
+  ns_test = cPar.ns_test;
+  n_cvt = cPar.n_cvt;
+
+  n_ph = cPar.n_ph;
+
+  indicator_idv = cPar.indicator_idv;
+  indicator_snp = cPar.indicator_snp;
+  snpInfo = cPar.snpInfo;
+
+  return;
 }
 
-void MVLMM::CopyToParam (PARAM &cPar) {
-	cPar.time_UtX=time_UtX;
-	cPar.time_opt=time_opt;
+void MVLMM::CopyToParam(PARAM &cPar) {
+  cPar.time_UtX = time_UtX;
+  cPar.time_opt = time_opt;
 
-	cPar.Vg_remle_null=Vg_remle_null;
-	cPar.Ve_remle_null=Ve_remle_null;
-	cPar.Vg_mle_null=Vg_mle_null;
-	cPar.Ve_mle_null=Ve_mle_null;
+  cPar.Vg_remle_null = Vg_remle_null;
+  cPar.Ve_remle_null = Ve_remle_null;
+  cPar.Vg_mle_null = Vg_mle_null;
+  cPar.Ve_mle_null = Ve_mle_null;
 
-	cPar.VVg_remle_null=VVg_remle_null;
-	cPar.VVe_remle_null=VVe_remle_null;
-	cPar.VVg_mle_null=VVg_mle_null;
-	cPar.VVe_mle_null=VVe_mle_null;
+  cPar.VVg_remle_null = VVg_remle_null;
+  cPar.VVe_remle_null = VVe_remle_null;
+  cPar.VVg_mle_null = VVg_mle_null;
+  cPar.VVe_mle_null = VVe_mle_null;
 
-	cPar.beta_remle_null=beta_remle_null;
-	cPar.se_beta_remle_null=se_beta_remle_null;
-	cPar.beta_mle_null=beta_mle_null;
-	cPar.se_beta_mle_null=se_beta_mle_null;
+  cPar.beta_remle_null = beta_remle_null;
+  cPar.se_beta_remle_null = se_beta_remle_null;
+  cPar.beta_mle_null = beta_mle_null;
+  cPar.se_beta_mle_null = se_beta_mle_null;
 
-	cPar.logl_remle_H0=logl_remle_H0;
-	cPar.logl_mle_H0=logl_mle_H0;
-	return;
+  cPar.logl_remle_H0 = logl_remle_H0;
+  cPar.logl_mle_H0 = logl_mle_H0;
+  return;
 }
 
-void MVLMM::WriteFiles () {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".assoc.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl; 
-	  return;
-	}
-
-	outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"
-	       <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
-	for (size_t i=0; i<n_ph; i++) {
-		outfile<<"beta_"<<i+1<<"\t";
-	}
-	for (size_t i=0; i<n_ph; i++) {
-		for (size_t j=i; j<n_ph; j++) {
-			outfile<<"Vbeta_"<<i+1<<"_"<<j+1<<"\t";
-		}
-	}
-
-	if (a_mode==1) {
-		outfile<<"p_wald"<<endl;
-	} else if (a_mode==2) {
-		outfile<<"p_lrt"<<endl;
-	} else if (a_mode==3) {
-		outfile<<"p_score"<<endl;
-	} else if (a_mode==4) {
-		outfile<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
-	} else {}
-
-
-	size_t t=0, c=0;
-	for (size_t i=0; i<snpInfo.size(); ++i) {
-		if (indicator_snp[i]==0) {continue;}
-
-		outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
-		       <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<
-		  "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<
-		  fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
-		outfile<<scientific<<setprecision(6);
-
-		for (size_t i=0; i<n_ph; i++) {
-			outfile<<sumStat[t].v_beta[i]<<"\t";
-		}
-
-		c=0;
-		for (size_t i=0; i<n_ph; i++) {
-			for (size_t j=i; j<n_ph; j++) {
-				outfile<<sumStat[t].v_Vbeta[c]<<"\t";
-				c++;
-			}
-		}
-
-		if (a_mode==1) {
-			outfile<<sumStat[t].p_wald <<endl;
-		} else if (a_mode==2) {
-			outfile<<sumStat[t].p_lrt<<endl;
-		} else if (a_mode==3) {
-			outfile<<sumStat[t].p_score<<endl;
-		} else if (a_mode==4) {
-			outfile<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<
-			  "\t"<<sumStat[t].p_score<<endl;
-		} else {}
-
-		t++;
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void MVLMM::WriteFiles() {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".assoc.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  outfile << "chr"
+          << "\t"
+          << "rs"
+          << "\t"
+          << "ps"
+          << "\t"
+          << "n_miss"
+          << "\t"
+          << "allele1"
+          << "\t"
+          << "allele0"
+          << "\t"
+          << "af"
+          << "\t";
+
+  for (size_t i = 0; i < n_ph; i++) {
+    outfile << "beta_" << i + 1 << "\t";
+  }
+  for (size_t i = 0; i < n_ph; i++) {
+    for (size_t j = i; j < n_ph; j++) {
+      outfile << "Vbeta_" << i + 1 << "_" << j + 1 << "\t";
+    }
+  }
+
+  if (a_mode == 1) {
+    outfile << "p_wald" << endl;
+  } else if (a_mode == 2) {
+    outfile << "p_lrt" << endl;
+  } else if (a_mode == 3) {
+    outfile << "p_score" << endl;
+  } else if (a_mode == 4) {
+    outfile << "p_wald"
+            << "\t"
+            << "p_lrt"
+            << "\t"
+            << "p_score" << endl;
+  } else {
+  }
+
+  size_t t = 0, c = 0;
+  for (size_t i = 0; i < snpInfo.size(); ++i) {
+    if (indicator_snp[i] == 0) {
+      continue;
+    }
+
+    outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+            << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+            << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" << fixed
+            << setprecision(3) << snpInfo[i].maf << "\t";
+
+    outfile << scientific << setprecision(6);
+
+    for (size_t i = 0; i < n_ph; i++) {
+      outfile << sumStat[t].v_beta[i] << "\t";
+    }
+
+    c = 0;
+    for (size_t i = 0; i < n_ph; i++) {
+      for (size_t j = i; j < n_ph; j++) {
+        outfile << sumStat[t].v_Vbeta[c] << "\t";
+        c++;
+      }
+    }
+
+    if (a_mode == 1) {
+      outfile << sumStat[t].p_wald << endl;
+    } else if (a_mode == 2) {
+      outfile << sumStat[t].p_lrt << endl;
+    } else if (a_mode == 3) {
+      outfile << sumStat[t].p_score << endl;
+    } else if (a_mode == 4) {
+      outfile << sumStat[t].p_wald << "\t" << sumStat[t].p_lrt << "\t"
+              << sumStat[t].p_score << endl;
+    } else {
+    }
+
+    t++;
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
 // Below are functions for EM algorithm.
-double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, 
-		  gsl_vector *D_l, gsl_matrix *UltVeh, 
-		  gsl_matrix *UltVehi) {
-	size_t d_size=V_g->size1;
-	double d, logdet_Ve=0.0;
-
-	// Eigen decomposition of V_e.
-	gsl_matrix *Lambda=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_temp=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_h=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_hi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *VgVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *U_l=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix_memcpy(V_e_temp, V_e);
-	EigenDecomp(V_e_temp, U_l, D_l, 0);
-
-	// Calculate V_e_h and V_e_hi.
-	gsl_matrix_set_zero(V_e_h);
-	gsl_matrix_set_zero(V_e_hi);
-	for (size_t i=0; i<d_size; i++) {
-		d=gsl_vector_get (D_l, i);
-		if (d<=0) {continue;}
-		logdet_Ve+=log(d);
-
-		gsl_vector_view U_col=gsl_matrix_column(U_l, i);
-		d=sqrt(d);
-		gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_h);
-		d=1.0/d;
-		gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_hi);
-	}
-
-	// Copy the upper part to lower part.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<i; j++) {
-		  gsl_matrix_set (V_e_h, i, j, gsl_matrix_get(V_e_h, j, i));
-		  gsl_matrix_set (V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
-		}
-	}
-
-	// Calculate Lambda=V_ehi V_g V_ehi.
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_g,V_e_hi,0.0,VgVehi);
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_e_hi,VgVehi,0.0,Lambda);
-
-	// Eigen decomposition of Lambda.
-	EigenDecomp(Lambda, U_l, D_l, 0);
-
-	for (size_t i=0; i<d_size; i++) {
-	  d=gsl_vector_get (D_l, i);
-	  if (d<0) {gsl_vector_set (D_l, i, 0);}
-	}
-
-	// Calculate UltVeh and UltVehi.
-	gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_h,0.0,UltVeh);
-	gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_hi,0.0,UltVehi);
-
-	//free memory
-	gsl_matrix_free (Lambda);
-	gsl_matrix_free (V_e_temp);
-	gsl_matrix_free (V_e_h);
-	gsl_matrix_free (V_e_hi);
-	gsl_matrix_free (VgVehi);
-	gsl_matrix_free (U_l);
-
-	return logdet_Ve;
+double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
+                 gsl_matrix *UltVeh, gsl_matrix *UltVehi) {
+  size_t d_size = V_g->size1;
+  double d, logdet_Ve = 0.0;
+
+  // Eigen decomposition of V_e.
+  gsl_matrix *Lambda = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_temp = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_h = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_hi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *VgVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *U_l = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix_memcpy(V_e_temp, V_e);
+  EigenDecomp(V_e_temp, U_l, D_l, 0);
+
+  // Calculate V_e_h and V_e_hi.
+  gsl_matrix_set_zero(V_e_h);
+  gsl_matrix_set_zero(V_e_hi);
+  for (size_t i = 0; i < d_size; i++) {
+    d = gsl_vector_get(D_l, i);
+    if (d <= 0) {
+      continue;
+    }
+    logdet_Ve += log(d);
+
+    gsl_vector_view U_col = gsl_matrix_column(U_l, i);
+    d = sqrt(d);
+    gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_h);
+    d = 1.0 / d;
+    gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_hi);
+  }
+
+  // Copy the upper part to lower part.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < i; j++) {
+      gsl_matrix_set(V_e_h, i, j, gsl_matrix_get(V_e_h, j, i));
+      gsl_matrix_set(V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
+    }
+  }
+
+  // Calculate Lambda=V_ehi V_g V_ehi.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_g, V_e_hi, 0.0, VgVehi);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda);
+
+  // Eigen decomposition of Lambda.
+  EigenDecomp(Lambda, U_l, D_l, 0);
+
+  for (size_t i = 0; i < d_size; i++) {
+    d = gsl_vector_get(D_l, i);
+    if (d < 0) {
+      gsl_vector_set(D_l, i, 0);
+    }
+  }
+
+  // Calculate UltVeh and UltVehi.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_hi, 0.0, UltVehi);
+
+  // free memory
+  gsl_matrix_free(Lambda);
+  gsl_matrix_free(V_e_temp);
+  gsl_matrix_free(V_e_h);
+  gsl_matrix_free(V_e_hi);
+  gsl_matrix_free(VgVehi);
+  gsl_matrix_free(U_l);
+
+  return logdet_Ve;
 }
 
-//Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}.
-double CalcQi (const gsl_vector *eval, const gsl_vector *D_l, 
-	       const gsl_matrix *X, gsl_matrix *Qi) {
-	size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
-	size_t c_size=dc_size/d_size;
-
-	double delta, dl, d1, d2, d, logdet_Q;
-
-	gsl_matrix *Q=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix_set_zero (Q);
-
-	for (size_t i=0; i<c_size; i++) {
-		for (size_t j=0; j<c_size; j++) {
-			for (size_t l=0; l<d_size; l++) {
-				dl=gsl_vector_get(D_l, l);
-
-				if (j<i) {
-				  d=gsl_matrix_get (Q, j*d_size+l, i*d_size+l);
-				} else {
-					d=0.0;
-					for (size_t k=0; k<n_size; k++) {
-					  d1=gsl_matrix_get(X, i, k);
-					  d2=gsl_matrix_get(X, j, k);
-					  delta=gsl_vector_get(eval, k);
-					  d+=d1*d2/(dl*delta+1.0);
-					}
-				}
-
-				gsl_matrix_set (Q, i*d_size+l, j*d_size+l, d);
-			}
-		}
-	}
-
-	// Calculate LU decomposition of Q, and invert Q and calculate |Q|.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (dc_size);
-	LUDecomp (Q, pmt, &sig);
-	LUInvert (Q, pmt, Qi);
-
-	logdet_Q=LULndet (Q);
-
-	gsl_matrix_free (Q);
-	gsl_permutation_free (pmt);
-
-	return logdet_Q;
+// Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}.
+double CalcQi(const gsl_vector *eval, const gsl_vector *D_l,
+              const gsl_matrix *X, gsl_matrix *Qi) {
+  size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
+  size_t c_size = dc_size / d_size;
+
+  double delta, dl, d1, d2, d, logdet_Q;
+
+  gsl_matrix *Q = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix_set_zero(Q);
+
+  for (size_t i = 0; i < c_size; i++) {
+    for (size_t j = 0; j < c_size; j++) {
+      for (size_t l = 0; l < d_size; l++) {
+        dl = gsl_vector_get(D_l, l);
+
+        if (j < i) {
+          d = gsl_matrix_get(Q, j * d_size + l, i * d_size + l);
+        } else {
+          d = 0.0;
+          for (size_t k = 0; k < n_size; k++) {
+            d1 = gsl_matrix_get(X, i, k);
+            d2 = gsl_matrix_get(X, j, k);
+            delta = gsl_vector_get(eval, k);
+            d += d1 * d2 / (dl * delta + 1.0);
+          }
+        }
+
+        gsl_matrix_set(Q, i * d_size + l, j * d_size + l, d);
+      }
+    }
+  }
+
+  // Calculate LU decomposition of Q, and invert Q and calculate |Q|.
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(dc_size);
+  LUDecomp(Q, pmt, &sig);
+  LUInvert(Q, pmt, Qi);
+
+  logdet_Q = LULndet(Q);
+
+  gsl_matrix_free(Q);
+  gsl_permutation_free(pmt);
+
+  return logdet_Q;
 }
 
 // xHiy=\sum_{k=1}^n x_k\otimes ((delta_k*Dl+I)^{-1}Ul^TVe^{-1/2}y.
-void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l, 
-	      const gsl_matrix *X, const gsl_matrix *UltVehiY, 
-	      gsl_vector *xHiy) {
-	size_t n_size=eval->size, c_size=X->size1, d_size=D_l->size;
-
-	gsl_vector_set_zero (xHiy);
-
-	double x, delta, dl, y, d;
-	for (size_t i=0; i<d_size; i++) {
-		dl=gsl_vector_get(D_l, i);
-		for (size_t j=0; j<c_size; j++) {
-			d=0.0;
-			for (size_t k=0; k<n_size; k++) {
-				x=gsl_matrix_get(X, j, k);
-				y=gsl_matrix_get(UltVehiY, i, k);
-				delta=gsl_vector_get(eval, k);
-				d+=x*y/(delta*dl+1.0);
-			}
-			gsl_vector_set(xHiy, j*d_size+i, d);
-		}
-	}
-
-	return;
-}
+void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l,
+              const gsl_matrix *X, const gsl_matrix *UltVehiY,
+              gsl_vector *xHiy) {
+  size_t n_size = eval->size, c_size = X->size1, d_size = D_l->size;
+
+  gsl_vector_set_zero(xHiy);
+
+  double x, delta, dl, y, d;
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+    for (size_t j = 0; j < c_size; j++) {
+      d = 0.0;
+      for (size_t k = 0; k < n_size; k++) {
+        x = gsl_matrix_get(X, j, k);
+        y = gsl_matrix_get(UltVehiY, i, k);
+        delta = gsl_vector_get(eval, k);
+        d += x * y / (delta * dl + 1.0);
+      }
+      gsl_vector_set(xHiy, j * d_size + i, d);
+    }
+  }
 
+  return;
+}
 
 // OmegaU=D_l/(delta Dl+I)^{-1}
 // OmegaE=delta D_l/(delta Dl+I)^{-1}
-void CalcOmega (const gsl_vector *eval, const gsl_vector *D_l, 
-gsl_matrix *OmegaU, gsl_matrix *OmegaE) {
-	size_t n_size=eval->size, d_size=D_l->size;
-	double delta, dl, d_u, d_e;
+void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l,
+               gsl_matrix *OmegaU, gsl_matrix *OmegaE) {
+  size_t n_size = eval->size, d_size = D_l->size;
+  double delta, dl, d_u, d_e;
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get(eval, k);
-		for (size_t i=0; i<d_size; i++) {
-			dl=gsl_vector_get(D_l, i);
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    for (size_t i = 0; i < d_size; i++) {
+      dl = gsl_vector_get(D_l, i);
 
-			d_u=dl/(delta*dl+1.0);
-			d_e=delta*d_u;
+      d_u = dl / (delta * dl + 1.0);
+      d_e = delta * d_u;
 
-			gsl_matrix_set(OmegaU, i, k, d_u);
-			gsl_matrix_set(OmegaE, i, k, d_e);
-		}
-	}
+      gsl_matrix_set(OmegaU, i, k, d_u);
+      gsl_matrix_set(OmegaE, i, k, d_e);
+    }
+  }
 
-	return;
+  return;
 }
 
-void UpdateU (const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY, 
-	      const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) {
-	gsl_matrix_memcpy (UltVehiU, UltVehiY);
-	gsl_matrix_sub (UltVehiU, UltVehiBX);
+void UpdateU(const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY,
+             const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) {
+  gsl_matrix_memcpy(UltVehiU, UltVehiY);
+  gsl_matrix_sub(UltVehiU, UltVehiBX);
 
-	gsl_matrix_mul_elements (UltVehiU, OmegaE);
-	return;
+  gsl_matrix_mul_elements(UltVehiU, OmegaE);
+  return;
 }
 
-void UpdateE (const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX, 
-	      const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) {
-	gsl_matrix_memcpy (UltVehiE, UltVehiY);
-	gsl_matrix_sub (UltVehiE, UltVehiBX);
-	gsl_matrix_sub (UltVehiE, UltVehiU);
+void UpdateE(const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX,
+             const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) {
+  gsl_matrix_memcpy(UltVehiE, UltVehiY);
+  gsl_matrix_sub(UltVehiE, UltVehiBX);
+  gsl_matrix_sub(UltVehiE, UltVehiU);
 
-	return;
+  return;
 }
 
-void UpdateL_B (const gsl_matrix *X, const gsl_matrix *XXti, 
-		const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU, 
-		gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) {
-	size_t c_size=X->size1, d_size=UltVehiY->size1;
+void UpdateL_B(const gsl_matrix *X, const gsl_matrix *XXti,
+               const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU,
+               gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) {
+  size_t c_size = X->size1, d_size = UltVehiY->size1;
 
-	gsl_matrix *YUX=gsl_matrix_alloc (d_size, c_size);
+  gsl_matrix *YUX = gsl_matrix_alloc(d_size, c_size);
 
-	gsl_matrix_memcpy (UltVehiBX, UltVehiY);
-	gsl_matrix_sub (UltVehiBX, UltVehiU);
+  gsl_matrix_memcpy(UltVehiBX, UltVehiY);
+  gsl_matrix_sub(UltVehiBX, UltVehiU);
 
-	gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,UltVehiBX,X,0.0,YUX);
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,YUX,XXti,0.0,UltVehiB);
+  gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UltVehiBX, X, 0.0, YUX);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, YUX, XXti, 0.0, UltVehiB);
 
-	gsl_matrix_free(YUX);
+  gsl_matrix_free(YUX);
 
-	return;
+  return;
 }
 
-void UpdateRL_B (const gsl_vector *xHiy, const gsl_matrix *Qi, 
-		 gsl_matrix *UltVehiB) {
-	size_t d_size=UltVehiB->size1, c_size=UltVehiB->size2, 
-	  dc_size=Qi->size1;
+void UpdateRL_B(const gsl_vector *xHiy, const gsl_matrix *Qi,
+                gsl_matrix *UltVehiB) {
+  size_t d_size = UltVehiB->size1, c_size = UltVehiB->size2,
+         dc_size = Qi->size1;
 
-	gsl_vector *b=gsl_vector_alloc (dc_size);
+  gsl_vector *b = gsl_vector_alloc(dc_size);
 
-	// Calculate b=Qiv.
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
+  // Calculate b=Qiv.
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
 
-	// Copy b to UltVehiB.
-	for (size_t i=0; i<c_size; i++) {
-		gsl_vector_view UltVehiB_col=gsl_matrix_column (UltVehiB, i);
-		gsl_vector_const_view b_subcol=
-		  gsl_vector_const_subvector (b, i*d_size, d_size);
-		gsl_vector_memcpy (&UltVehiB_col.vector, &b_subcol.vector);
-	}
+  // Copy b to UltVehiB.
+  for (size_t i = 0; i < c_size; i++) {
+    gsl_vector_view UltVehiB_col = gsl_matrix_column(UltVehiB, i);
+    gsl_vector_const_view b_subcol =
+        gsl_vector_const_subvector(b, i * d_size, d_size);
+    gsl_vector_memcpy(&UltVehiB_col.vector, &b_subcol.vector);
+  }
 
-	gsl_vector_free(b);
+  gsl_vector_free(b);
 
-	return;
+  return;
 }
 
-void UpdateV (const gsl_vector *eval, const gsl_matrix *U, 
-	      const gsl_matrix *E, const gsl_matrix *Sigma_uu, 
-	      const gsl_matrix *Sigma_ee, gsl_matrix *V_g, gsl_matrix *V_e) {
-	size_t n_size=eval->size, d_size=U->size1;
+void UpdateV(const gsl_vector *eval, const gsl_matrix *U, const gsl_matrix *E,
+             const gsl_matrix *Sigma_uu, const gsl_matrix *Sigma_ee,
+             gsl_matrix *V_g, gsl_matrix *V_e) {
+  size_t n_size = eval->size, d_size = U->size1;
 
-	gsl_matrix_set_zero (V_g);
-	gsl_matrix_set_zero (V_e);
+  gsl_matrix_set_zero(V_g);
+  gsl_matrix_set_zero(V_e);
 
-	double delta;
+  double delta;
 
-	// Calculate the first part: UD^{-1}U^T and EE^T.
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-		if (delta==0) {continue;}
+  // Calculate the first part: UD^{-1}U^T and EE^T.
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    if (delta == 0) {
+      continue;
+    }
 
-		gsl_vector_const_view U_col=gsl_matrix_const_column (U, k);
-		gsl_blas_dsyr (CblasUpper, 1.0/delta, &U_col.vector, V_g);
-	}
+    gsl_vector_const_view U_col = gsl_matrix_const_column(U, k);
+    gsl_blas_dsyr(CblasUpper, 1.0 / delta, &U_col.vector, V_g);
+  }
 
-	gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
+  gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
 
-	// Copy the upper part to lower part.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<i; j++) {
-			gsl_matrix_set (V_g, i, j, gsl_matrix_get(V_g, j, i));
-			gsl_matrix_set (V_e, i, j, gsl_matrix_get(V_e, j, i));
-		}
-	}
+  // Copy the upper part to lower part.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < i; j++) {
+      gsl_matrix_set(V_g, i, j, gsl_matrix_get(V_g, j, i));
+      gsl_matrix_set(V_e, i, j, gsl_matrix_get(V_e, j, i));
+    }
+  }
 
-	// Add Sigma.
-	gsl_matrix_add (V_g, Sigma_uu);
-	gsl_matrix_add (V_e, Sigma_ee);
+  // Add Sigma.
+  gsl_matrix_add(V_g, Sigma_uu);
+  gsl_matrix_add(V_e, Sigma_ee);
 
-	// Scale by 1/n.
-	gsl_matrix_scale (V_g, 1.0/(double)n_size);
-	gsl_matrix_scale (V_e, 1.0/(double)n_size);
+  // Scale by 1/n.
+  gsl_matrix_scale(V_g, 1.0 / (double)n_size);
+  gsl_matrix_scale(V_e, 1.0 / (double)n_size);
 
-	return;
+  return;
 }
 
-void CalcSigma (const char func_name, const gsl_vector *eval, 
-		const gsl_vector *D_l, const gsl_matrix *X, 
-		const gsl_matrix *OmegaU, const gsl_matrix *OmegaE, 
-		const gsl_matrix *UltVeh, const gsl_matrix *Qi, 
-		gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' && 
-	    func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
-	    "likelihood, 'L' for log-likelihood."<<endl; 
-	  return;
-	}
-
-	size_t n_size=eval->size, c_size=X->size1;
-	size_t d_size=D_l->size, dc_size=Qi->size1;
-
-	gsl_matrix_set_zero(Sigma_uu);
-	gsl_matrix_set_zero(Sigma_ee);
-
-	double delta, dl, x, d;
-
-	// Calculate the first diagonal term.
-	gsl_vector_view Suu_diag=gsl_matrix_diagonal (Sigma_uu);
-	gsl_vector_view See_diag=gsl_matrix_diagonal (Sigma_ee);
-
-	for (size_t k=0; k<n_size; k++) {
-	  gsl_vector_const_view OmegaU_col=gsl_matrix_const_column (OmegaU, k);
-	  gsl_vector_const_view OmegaE_col=gsl_matrix_const_column (OmegaE, k);
-
-	  gsl_vector_add (&Suu_diag.vector, &OmegaU_col.vector);
-	  gsl_vector_add (&See_diag.vector, &OmegaE_col.vector);
-	}
-
-	// Calculate the second term for REML.
-	if (func_name=='R' || func_name=='r') {
-		gsl_matrix *M_u=gsl_matrix_alloc(dc_size, d_size);
-		gsl_matrix *M_e=gsl_matrix_alloc(dc_size, d_size);
-		gsl_matrix *QiM=gsl_matrix_alloc(dc_size, d_size);
-
-		gsl_matrix_set_zero(M_u);
-		gsl_matrix_set_zero(M_e);
-
-		for (size_t k=0; k<n_size; k++) {
-		  delta=gsl_vector_get(eval, k);
-		  
-		  for (size_t i=0; i<d_size; i++) {
-		    dl=gsl_vector_get(D_l, i);
-		    for (size_t j=0; j<c_size; j++) {
-		      x=gsl_matrix_get(X, j, k);
-		      d=x/(delta*dl+1.0);
-		      gsl_matrix_set(M_e, j*d_size+i, i, d);
-		      gsl_matrix_set(M_u, j*d_size+i, i, d*dl);
-		    }
-		  }
-		  gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_u,0.0,QiM);
-		  gsl_blas_dgemm(CblasTrans,CblasNoTrans,delta,M_u,QiM,1.0,
-				 Sigma_uu);
-		  
-		  gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_e,0.0,QiM);
-		  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,M_e,QiM,1.0,
-				 Sigma_ee);
-		}
-		
-		gsl_matrix_free(M_u);
-		gsl_matrix_free(M_e);
-		gsl_matrix_free(QiM);
-	}
-
-	// Multiply both sides by VehUl.
-	gsl_matrix *M=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_uu,UltVeh,0.0,M);
-	gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_uu);
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_ee,UltVeh,0.0,M);
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_ee);
-
-	gsl_matrix_free(M);
-	return;
+void CalcSigma(const char func_name, const gsl_vector *eval,
+               const gsl_vector *D_l, const gsl_matrix *X,
+               const gsl_matrix *OmegaU, const gsl_matrix *OmegaE,
+               const gsl_matrix *UltVeh, const gsl_matrix *Qi,
+               gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+         << "likelihood, 'L' for log-likelihood." << endl;
+    return;
+  }
+
+  size_t n_size = eval->size, c_size = X->size1;
+  size_t d_size = D_l->size, dc_size = Qi->size1;
+
+  gsl_matrix_set_zero(Sigma_uu);
+  gsl_matrix_set_zero(Sigma_ee);
+
+  double delta, dl, x, d;
+
+  // Calculate the first diagonal term.
+  gsl_vector_view Suu_diag = gsl_matrix_diagonal(Sigma_uu);
+  gsl_vector_view See_diag = gsl_matrix_diagonal(Sigma_ee);
+
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_vector_const_view OmegaU_col = gsl_matrix_const_column(OmegaU, k);
+    gsl_vector_const_view OmegaE_col = gsl_matrix_const_column(OmegaE, k);
+
+    gsl_vector_add(&Suu_diag.vector, &OmegaU_col.vector);
+    gsl_vector_add(&See_diag.vector, &OmegaE_col.vector);
+  }
+
+  // Calculate the second term for REML.
+  if (func_name == 'R' || func_name == 'r') {
+    gsl_matrix *M_u = gsl_matrix_alloc(dc_size, d_size);
+    gsl_matrix *M_e = gsl_matrix_alloc(dc_size, d_size);
+    gsl_matrix *QiM = gsl_matrix_alloc(dc_size, d_size);
+
+    gsl_matrix_set_zero(M_u);
+    gsl_matrix_set_zero(M_e);
+
+    for (size_t k = 0; k < n_size; k++) {
+      delta = gsl_vector_get(eval, k);
+
+      for (size_t i = 0; i < d_size; i++) {
+        dl = gsl_vector_get(D_l, i);
+        for (size_t j = 0; j < c_size; j++) {
+          x = gsl_matrix_get(X, j, k);
+          d = x / (delta * dl + 1.0);
+          gsl_matrix_set(M_e, j * d_size + i, i, d);
+          gsl_matrix_set(M_u, j * d_size + i, i, d * dl);
+        }
+      }
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_u, 0.0, QiM);
+      gsl_blas_dgemm(CblasTrans, CblasNoTrans, delta, M_u, QiM, 1.0, Sigma_uu);
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_e, 0.0, QiM);
+      gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, M_e, QiM, 1.0, Sigma_ee);
+    }
+
+    gsl_matrix_free(M_u);
+    gsl_matrix_free(M_e);
+    gsl_matrix_free(QiM);
+  }
+
+  // Multiply both sides by VehUl.
+  gsl_matrix *M = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_uu, UltVeh, 0.0, M);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_uu);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_ee, UltVeh, 0.0, M);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_ee);
+
+  gsl_matrix_free(M);
+  return;
 }
 
 // 'R' for restricted likelihood and 'L' for likelihood.
 // 'R' update B and 'L' don't.
 // only calculate -0.5*\sum_{k=1}^n|H_k|-0.5yPxy.
-double MphCalcLogL (const gsl_vector *eval, const gsl_vector *xHiy, 
-		    const gsl_vector *D_l, const gsl_matrix *UltVehiY, 
-		    const gsl_matrix *Qi) {
-	size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
-	double logl=0.0, delta, dl, y, d;
-
-	// Calculate yHiy+log|H_k|.
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get(eval, k);
-		for (size_t i=0; i<d_size; i++) {
-			y=gsl_matrix_get(UltVehiY, i, k);
-			dl=gsl_vector_get(D_l, i);
-			d=delta*dl+1.0;
-
-			logl+=y*y/d+log(d);
-		}
-	}
+double MphCalcLogL(const gsl_vector *eval, const gsl_vector *xHiy,
+                   const gsl_vector *D_l, const gsl_matrix *UltVehiY,
+                   const gsl_matrix *Qi) {
+  size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
+  double logl = 0.0, delta, dl, y, d;
+
+  // Calculate yHiy+log|H_k|.
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    for (size_t i = 0; i < d_size; i++) {
+      y = gsl_matrix_get(UltVehiY, i, k);
+      dl = gsl_vector_get(D_l, i);
+      d = delta * dl + 1.0;
+
+      logl += y * y / d + log(d);
+    }
+  }
 
-	// Calculate the rest of yPxy.
-	gsl_vector *Qiv=gsl_vector_alloc(dc_size);
+  // Calculate the rest of yPxy.
+  gsl_vector *Qiv = gsl_vector_alloc(dc_size);
 
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
-	gsl_blas_ddot(xHiy, Qiv, &d);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
+  gsl_blas_ddot(xHiy, Qiv, &d);
 
-	logl-=d;
+  logl -= d;
 
-	gsl_vector_free(Qiv);
+  gsl_vector_free(Qiv);
 
-	return -0.5*logl;
+  return -0.5 * logl;
 }
 
 // Y is a dxn matrix, X is a cxn matrix, B is a dxc matrix, V_g is a
 // dxd matrix, V_e is a dxd matrix, eval is a size n vector
 //'R' for restricted likelihood and 'L' for likelihood.
-double MphEM (const char func_name, const size_t max_iter, 
-	      const double max_prec, const gsl_vector *eval, 
-	      const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *U_hat, 
-	      gsl_matrix *E_hat, gsl_matrix *OmegaU, gsl_matrix *OmegaE, 
-	      gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX, 
-	      gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, 
-	      gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B) {
-	if (func_name!='R' && func_name!='L' && 
-	    func_name!='r' && func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
-	    "likelihood, 'L' for log-likelihood."<<endl; 
-	  return 0.0;
-	}
-
-	size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
-	size_t dc_size=d_size*c_size;
-
-	gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
-	gsl_matrix *XXti=gsl_matrix_alloc (c_size, c_size);
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehiB=gsl_matrix_alloc (d_size, c_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *Sigma_uu=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Sigma_ee=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *xHiy=gsl_vector_alloc (dc_size);
-	gsl_permutation * pmt=gsl_permutation_alloc (c_size);
-
-	double logl_const=0.0, logl_old=0.0, logl_new=0.0;
-	double logdet_Q, logdet_Ve;
-	int sig;
-
-	// Calculate |XXt| and (XXt)^{-1}.
-	gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
-	for (size_t i=0; i<c_size; ++i) {
-		for (size_t j=0; j<i; ++j) {
-		  gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i));
-		}
-	}
-
-	LUDecomp (XXt, pmt, &sig);
-	LUInvert (XXt, pmt, XXti);
-
-	// Calculate the constant for logl.
-	if (func_name=='R' || func_name=='r') {
-	  logl_const=-0.5*(double)(n_size-c_size)*
-	    (double)d_size*log(2.0*M_PI)+0.5*(double)d_size*LULndet (XXt);
-	} else {
-	  logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
-	}
-
-	// Start EM.
-	for (size_t t=0; t<max_iter; t++) {
-	  logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-	  
-	  logdet_Q=CalcQi (eval, D_l, X, Qi);
-	  
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, 
-			 Y, 0.0, UltVehiY);
-	  CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
-
-	  // Calculate log likelihood/restricted likelihood value, and
-	  // terminate if change is small.
-	  logl_new=logl_const+MphCalcLogL (eval, xHiy, D_l, UltVehiY, Qi) - 
-	    0.5*(double)n_size*logdet_Ve;
-	  if (func_name=='R' || func_name=='r') {
-	    logl_new+=-0.5*(logdet_Q-(double)c_size*logdet_Ve);
-	  }
-	  if (t!=0 && abs(logl_new-logl_old)<max_prec) {break;}
-	  logl_old=logl_new;
-	  
-	  CalcOmega (eval, D_l, OmegaU, OmegaE);
-
-	  // Update UltVehiB, UltVehiU.
-	  if (func_name=='R' || func_name=='r') {
-	    UpdateRL_B(xHiy, Qi, UltVehiB);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 
-			   0.0, UltVehiBX);
-	  } else if (t==0) {
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 
-			   0.0, UltVehiB);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 
-			   0.0, UltVehiBX);
-	  }
-	  
-	  UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
-	  
-	  if (func_name=='L' || func_name=='l') {
-
-	    // UltVehiBX is destroyed here.
-	    UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 
-			   0.0, UltVehiBX);
-	  }
-	  
-	  UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
-
-	  // Calculate U_hat, E_hat and B.
-	  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiU,
-			 0.0,U_hat);
-	  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiE,
-			 0.0,E_hat);
-	  gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiB,0.0,B);
-	  
-	  // Calculate Sigma_uu and Sigma_ee.
-	  CalcSigma (func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, 
-		     Qi, Sigma_uu, Sigma_ee);
-	  
-	  // Update V_g and V_e.
-	  UpdateV (eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
-	}
-
-	gsl_matrix_free(XXt);
-	gsl_matrix_free(XXti);
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(UltVehiB);
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(Sigma_uu);
-	gsl_matrix_free(Sigma_ee);
-	gsl_vector_free(xHiy);
-	gsl_permutation_free(pmt);
-
-	return logl_new;
+double MphEM(const char func_name, const size_t max_iter, const double max_prec,
+             const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
+             gsl_matrix *U_hat, gsl_matrix *E_hat, gsl_matrix *OmegaU,
+             gsl_matrix *OmegaE, gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX,
+             gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, gsl_matrix *V_g,
+             gsl_matrix *V_e, gsl_matrix *B) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+         << "likelihood, 'L' for log-likelihood." << endl;
+    return 0.0;
+  }
+
+  size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+  size_t dc_size = d_size * c_size;
+
+  gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
+  gsl_matrix *XXti = gsl_matrix_alloc(c_size, c_size);
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehiB = gsl_matrix_alloc(d_size, c_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *Sigma_uu = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Sigma_ee = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *xHiy = gsl_vector_alloc(dc_size);
+  gsl_permutation *pmt = gsl_permutation_alloc(c_size);
+
+  double logl_const = 0.0, logl_old = 0.0, logl_new = 0.0;
+  double logdet_Q, logdet_Ve;
+  int sig;
+
+  // Calculate |XXt| and (XXt)^{-1}.
+  gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
+  for (size_t i = 0; i < c_size; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
+    }
+  }
+
+  LUDecomp(XXt, pmt, &sig);
+  LUInvert(XXt, pmt, XXti);
+
+  // Calculate the constant for logl.
+  if (func_name == 'R' || func_name == 'r') {
+    logl_const =
+        -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) +
+        0.5 * (double)d_size * LULndet(XXt);
+  } else {
+    logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI);
+  }
+
+  // Start EM.
+  for (size_t t = 0; t < max_iter; t++) {
+    logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+    logdet_Q = CalcQi(eval, D_l, X, Qi);
+
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+    CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
+
+    // Calculate log likelihood/restricted likelihood value, and
+    // terminate if change is small.
+    logl_new = logl_const + MphCalcLogL(eval, xHiy, D_l, UltVehiY, Qi) -
+               0.5 * (double)n_size * logdet_Ve;
+    if (func_name == 'R' || func_name == 'r') {
+      logl_new += -0.5 * (logdet_Q - (double)c_size * logdet_Ve);
+    }
+    if (t != 0 && abs(logl_new - logl_old) < max_prec) {
+      break;
+    }
+    logl_old = logl_new;
+
+    CalcOmega(eval, D_l, OmegaU, OmegaE);
+
+    // Update UltVehiB, UltVehiU.
+    if (func_name == 'R' || func_name == 'r') {
+      UpdateRL_B(xHiy, Qi, UltVehiB);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+                     UltVehiBX);
+    } else if (t == 0) {
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 0.0,
+                     UltVehiB);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+                     UltVehiBX);
+    }
+
+    UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
+
+    if (func_name == 'L' || func_name == 'l') {
+
+      // UltVehiBX is destroyed here.
+      UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+                     UltVehiBX);
+    }
+
+    UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
+
+    // Calculate U_hat, E_hat and B.
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiU, 0.0, U_hat);
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiE, 0.0, E_hat);
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiB, 0.0, B);
+
+    // Calculate Sigma_uu and Sigma_ee.
+    CalcSigma(func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, Qi, Sigma_uu,
+              Sigma_ee);
+
+    // Update V_g and V_e.
+    UpdateV(eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
+  }
+
+  gsl_matrix_free(XXt);
+  gsl_matrix_free(XXti);
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(UltVehiB);
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(Sigma_uu);
+  gsl_matrix_free(Sigma_ee);
+  gsl_vector_free(xHiy);
+  gsl_permutation_free(pmt);
+
+  return logl_new;
 }
 
 // Calculate p-value, beta (d by 1 vector) and V(beta).
-double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec, 
-		 const gsl_matrix *W, const gsl_matrix *Y, 
-		 const gsl_matrix *V_g, const gsl_matrix *V_e, 
-		 gsl_matrix *UltVehiY, gsl_vector *beta, gsl_matrix *Vbeta) {
-	size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
-	size_t dc_size=d_size*c_size;
-	double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
-
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *WHix=gsl_matrix_alloc (dc_size, d_size);
-	gsl_matrix *QiWHix=gsl_matrix_alloc(dc_size, d_size);
-
-	gsl_matrix *xPx=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *xPy=gsl_vector_alloc (d_size);
-	gsl_vector *WHiy=gsl_vector_alloc (dc_size);
-
-	gsl_matrix_set_zero (xPx);
-	gsl_matrix_set_zero (WHix);
-	gsl_vector_set_zero (xPy);
-	gsl_vector_set_zero (WHiy);
-
-	// Eigen decomposition and calculate log|Ve|.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate Qi and log|Q|.
-	logdet_Q=CalcQi (eval, D_l, W, Qi);
-
-	// Calculate UltVehiY.
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 
-		       0.0, UltVehiY);
-
-	// Calculate WHix, WHiy, xHiy, xHix.
-	for (size_t i=0; i<d_size; i++) {
-		dl=gsl_vector_get(D_l, i);
-
-		d1=0.0; d2=0.0;
-		for (size_t k=0; k<n_size; k++) {
-			delta=gsl_vector_get(eval, k);
-			dx=gsl_vector_get(x_vec, k);
-			dy=gsl_matrix_get(UltVehiY, i, k);
-
-			d1+=dx*dy/(delta*dl+1.0);
-			d2+=dx*dx/(delta*dl+1.0);
-		}
-		gsl_vector_set (xPy, i, d1);
-		gsl_matrix_set (xPx, i, i, d2);
-
-		for (size_t j=0; j<c_size; j++) {
-			d1=0.0; d2=0.0;
-			for (size_t k=0; k<n_size; k++) {
-				delta=gsl_vector_get(eval, k);
-				dx=gsl_vector_get(x_vec, k);
-				dw=gsl_matrix_get(W, j, k);
-				dy=gsl_matrix_get(UltVehiY, i, k);
-
-				d1+=dx*dw/(delta*dl+1.0);
-				d2+=dy*dw/(delta*dl+1.0);
-			}
-			gsl_matrix_set(WHix, j*d_size+i, i, d1);
-			gsl_vector_set(WHiy, j*d_size+i, d2);
-		}
-	}
-
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
-	gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
-
-	// Calculate V(beta) and beta.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (d_size);
-	LUDecomp (xPx, pmt, &sig);
-	LUSolve (xPx, pmt, xPy, D_l);
-	LUInvert (xPx, pmt, Vbeta);
-
-	// Need to multiply UltVehi on both sides or one side.
-	gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 
-		       0.0, xPx);
-	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
-
-	// Calculate test statistic and p value.
-	gsl_blas_ddot(D_l, xPy, &d);
-
-	p_value=gsl_cdf_chisq_Q (d, (double)d_size);
-
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(WHix);
-	gsl_matrix_free(QiWHix);
-
-	gsl_matrix_free(xPx);
-	gsl_vector_free(xPy);
-	gsl_vector_free(WHiy);
-
-	gsl_permutation_free(pmt);
-
-	return p_value;
+double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec,
+                const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g,
+                const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_vector *beta,
+                gsl_matrix *Vbeta) {
+  size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
+  size_t dc_size = d_size * c_size;
+  double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
+
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *WHix = gsl_matrix_alloc(dc_size, d_size);
+  gsl_matrix *QiWHix = gsl_matrix_alloc(dc_size, d_size);
+
+  gsl_matrix *xPx = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *xPy = gsl_vector_alloc(d_size);
+  gsl_vector *WHiy = gsl_vector_alloc(dc_size);
+
+  gsl_matrix_set_zero(xPx);
+  gsl_matrix_set_zero(WHix);
+  gsl_vector_set_zero(xPy);
+  gsl_vector_set_zero(WHiy);
+
+  // Eigen decomposition and calculate log|Ve|.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate Qi and log|Q|.
+  logdet_Q = CalcQi(eval, D_l, W, Qi);
+
+  // Calculate UltVehiY.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+  // Calculate WHix, WHiy, xHiy, xHix.
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+
+    d1 = 0.0;
+    d2 = 0.0;
+    for (size_t k = 0; k < n_size; k++) {
+      delta = gsl_vector_get(eval, k);
+      dx = gsl_vector_get(x_vec, k);
+      dy = gsl_matrix_get(UltVehiY, i, k);
+
+      d1 += dx * dy / (delta * dl + 1.0);
+      d2 += dx * dx / (delta * dl + 1.0);
+    }
+    gsl_vector_set(xPy, i, d1);
+    gsl_matrix_set(xPx, i, i, d2);
+
+    for (size_t j = 0; j < c_size; j++) {
+      d1 = 0.0;
+      d2 = 0.0;
+      for (size_t k = 0; k < n_size; k++) {
+        delta = gsl_vector_get(eval, k);
+        dx = gsl_vector_get(x_vec, k);
+        dw = gsl_matrix_get(W, j, k);
+        dy = gsl_matrix_get(UltVehiY, i, k);
+
+        d1 += dx * dw / (delta * dl + 1.0);
+        d2 += dy * dw / (delta * dl + 1.0);
+      }
+      gsl_matrix_set(WHix, j * d_size + i, i, d1);
+      gsl_vector_set(WHiy, j * d_size + i, d2);
+    }
+  }
+
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
+  gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
+
+  // Calculate V(beta) and beta.
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(d_size);
+  LUDecomp(xPx, pmt, &sig);
+  LUSolve(xPx, pmt, xPy, D_l);
+  LUInvert(xPx, pmt, Vbeta);
+
+  // Need to multiply UltVehi on both sides or one side.
+  gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 0.0, xPx);
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
+
+  // Calculate test statistic and p value.
+  gsl_blas_ddot(D_l, xPy, &d);
+
+  p_value = gsl_cdf_chisq_Q(d, (double)d_size);
+
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(WHix);
+  gsl_matrix_free(QiWHix);
+
+  gsl_matrix_free(xPx);
+  gsl_vector_free(xPy);
+  gsl_vector_free(WHiy);
+
+  gsl_permutation_free(pmt);
+
+  return p_value;
 }
 
 // Calculate B and its standard error (which is a matrix of the same
 // dimension as B).
-void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, 
-		  const gsl_matrix *Y, const gsl_matrix *V_g, 
-		  const gsl_matrix *V_e, gsl_matrix *UltVehiY, 
-		  gsl_matrix *B, gsl_matrix *se_B) {
-	size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
-	size_t dc_size=d_size*c_size;
-	double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
-
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *Qi_temp=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_vector *WHiy=gsl_vector_alloc (dc_size);
-	gsl_vector *QiWHiy=gsl_vector_alloc (dc_size);
-	gsl_vector *beta=gsl_vector_alloc (dc_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_vector_set_zero (WHiy);
-
-	// Eigen decomposition and calculate log|Ve|.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate Qi and log|Q|.
-	logdet_Q=CalcQi (eval, D_l, W, Qi);
-
-	// Calculate UltVehiY.
-	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 
-		       0.0, UltVehiY);
-
-	// Calculate WHiy.
-	for (size_t i=0; i<d_size; i++) {
-		dl=gsl_vector_get(D_l, i);
-
-		for (size_t j=0; j<c_size; j++) {
-			d=0.0;
-			for (size_t k=0; k<n_size; k++) {
-				delta=gsl_vector_get(eval, k);
-				dw=gsl_matrix_get(W, j, k);
-				dy=gsl_matrix_get(UltVehiY, i, k);
-
-				d+=dy*dw/(delta*dl+1.0);
-			}
-			gsl_vector_set(WHiy, j*d_size+i, d);
-		}
-	}
-
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
-
-	// Need to multiply I_c\otimes UltVehi on both sides or one side.
-	for (size_t i=0; i<c_size; i++) {
-	  gsl_vector_view QiWHiy_sub=
-	    gsl_vector_subvector(QiWHiy, i*d_size, d_size);
-	  gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size);
-	  gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0, 
-			 &beta_sub.vector);
-	  
-	  for (size_t j=0; j<c_size; j++) {
-	    gsl_matrix_view Qi_sub=
-	      gsl_matrix_submatrix (Qi, i*d_size, j*d_size, d_size, d_size);
-	    gsl_matrix_view Qitemp_sub=
-	      gsl_matrix_submatrix (Qi_temp,i*d_size,j*d_size,d_size,d_size);
-	    gsl_matrix_view Vbeta_sub=
-	      gsl_matrix_submatrix (Vbeta, i*d_size, j*d_size, d_size, d_size);
-	    
-	    if (j<i) {
-	      gsl_matrix_view Vbeta_sym=
-		gsl_matrix_submatrix(Vbeta,j*d_size,i*d_size,d_size,d_size);
-	      gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix,&Vbeta_sym.matrix);
-	    } else {
-	      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, 
-			     UltVeh, 0.0, &Qitemp_sub.matrix);
-	      gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, 
-			     &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
-	    }
-	  }
-	}
-
-	// Copy beta to B, and Vbeta to se_B.
-	for (size_t j=0; j<B->size2; j++) {
-	  for (size_t i=0; i<B->size1; i++) {
-	    gsl_matrix_set(B, i, j, gsl_vector_get(beta, j*d_size+i));
-	    gsl_matrix_set(se_B, i, j, 
-			   sqrt(gsl_matrix_get(Vbeta,j*d_size+i,j*d_size+i)));
-	  }
-	}
-	
-	// Free matrices.
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(Qi_temp);
-	gsl_vector_free(WHiy);
-	gsl_vector_free(QiWHiy);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	return;
+void MphCalcBeta(const gsl_vector *eval, const gsl_matrix *W,
+                 const gsl_matrix *Y, const gsl_matrix *V_g,
+                 const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_matrix *B,
+                 gsl_matrix *se_B) {
+  size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
+  size_t dc_size = d_size * c_size;
+  double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
+
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *Qi_temp = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_vector *WHiy = gsl_vector_alloc(dc_size);
+  gsl_vector *QiWHiy = gsl_vector_alloc(dc_size);
+  gsl_vector *beta = gsl_vector_alloc(dc_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_vector_set_zero(WHiy);
+
+  // Eigen decomposition and calculate log|Ve|.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate Qi and log|Q|.
+  logdet_Q = CalcQi(eval, D_l, W, Qi);
+
+  // Calculate UltVehiY.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+  // Calculate WHiy.
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+
+    for (size_t j = 0; j < c_size; j++) {
+      d = 0.0;
+      for (size_t k = 0; k < n_size; k++) {
+        delta = gsl_vector_get(eval, k);
+        dw = gsl_matrix_get(W, j, k);
+        dy = gsl_matrix_get(UltVehiY, i, k);
+
+        d += dy * dw / (delta * dl + 1.0);
+      }
+      gsl_vector_set(WHiy, j * d_size + i, d);
+    }
+  }
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
+
+  // Need to multiply I_c\otimes UltVehi on both sides or one side.
+  for (size_t i = 0; i < c_size; i++) {
+    gsl_vector_view QiWHiy_sub =
+        gsl_vector_subvector(QiWHiy, i * d_size, d_size);
+    gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
+    gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0,
+                   &beta_sub.vector);
+
+    for (size_t j = 0; j < c_size; j++) {
+      gsl_matrix_view Qi_sub =
+          gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
+      gsl_matrix_view Qitemp_sub =
+          gsl_matrix_submatrix(Qi_temp, i * d_size, j * d_size, d_size, d_size);
+      gsl_matrix_view Vbeta_sub =
+          gsl_matrix_submatrix(Vbeta, i * d_size, j * d_size, d_size, d_size);
+
+      if (j < i) {
+        gsl_matrix_view Vbeta_sym =
+            gsl_matrix_submatrix(Vbeta, j * d_size, i * d_size, d_size, d_size);
+        gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix, &Vbeta_sym.matrix);
+      } else {
+        gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
+                       0.0, &Qitemp_sub.matrix);
+        gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
+                       &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
+      }
+    }
+  }
+
+  // Copy beta to B, and Vbeta to se_B.
+  for (size_t j = 0; j < B->size2; j++) {
+    for (size_t i = 0; i < B->size1; i++) {
+      gsl_matrix_set(B, i, j, gsl_vector_get(beta, j * d_size + i));
+      gsl_matrix_set(se_B, i, j, sqrt(gsl_matrix_get(Vbeta, j * d_size + i,
+                                                     j * d_size + i)));
+    }
+  }
+
+  // Free matrices.
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(Qi_temp);
+  gsl_vector_free(WHiy);
+  gsl_vector_free(QiWHiy);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  return;
 }
 
 // Below are functions for Newton-Raphson's algorithm.
@@ -912,996 +931,962 @@ void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W,
 // Calculate all Hi and return logdet_H=\sum_{k=1}^{n}log|H_k|
 // and calculate Qi and return logdet_Q
 // and calculate yPy.
-void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X,
-	       const gsl_matrix *V_g, const gsl_matrix *V_e,
-	       gsl_matrix *Hi_all, gsl_matrix *Qi, double &logdet_H,
-	       double &logdet_Q) {
-	gsl_matrix_set_zero (Hi_all);
-	gsl_matrix_set_zero (Qi);
-	logdet_H=0.0; logdet_Q=0.0;
-
-	size_t n_size=eval->size, c_size=X->size1, d_size=V_g->size1;
-	double logdet_Ve=0.0, delta, dl, d;
-
-	gsl_matrix *mat_dd=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-
-	// Calculate D_l, UltVeh and UltVehi.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate each Hi and log|H_k|.
-	logdet_H=(double)n_size*logdet_Ve;
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		gsl_matrix_memcpy (mat_dd, UltVehi);
-		for (size_t i=0; i<d_size; i++) {
-			dl=gsl_vector_get(D_l, i);
-			d=delta*dl+1.0;
-
-			gsl_vector_view mat_row=gsl_matrix_row (mat_dd, i);
-			gsl_vector_scale (&mat_row.vector, 1.0/d);
-
-			logdet_H+=log(d);
-		}
-
-		gsl_matrix_view Hi_k=
-		  gsl_matrix_submatrix(Hi_all, 0, k*d_size, d_size, d_size);
-		gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi,
-			       mat_dd, 0.0, &Hi_k.matrix);
-	}
-
-	// Calculate Qi, and multiply I\o times UtVeh on both side and
-	// calculate logdet_Q, don't forget to substract
-	// c_size*logdet_Ve.
-	logdet_Q=CalcQi (eval, D_l, X, Qi)-(double)c_size*logdet_Ve;
-
-	for (size_t i=0; i<c_size; i++) {
-		for (size_t j=0; j<c_size; j++) {
-		  gsl_matrix_view Qi_sub=
-		    gsl_matrix_submatrix(Qi,i*d_size,j*d_size,d_size,d_size);
-		  if (j<i) {
-		    gsl_matrix_view Qi_sym=
-		      gsl_matrix_submatrix(Qi,j*d_size,i*d_size,d_size,d_size);
-		    gsl_matrix_transpose_memcpy(&Qi_sub.matrix,&Qi_sym.matrix);
-		  } else {
-		    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-				   &Qi_sub.matrix, UltVeh, 0.0, mat_dd);
-		    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
-				   mat_dd, 0.0, &Qi_sub.matrix);
-		  }
-		}
-	}
-
-	// Free memory.
-	gsl_matrix_free(mat_dd);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_vector_free(D_l);
-
-	return;
+void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X,
+              const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *Hi_all,
+              gsl_matrix *Qi, double &logdet_H, double &logdet_Q) {
+  gsl_matrix_set_zero(Hi_all);
+  gsl_matrix_set_zero(Qi);
+  logdet_H = 0.0;
+  logdet_Q = 0.0;
+
+  size_t n_size = eval->size, c_size = X->size1, d_size = V_g->size1;
+  double logdet_Ve = 0.0, delta, dl, d;
+
+  gsl_matrix *mat_dd = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+
+  // Calculate D_l, UltVeh and UltVehi.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate each Hi and log|H_k|.
+  logdet_H = (double)n_size * logdet_Ve;
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    gsl_matrix_memcpy(mat_dd, UltVehi);
+    for (size_t i = 0; i < d_size; i++) {
+      dl = gsl_vector_get(D_l, i);
+      d = delta * dl + 1.0;
+
+      gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i);
+      gsl_vector_scale(&mat_row.vector, 1.0 / d);
+
+      logdet_H += log(d);
+    }
+
+    gsl_matrix_view Hi_k =
+        gsl_matrix_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi, mat_dd, 0.0,
+                   &Hi_k.matrix);
+  }
+
+  // Calculate Qi, and multiply I\o times UtVeh on both side and
+  // calculate logdet_Q, don't forget to substract
+  // c_size*logdet_Ve.
+  logdet_Q = CalcQi(eval, D_l, X, Qi) - (double)c_size * logdet_Ve;
+
+  for (size_t i = 0; i < c_size; i++) {
+    for (size_t j = 0; j < c_size; j++) {
+      gsl_matrix_view Qi_sub =
+          gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
+      if (j < i) {
+        gsl_matrix_view Qi_sym =
+            gsl_matrix_submatrix(Qi, j * d_size, i * d_size, d_size, d_size);
+        gsl_matrix_transpose_memcpy(&Qi_sub.matrix, &Qi_sym.matrix);
+      } else {
+        gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
+                       0.0, mat_dd);
+        gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, mat_dd, 0.0,
+                       &Qi_sub.matrix);
+      }
+    }
+  }
+
+  // Free memory.
+  gsl_matrix_free(mat_dd);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_vector_free(D_l);
+
+  return;
 }
 
 // Calculate all Hiy.
-void Calc_Hiy_all (const gsl_matrix *Y, const gsl_matrix *Hi_all,
-		   gsl_matrix *Hiy_all) {
-	gsl_matrix_set_zero (Hiy_all);
+void Calc_Hiy_all(const gsl_matrix *Y, const gsl_matrix *Hi_all,
+                  gsl_matrix *Hiy_all) {
+  gsl_matrix_set_zero(Hiy_all);
 
-	size_t n_size=Y->size2, d_size=Y->size1;
+  size_t n_size = Y->size2, d_size = Y->size1;
 
-	for (size_t k=0; k<n_size; k++) {
-		gsl_matrix_const_view Hi_k=
-		  gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size);
-		gsl_vector_const_view y_k=gsl_matrix_const_column(Y,k);
-		gsl_vector_view Hiy_k=gsl_matrix_column(Hiy_all, k);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_matrix_const_view Hi_k =
+        gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
+    gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
+    gsl_vector_view Hiy_k = gsl_matrix_column(Hiy_all, k);
 
-		gsl_blas_dgemv(CblasNoTrans,1.0,&Hi_k.matrix,&y_k.vector,
-			       0.0,&Hiy_k.vector);
-	}
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Hi_k.matrix, &y_k.vector, 0.0,
+                   &Hiy_k.vector);
+  }
 
-	return;
+  return;
 }
 
 // Calculate all xHi.
-void Calc_xHi_all (const gsl_matrix *X, const gsl_matrix *Hi_all,
-		   gsl_matrix *xHi_all) {
-	gsl_matrix_set_zero (xHi_all);
+void Calc_xHi_all(const gsl_matrix *X, const gsl_matrix *Hi_all,
+                  gsl_matrix *xHi_all) {
+  gsl_matrix_set_zero(xHi_all);
 
-	size_t n_size=X->size2, c_size=X->size1, d_size=Hi_all->size1;
+  size_t n_size = X->size2, c_size = X->size1, d_size = Hi_all->size1;
 
-	double d;
+  double d;
 
-	for (size_t k=0; k<n_size; k++) {
-		gsl_matrix_const_view Hi_k=
-		  gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_matrix_const_view Hi_k =
+        gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
 
-		for (size_t i=0; i<c_size; i++) {
-			d=gsl_matrix_get (X, i, k);
-			gsl_matrix_view xHi_sub=
-			  gsl_matrix_submatrix(xHi_all,i*d_size,k*d_size,
-					       d_size,d_size);
-			gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix);
-			gsl_matrix_scale(&xHi_sub.matrix, d);
-		}
-	}
+    for (size_t i = 0; i < c_size; i++) {
+      d = gsl_matrix_get(X, i, k);
+      gsl_matrix_view xHi_sub =
+          gsl_matrix_submatrix(xHi_all, i * d_size, k * d_size, d_size, d_size);
+      gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix);
+      gsl_matrix_scale(&xHi_sub.matrix, d);
+    }
+  }
 
-	return;
+  return;
 }
 
 // Calculate scalar yHiy.
-double Calc_yHiy (const gsl_matrix *Y, const gsl_matrix *Hiy_all) {
-	double yHiy=0.0, d;
-	size_t n_size=Y->size2;
+double Calc_yHiy(const gsl_matrix *Y, const gsl_matrix *Hiy_all) {
+  double yHiy = 0.0, d;
+  size_t n_size = Y->size2;
 
-	for (size_t k=0; k<n_size; k++) {
-	  gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
-	  gsl_vector_const_view Hiy_k=gsl_matrix_const_column(Hiy_all, k);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
+    gsl_vector_const_view Hiy_k = gsl_matrix_const_column(Hiy_all, k);
 
-	  gsl_blas_ddot (&Hiy_k.vector, &y_k.vector, &d);
-	  yHiy+=d;
-	}
+    gsl_blas_ddot(&Hiy_k.vector, &y_k.vector, &d);
+    yHiy += d;
+  }
 
-	return yHiy;
+  return yHiy;
 }
 
 // Calculate the vector xHiy.
-void Calc_xHiy (const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) {
-	gsl_vector_set_zero (xHiy);
+void Calc_xHiy(const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) {
+  gsl_vector_set_zero(xHiy);
 
-	size_t n_size=Y->size2, d_size=Y->size1, dc_size=xHi->size1;
+  size_t n_size = Y->size2, d_size = Y->size1, dc_size = xHi->size1;
 
-	for (size_t k=0; k<n_size; k++) {
-		gsl_matrix_const_view xHi_k=
-		  gsl_matrix_const_submatrix(xHi,0,k*d_size,dc_size,d_size);
-		gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
+  for (size_t k = 0; k < n_size; k++) {
+    gsl_matrix_const_view xHi_k =
+        gsl_matrix_const_submatrix(xHi, 0, k * d_size, dc_size, d_size);
+    gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
 
-		gsl_blas_dgemv(CblasNoTrans,1.0,&xHi_k.matrix,&y_k.vector,
-			       1.0,xHiy);
-	}
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &xHi_k.matrix, &y_k.vector, 1.0, xHiy);
+  }
 
-	return;
+  return;
 }
 
 // 0<=i,j<d_size
-size_t GetIndex (const size_t i, const size_t j, const size_t d_size) {
-	if (i>=d_size || j>=d_size) {
-	  cout<<"error in GetIndex."<<endl;
-	  return 0;
-	}
-
-	size_t s, l;
-	if (j<i) {
-	  s=j;
-	  l=i;
-	} else {
-	  s=i;
-	  l=j;
-	}
-
-	return (2*d_size-s+1)*s/2+l-s;
+size_t GetIndex(const size_t i, const size_t j, const size_t d_size) {
+  if (i >= d_size || j >= d_size) {
+    cout << "error in GetIndex." << endl;
+    return 0;
+  }
+
+  size_t s, l;
+  if (j < i) {
+    s = j;
+    l = i;
+  } else {
+    s = i;
+    l = j;
+  }
+
+  return (2 * d_size - s + 1) * s / 2 + l - s;
 }
 
-void Calc_yHiDHiy (const gsl_vector *eval, const gsl_matrix *Hiy,
-		   const size_t i, const size_t j, double &yHiDHiy_g,
-		   double &yHiDHiy_e) {
-	yHiDHiy_g=0.0;
-	yHiDHiy_e=0.0;
+void Calc_yHiDHiy(const gsl_vector *eval, const gsl_matrix *Hiy, const size_t i,
+                  const size_t j, double &yHiDHiy_g, double &yHiDHiy_e) {
+  yHiDHiy_g = 0.0;
+  yHiDHiy_e = 0.0;
 
-	size_t n_size=eval->size;
+  size_t n_size = eval->size;
 
-	double delta, d1, d2;
+  double delta, d1, d2;
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-		d1=gsl_matrix_get (Hiy, i, k);
-		d2=gsl_matrix_get (Hiy, j, k);
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    d1 = gsl_matrix_get(Hiy, i, k);
+    d2 = gsl_matrix_get(Hiy, j, k);
 
-		if (i==j) {
-			yHiDHiy_g+=delta*d1*d2;
-			yHiDHiy_e+=d1*d2;
-		} else {
-			yHiDHiy_g+=delta*d1*d2*2.0;
-			yHiDHiy_e+=d1*d2*2.0;
-		}
-	}
+    if (i == j) {
+      yHiDHiy_g += delta * d1 * d2;
+      yHiDHiy_e += d1 * d2;
+    } else {
+      yHiDHiy_g += delta * d1 * d2 * 2.0;
+      yHiDHiy_e += d1 * d2 * 2.0;
+    }
+  }
 
-	return;
+  return;
 }
 
-void Calc_xHiDHiy (const gsl_vector *eval, const gsl_matrix *xHi,
-		   const gsl_matrix *Hiy, const size_t i, const size_t j,
-		   gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) {
-	gsl_vector_set_zero(xHiDHiy_g);
-	gsl_vector_set_zero(xHiDHiy_e);
+void Calc_xHiDHiy(const gsl_vector *eval, const gsl_matrix *xHi,
+                  const gsl_matrix *Hiy, const size_t i, const size_t j,
+                  gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) {
+  gsl_vector_set_zero(xHiDHiy_g);
+  gsl_vector_set_zero(xHiDHiy_e);
+
+  size_t n_size = eval->size, d_size = Hiy->size1;
 
-	size_t n_size=eval->size, d_size=Hiy->size1;
+  double delta, d;
 
-	double delta, d;
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
+    gsl_vector_const_view xHi_col_i =
+        gsl_matrix_const_column(xHi, k * d_size + i);
+    d = gsl_matrix_get(Hiy, j, k);
 
-		gsl_vector_const_view xHi_col_i=
-		  gsl_matrix_const_column (xHi, k*d_size+i);
-		d=gsl_matrix_get (Hiy, j, k);
+    gsl_blas_daxpy(d * delta, &xHi_col_i.vector, xHiDHiy_g);
+    gsl_blas_daxpy(d, &xHi_col_i.vector, xHiDHiy_e);
 
-		gsl_blas_daxpy (d*delta, &xHi_col_i.vector, xHiDHiy_g);
-		gsl_blas_daxpy (d, &xHi_col_i.vector, xHiDHiy_e);
+    if (i != j) {
+      gsl_vector_const_view xHi_col_j =
+          gsl_matrix_const_column(xHi, k * d_size + j);
+      d = gsl_matrix_get(Hiy, i, k);
 
-		if (i!=j) {
-		  gsl_vector_const_view xHi_col_j=
-		    gsl_matrix_const_column (xHi, k*d_size+j);
-		  d=gsl_matrix_get (Hiy, i, k);
-		  
-		  gsl_blas_daxpy (d*delta, &xHi_col_j.vector, xHiDHiy_g);
-		  gsl_blas_daxpy (d, &xHi_col_j.vector, xHiDHiy_e);
-		}
-	}
+      gsl_blas_daxpy(d * delta, &xHi_col_j.vector, xHiDHiy_g);
+      gsl_blas_daxpy(d, &xHi_col_j.vector, xHiDHiy_e);
+    }
+  }
 
-	return;
+  return;
 }
 
-void Calc_xHiDHix (const gsl_vector *eval, const gsl_matrix *xHi,
-		   const size_t i, const size_t j, gsl_matrix *xHiDHix_g,
-		   gsl_matrix *xHiDHix_e) {
-	gsl_matrix_set_zero(xHiDHix_g);
-	gsl_matrix_set_zero(xHiDHix_e);
+void Calc_xHiDHix(const gsl_vector *eval, const gsl_matrix *xHi, const size_t i,
+                  const size_t j, gsl_matrix *xHiDHix_g,
+                  gsl_matrix *xHiDHix_e) {
+  gsl_matrix_set_zero(xHiDHix_g);
+  gsl_matrix_set_zero(xHiDHix_e);
 
-	size_t n_size=eval->size, dc_size=xHi->size1;
-	size_t d_size=xHi->size2/n_size;
+  size_t n_size = eval->size, dc_size = xHi->size1;
+  size_t d_size = xHi->size2 / n_size;
 
-	double delta;
+  double delta;
 
-	gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *mat_dcdc_t=gsl_matrix_alloc (dc_size, dc_size);
+  gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *mat_dcdc_t = gsl_matrix_alloc(dc_size, dc_size);
 
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
 
-		gsl_vector_const_view xHi_col_i=
-		  gsl_matrix_const_column (xHi, k*d_size+i);
-		gsl_vector_const_view xHi_col_j=
-		  gsl_matrix_const_column (xHi, k*d_size+j);
+    gsl_vector_const_view xHi_col_i =
+        gsl_matrix_const_column(xHi, k * d_size + i);
+    gsl_vector_const_view xHi_col_j =
+        gsl_matrix_const_column(xHi, k * d_size + j);
 
-		gsl_matrix_set_zero (mat_dcdc);
-		gsl_blas_dger(1.0,&xHi_col_i.vector,&xHi_col_j.vector,
-			      mat_dcdc);
+    gsl_matrix_set_zero(mat_dcdc);
+    gsl_blas_dger(1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc);
 
-		gsl_matrix_transpose_memcpy (mat_dcdc_t, mat_dcdc);
+    gsl_matrix_transpose_memcpy(mat_dcdc_t, mat_dcdc);
 
-		gsl_matrix_add (xHiDHix_e, mat_dcdc);
+    gsl_matrix_add(xHiDHix_e, mat_dcdc);
 
-		gsl_matrix_scale (mat_dcdc, delta);
-		gsl_matrix_add (xHiDHix_g, mat_dcdc);
+    gsl_matrix_scale(mat_dcdc, delta);
+    gsl_matrix_add(xHiDHix_g, mat_dcdc);
 
-		if (i!=j) {
-			gsl_matrix_add (xHiDHix_e, mat_dcdc_t);
+    if (i != j) {
+      gsl_matrix_add(xHiDHix_e, mat_dcdc_t);
 
-			gsl_matrix_scale (mat_dcdc_t, delta);
-			gsl_matrix_add (xHiDHix_g, mat_dcdc_t);
-		}
-	}
+      gsl_matrix_scale(mat_dcdc_t, delta);
+      gsl_matrix_add(xHiDHix_g, mat_dcdc_t);
+    }
+  }
 
-	gsl_matrix_free(mat_dcdc);
-	gsl_matrix_free(mat_dcdc_t);
+  gsl_matrix_free(mat_dcdc);
+  gsl_matrix_free(mat_dcdc_t);
 
-	return;
+  return;
 }
 
-void Calc_yHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi,
-		      const gsl_matrix *Hiy, const size_t i1,
-		      const size_t j1, const size_t i2, const size_t j2,
-		      double &yHiDHiDHiy_gg, double &yHiDHiDHiy_ee,
-		      double &yHiDHiDHiy_ge) {
-	yHiDHiDHiy_gg=0.0;
-	yHiDHiDHiy_ee=0.0;
-	yHiDHiDHiy_ge=0.0;
-
-	size_t n_size=eval->size, d_size=Hiy->size1;
-
-	double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2;
-	double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		d_Hiy_i1=gsl_matrix_get (Hiy, i1, k);
-		d_Hiy_j1=gsl_matrix_get (Hiy, j1, k);
-		d_Hiy_i2=gsl_matrix_get (Hiy, i2, k);
-		d_Hiy_j2=gsl_matrix_get (Hiy, j2, k);
-
-		d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-		d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-		d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-		d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
-		if (i1==j1) {
-		  yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-		  
-		  if (i2!=j2) {
-		    yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
-		  }
-		} else {
-		  yHiDHiDHiy_gg+=
-		    delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 +
-				 d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ee+=
-		    (d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2+d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-		  yHiDHiDHiy_ge+=
-		    delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 +
-			   d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-		  
-		  if (i2!=j2) {
-		    yHiDHiDHiy_gg+=
-		      delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
-				   d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
-				    d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
-		    yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
-					  d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
-		  }
-		}
-	}
-	
-	return;
+void Calc_yHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
+                     const gsl_matrix *Hiy, const size_t i1, const size_t j1,
+                     const size_t i2, const size_t j2, double &yHiDHiDHiy_gg,
+                     double &yHiDHiDHiy_ee, double &yHiDHiDHiy_ge) {
+  yHiDHiDHiy_gg = 0.0;
+  yHiDHiDHiy_ee = 0.0;
+  yHiDHiDHiy_ge = 0.0;
+
+  size_t n_size = eval->size, d_size = Hiy->size1;
+
+  double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2;
+  double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    d_Hiy_i1 = gsl_matrix_get(Hiy, i1, k);
+    d_Hiy_j1 = gsl_matrix_get(Hiy, j1, k);
+    d_Hiy_i2 = gsl_matrix_get(Hiy, i2, k);
+    d_Hiy_j2 = gsl_matrix_get(Hiy, j2, k);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+
+      if (i2 != j2) {
+        yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+      }
+    } else {
+      yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
+                                        d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ee +=
+          (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 + d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+      yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
+                                d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+
+      if (i2 != j2) {
+        yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
+                                          d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ee +=
+            (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 + d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+        yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
+                                  d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+      }
+    }
+  }
+
+  return;
 }
 
-void Calc_xHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi,
-		      const gsl_matrix *xHi, const gsl_matrix *Hiy,
-		      const size_t i1, const size_t j1, const size_t i2,
-		      const size_t j2, gsl_vector *xHiDHiDHiy_gg,
-		      gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) {
-	gsl_vector_set_zero(xHiDHiDHiy_gg);
-	gsl_vector_set_zero(xHiDHiDHiy_ee);
-	gsl_vector_set_zero(xHiDHiDHiy_ge);
-
-	size_t n_size=eval->size, d_size=Hiy->size1;
-
-	double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2;
-	double d_Hi_j1i2, d_Hi_j1j2;
-
-	for (size_t k=0; k<n_size; k++) {
-	  delta=gsl_vector_get (eval, k);
-	  
-	  gsl_vector_const_view xHi_col_i=
-	    gsl_matrix_const_column (xHi, k*d_size+i1);
-	  gsl_vector_const_view xHi_col_j=
-	    gsl_matrix_const_column (xHi, k*d_size+j1);
-	  
-	  d_Hiy_i=gsl_matrix_get (Hiy, i2, k);
-	  d_Hiy_j=gsl_matrix_get (Hiy, j2, k);
-	  
-	  d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-	  d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-	  d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-	  d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-	  
-	  if (i1==j1) {
-	    gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_gg);
-	    gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ee);
-	    gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ge);
-		  
-	    if (i2!=j2) {
-	      gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i,
-			      &xHi_col_i.vector, xHiDHiDHiy_gg);
-	      gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ee);
-	      gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ge);
-	    }
-	  } else {
-	    gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_gg);
-	    gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ee);
-	    gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
-			    xHiDHiDHiy_ge);
-	    
-	    gsl_blas_daxpy (delta*delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
-			    xHiDHiDHiy_gg);
-	    gsl_blas_daxpy (d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
-			    xHiDHiDHiy_ee);
-	    gsl_blas_daxpy (delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
-			    xHiDHiDHiy_ge);
-	    
-	    if (i2!=j2) {
-	      gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i,
-			      &xHi_col_i.vector, xHiDHiDHiy_gg);
-	      gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ee);
-	      gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
-			      xHiDHiDHiy_ge);
-	      
-	      gsl_blas_daxpy (delta*delta*d_Hi_i1j2*d_Hiy_i,
-			      &xHi_col_j.vector, xHiDHiDHiy_gg);
-	      gsl_blas_daxpy (d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector,
-			      xHiDHiDHiy_ee);
-	      gsl_blas_daxpy (delta*d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector,
-			      xHiDHiDHiy_ge);
-	    }
-	  }
-	}
-	
-	return;
+void Calc_xHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
+                     const gsl_matrix *xHi, const gsl_matrix *Hiy,
+                     const size_t i1, const size_t j1, const size_t i2,
+                     const size_t j2, gsl_vector *xHiDHiDHiy_gg,
+                     gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) {
+  gsl_vector_set_zero(xHiDHiDHiy_gg);
+  gsl_vector_set_zero(xHiDHiDHiy_ee);
+  gsl_vector_set_zero(xHiDHiDHiy_ge);
+
+  size_t n_size = eval->size, d_size = Hiy->size1;
+
+  double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2;
+  double d_Hi_j1i2, d_Hi_j1j2;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    gsl_vector_const_view xHi_col_i =
+        gsl_matrix_const_column(xHi, k * d_size + i1);
+    gsl_vector_const_view xHi_col_j =
+        gsl_matrix_const_column(xHi, k * d_size + j1);
+
+    d_Hiy_i = gsl_matrix_get(Hiy, i2, k);
+    d_Hiy_j = gsl_matrix_get(Hiy, j2, k);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_gg);
+      gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
+      gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_ge);
+
+      if (i2 != j2) {
+        gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_gg);
+        gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
+        gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_ge);
+      }
+    } else {
+      gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_gg);
+      gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
+      gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+                     xHiDHiDHiy_ge);
+
+      gsl_blas_daxpy(delta * delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
+                     xHiDHiDHiy_gg);
+      gsl_blas_daxpy(d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_ee);
+      gsl_blas_daxpy(delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
+                     xHiDHiDHiy_ge);
+
+      if (i2 != j2) {
+        gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_gg);
+        gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
+        gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+                       xHiDHiDHiy_ge);
+
+        gsl_blas_daxpy(delta * delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
+                       xHiDHiDHiy_gg);
+        gsl_blas_daxpy(d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_ee);
+        gsl_blas_daxpy(delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
+                       xHiDHiDHiy_ge);
+      }
+    }
+  }
+
+  return;
 }
 
+void Calc_xHiDHiDHix(const gsl_vector *eval, const gsl_matrix *Hi,
+                     const gsl_matrix *xHi, const size_t i1, const size_t j1,
+                     const size_t i2, const size_t j2,
+                     gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee,
+                     gsl_matrix *xHiDHiDHix_ge) {
+  gsl_matrix_set_zero(xHiDHiDHix_gg);
+  gsl_matrix_set_zero(xHiDHiDHix_ee);
+  gsl_matrix_set_zero(xHiDHiDHix_ge);
+
+  size_t n_size = eval->size, d_size = Hi->size1, dc_size = xHi->size1;
+
+  double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+  gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    gsl_vector_const_view xHi_col_i1 =
+        gsl_matrix_const_column(xHi, k * d_size + i1);
+    gsl_vector_const_view xHi_col_j1 =
+        gsl_matrix_const_column(xHi, k * d_size + j1);
+    gsl_vector_const_view xHi_col_i2 =
+        gsl_matrix_const_column(xHi, k * d_size + i2);
+    gsl_vector_const_view xHi_col_j2 =
+        gsl_matrix_const_column(xHi, k * d_size + j2);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      gsl_matrix_set_zero(mat_dcdc);
+      gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
+                    mat_dcdc);
+
+      gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+      if (i2 != j2) {
+        gsl_matrix_set_zero(mat_dcdc);
+        gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
+                      mat_dcdc);
+
+        gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+      }
+    } else {
+      gsl_matrix_set_zero(mat_dcdc);
+      gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
+                    mat_dcdc);
+
+      gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+      gsl_matrix_set_zero(mat_dcdc);
+      gsl_blas_dger(d_Hi_i1i2, &xHi_col_j1.vector, &xHi_col_j2.vector,
+                    mat_dcdc);
+
+      gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+      gsl_matrix_scale(mat_dcdc, delta);
+      gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+      if (i2 != j2) {
+        gsl_matrix_set_zero(mat_dcdc);
+        gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
+                      mat_dcdc);
+
+        gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+        gsl_matrix_set_zero(mat_dcdc);
+        gsl_blas_dger(d_Hi_i1j2, &xHi_col_j1.vector, &xHi_col_i2.vector,
+                      mat_dcdc);
+
+        gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+        gsl_matrix_scale(mat_dcdc, delta);
+        gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+      }
+    }
+  }
+
+  gsl_matrix_free(mat_dcdc);
 
-void Calc_xHiDHiDHix (const gsl_vector *eval, const gsl_matrix *Hi,
-		      const gsl_matrix *xHi, const size_t i1, const size_t j1,
-		      const size_t i2, const size_t j2,
-		      gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee,
-		      gsl_matrix *xHiDHiDHix_ge) {
-	gsl_matrix_set_zero(xHiDHiDHix_gg);
-	gsl_matrix_set_zero(xHiDHiDHix_ee);
-	gsl_matrix_set_zero(xHiDHiDHix_ge);
-
-	size_t n_size=eval->size, d_size=Hi->size1, dc_size=xHi->size1;
-
-	double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
-	gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		gsl_vector_const_view xHi_col_i1=
-		  gsl_matrix_const_column (xHi, k*d_size+i1);
-		gsl_vector_const_view xHi_col_j1=
-		  gsl_matrix_const_column (xHi, k*d_size+j1);
-		gsl_vector_const_view xHi_col_i2=
-		  gsl_matrix_const_column (xHi, k*d_size+i2);
-		gsl_vector_const_view xHi_col_j2=
-		  gsl_matrix_const_column (xHi, k*d_size+j2);
-
-		d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-		d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-		d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-		d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
-		if (i1==j1) {
-		  gsl_matrix_set_zero (mat_dcdc);
-		  gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector,
-				 &xHi_col_j2.vector, mat_dcdc);
-
-		  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-		  
-		  if (i2!=j2) {
-		    gsl_matrix_set_zero (mat_dcdc);
-		    gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector,
-				   &xHi_col_i2.vector, mat_dcdc);
-		    
-		    gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-		    gsl_matrix_scale(mat_dcdc, delta);
-		    gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-		    gsl_matrix_scale(mat_dcdc, delta);
-		    gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-		  }
-		} else {
-		  gsl_matrix_set_zero (mat_dcdc);
-		  gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector,
-				 &xHi_col_j2.vector, mat_dcdc);
-		  
-		  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-		  gsl_matrix_scale(mat_dcdc, delta);
-		  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-		  
-		  gsl_matrix_set_zero (mat_dcdc);
-		  gsl_blas_dger (d_Hi_i1i2, &xHi_col_j1.vector,
-				 &xHi_col_j2.vector, mat_dcdc);
-
-			gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-			gsl_matrix_scale(mat_dcdc, delta);
-			gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-			gsl_matrix_scale(mat_dcdc, delta);
-			gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
-			if (i2!=j2) {
-			  gsl_matrix_set_zero (mat_dcdc);
-			  gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector,
-					 &xHi_col_i2.vector, mat_dcdc);
-			  
-			  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-			  
-			  gsl_matrix_set_zero (mat_dcdc);
-			  gsl_blas_dger (d_Hi_i1j2, &xHi_col_j1.vector,
-					 &xHi_col_i2.vector, mat_dcdc);
-			  
-			  gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
-			  gsl_matrix_scale(mat_dcdc, delta);
-			  gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-			}
-		}
-	}
-
-	gsl_matrix_free(mat_dcdc);
-
-	return;
+  return;
 }
 
-void Calc_traceHiD (const gsl_vector *eval, const gsl_matrix *Hi,
-		    const size_t i, const size_t j, double &tHiD_g,
-		    double &tHiD_e) {
-	tHiD_g=0.0;
-	tHiD_e=0.0;
-
-	size_t n_size=eval->size, d_size=Hi->size1;
-	double delta, d;
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-		d=gsl_matrix_get (Hi, j, k*d_size+i);
-
-		if (i==j) {
-			tHiD_g+=delta*d;
-			tHiD_e+=d;
-		} else {
-			tHiD_g+=delta*d*2.0;
-			tHiD_e+=d*2.0;
-		}
-	}
-
-	return;
+void Calc_traceHiD(const gsl_vector *eval, const gsl_matrix *Hi, const size_t i,
+                   const size_t j, double &tHiD_g, double &tHiD_e) {
+  tHiD_g = 0.0;
+  tHiD_e = 0.0;
+
+  size_t n_size = eval->size, d_size = Hi->size1;
+  double delta, d;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+    d = gsl_matrix_get(Hi, j, k * d_size + i);
+
+    if (i == j) {
+      tHiD_g += delta * d;
+      tHiD_e += d;
+    } else {
+      tHiD_g += delta * d * 2.0;
+      tHiD_e += d * 2.0;
+    }
+  }
+
+  return;
 }
 
-void Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi,
-		       const size_t i1, const size_t j1, const size_t i2,
-		       const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee,
-		       double &tHiDHiD_ge) {
-	tHiDHiD_gg=0.0;
-	tHiDHiD_ee=0.0;
-	tHiDHiD_ge=0.0;
-
-	size_t n_size=eval->size, d_size=Hi->size1;
-	double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
-	for (size_t k=0; k<n_size; k++) {
-		delta=gsl_vector_get (eval, k);
-
-		d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
-		d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
-		d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
-		d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
-		if (i1==j1) {
-		  tHiDHiD_gg+=delta*delta*d_Hi_i1j2*d_Hi_j1i2;
-		  tHiDHiD_ee+=d_Hi_i1j2*d_Hi_j1i2;
-		  tHiDHiD_ge+=delta*d_Hi_i1j2*d_Hi_j1i2;
-		  
-		  if (i2!=j2) {
-		    tHiDHiD_gg+=delta*delta*d_Hi_i1i2*d_Hi_j1j2;
-		    tHiDHiD_ee+=d_Hi_i1i2*d_Hi_j1j2;
-		    tHiDHiD_ge+=delta*d_Hi_i1i2*d_Hi_j1j2;
-		  }
-		} else {
-		  tHiDHiD_gg+=delta*delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*
-					   d_Hi_i1i2);
-		  tHiDHiD_ee+=(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
-		  tHiDHiD_ge+=delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
-		  
-		  if (i2!=j2) {
-		    tHiDHiD_gg+=delta*delta*(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*
-					     d_Hi_i1j2);
-		    tHiDHiD_ee+=(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*d_Hi_i1j2);
-		    tHiDHiD_ge+=delta*(d_Hi_i1i2*d_Hi_j1j2 +
-				       d_Hi_j1i2*d_Hi_i1j2);
-		  }
-		}
-	}
-
-	return;
+void Calc_traceHiDHiD(const gsl_vector *eval, const gsl_matrix *Hi,
+                      const size_t i1, const size_t j1, const size_t i2,
+                      const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee,
+                      double &tHiDHiD_ge) {
+  tHiDHiD_gg = 0.0;
+  tHiDHiD_ee = 0.0;
+  tHiDHiD_ge = 0.0;
+
+  size_t n_size = eval->size, d_size = Hi->size1;
+  double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+  for (size_t k = 0; k < n_size; k++) {
+    delta = gsl_vector_get(eval, k);
+
+    d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+    d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+    d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+    d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+    if (i1 == j1) {
+      tHiDHiD_gg += delta * delta * d_Hi_i1j2 * d_Hi_j1i2;
+      tHiDHiD_ee += d_Hi_i1j2 * d_Hi_j1i2;
+      tHiDHiD_ge += delta * d_Hi_i1j2 * d_Hi_j1i2;
+
+      if (i2 != j2) {
+        tHiDHiD_gg += delta * delta * d_Hi_i1i2 * d_Hi_j1j2;
+        tHiDHiD_ee += d_Hi_i1i2 * d_Hi_j1j2;
+        tHiDHiD_ge += delta * d_Hi_i1i2 * d_Hi_j1j2;
+      }
+    } else {
+      tHiDHiD_gg +=
+          delta * delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+      tHiDHiD_ee += (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+      tHiDHiD_ge += delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+
+      if (i2 != j2) {
+        tHiDHiD_gg +=
+            delta * delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+        tHiDHiD_ee += (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+        tHiDHiD_ge += delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+      }
+    }
+  }
+
+  return;
 }
 
 // trace(PD) = trace((Hi-HixQixHi)D)=trace(HiD) - trace(HixQixHiD)
-void Calc_tracePD (const gsl_vector *eval, const gsl_matrix *Qi,
-		   const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g,
-		   const gsl_matrix *xHiDHix_all_e, const size_t i,
-		   const size_t j, double &tPD_g, double &tPD_e) {
-	size_t dc_size=Qi->size1, d_size=Hi->size1;
-	size_t v=GetIndex(i, j, d_size);
-
-	double d;
-
-	// Calculate the first part: trace(HiD).
-	Calc_traceHiD (eval, Hi, i, j, tPD_g, tPD_e);
-
-	// Calculate the second part: -trace(HixQixHiD).
-	for (size_t k=0; k<dc_size; k++) {
-		gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, k);
-		gsl_vector_const_view xHiDHix_g_col =
-		  gsl_matrix_const_column (xHiDHix_all_g, v*dc_size+k);
-		gsl_vector_const_view xHiDHix_e_col =
-		  gsl_matrix_const_column (xHiDHix_all_e, v*dc_size+k);
-
-		gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
-		tPD_g-=d;
-		gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
-		tPD_e-=d;
-	}
-
-	return;
+void Calc_tracePD(const gsl_vector *eval, const gsl_matrix *Qi,
+                  const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g,
+                  const gsl_matrix *xHiDHix_all_e, const size_t i,
+                  const size_t j, double &tPD_g, double &tPD_e) {
+  size_t dc_size = Qi->size1, d_size = Hi->size1;
+  size_t v = GetIndex(i, j, d_size);
+
+  double d;
+
+  // Calculate the first part: trace(HiD).
+  Calc_traceHiD(eval, Hi, i, j, tPD_g, tPD_e);
+
+  // Calculate the second part: -trace(HixQixHiD).
+  for (size_t k = 0; k < dc_size; k++) {
+    gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, k);
+    gsl_vector_const_view xHiDHix_g_col =
+        gsl_matrix_const_column(xHiDHix_all_g, v * dc_size + k);
+    gsl_vector_const_view xHiDHix_e_col =
+        gsl_matrix_const_column(xHiDHix_all_e, v * dc_size + k);
+
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
+    tPD_g -= d;
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
+    tPD_e -= d;
+  }
+
+  return;
 }
 
 // trace(PDPD) = trace((Hi-HixQixHi)D(Hi-HixQixHi)D)
 //             = trace(HiDHiD) - trace(HixQixHiDHiD)
 //               - trace(HiDHixQixHiD) + trace(HixQixHiDHixQixHiD)
-void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi,
-		     const gsl_matrix *Hi, const gsl_matrix *xHi,
-		     const gsl_matrix *QixHiDHix_all_g,
-		     const gsl_matrix *QixHiDHix_all_e,
-		     const gsl_matrix *xHiDHiDHix_all_gg,
-		     const gsl_matrix *xHiDHiDHix_all_ee,
-		     const gsl_matrix *xHiDHiDHix_all_ge,
-		     const size_t i1, const size_t j1,
-		     const size_t i2, const size_t j2,
-		     double &tPDPD_gg, double &tPDPD_ee,
-		     double &tPDPD_ge) {
-	size_t dc_size=Qi->size1, d_size=Hi->size1;
-	size_t v_size=d_size*(d_size+1)/2;
-	size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
-
-	double d;
-
-	// Calculate the first part: trace(HiDHiD).
-	Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee,
-			  tPDPD_ge);
-
-	// Calculate the second and third parts:
-	// -trace(HixQixHiDHiD) - trace(HiDHixQixHiD)
-	for (size_t i=0; i<dc_size; i++) {
-	  gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, i);
-	  gsl_vector_const_view xHiDHiDHix_gg_col=
-	    gsl_matrix_const_column(xHiDHiDHix_all_gg,
-				    (v1*v_size+v2)*dc_size+i);
-	  gsl_vector_const_view xHiDHiDHix_ee_col =
-	    gsl_matrix_const_column(xHiDHiDHix_all_ee,
-				    (v1*v_size+v2)*dc_size+i);
-	  gsl_vector_const_view xHiDHiDHix_ge_col =
-	    gsl_matrix_const_column(xHiDHiDHix_all_ge,
-				    (v1*v_size+v2)*dc_size+i);
-	  
-	  gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d);
-	  tPDPD_gg-=d*2.0;
-	  gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d);
-	  tPDPD_ee-=d*2.0;
-	  gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d);
-	  tPDPD_ge-=d*2.0;
-	}
-
-	// Calculate the fourth part: trace(HixQixHiDHixQixHiD).
-	for (size_t i=0; i<dc_size; i++) {
-
-	  gsl_vector_const_view QixHiDHix_g_fullrow1 =
-	    gsl_matrix_const_row (QixHiDHix_all_g, i);
-	  gsl_vector_const_view QixHiDHix_e_fullrow1 =
-	    gsl_matrix_const_row (QixHiDHix_all_e, i);
-	  gsl_vector_const_view QixHiDHix_g_row1 =
-	    gsl_vector_const_subvector (&QixHiDHix_g_fullrow1.vector,
-					v1*dc_size, dc_size);
-	  gsl_vector_const_view QixHiDHix_e_row1 =
-	    gsl_vector_const_subvector (&QixHiDHix_e_fullrow1.vector,
-					v1*dc_size, dc_size);
-	  
-	  gsl_vector_const_view QixHiDHix_g_col2 =
-	    gsl_matrix_const_column (QixHiDHix_all_g, v2*dc_size+i);
-	  gsl_vector_const_view QixHiDHix_e_col2 =
-	    gsl_matrix_const_column (QixHiDHix_all_e, v2*dc_size+i);
-	  
-	  gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_g_col2.vector,&d);
-	  tPDPD_gg+=d;
-	  gsl_blas_ddot(&QixHiDHix_e_row1.vector,&QixHiDHix_e_col2.vector,&d);
-	  tPDPD_ee+=d;
-	  gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_e_col2.vector,&d);
-	  tPDPD_ge+=d;
-	}
-
-	return;
+void Calc_tracePDPD(const gsl_vector *eval, const gsl_matrix *Qi,
+                    const gsl_matrix *Hi, const gsl_matrix *xHi,
+                    const gsl_matrix *QixHiDHix_all_g,
+                    const gsl_matrix *QixHiDHix_all_e,
+                    const gsl_matrix *xHiDHiDHix_all_gg,
+                    const gsl_matrix *xHiDHiDHix_all_ee,
+                    const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1,
+                    const size_t j1, const size_t i2, const size_t j2,
+                    double &tPDPD_gg, double &tPDPD_ee, double &tPDPD_ge) {
+  size_t dc_size = Qi->size1, d_size = Hi->size1;
+  size_t v_size = d_size * (d_size + 1) / 2;
+  size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
+
+  double d;
+
+  // Calculate the first part: trace(HiDHiD).
+  Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
+
+  // Calculate the second and third parts:
+  // -trace(HixQixHiDHiD) - trace(HiDHixQixHiD)
+  for (size_t i = 0; i < dc_size; i++) {
+    gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, i);
+    gsl_vector_const_view xHiDHiDHix_gg_col = gsl_matrix_const_column(
+        xHiDHiDHix_all_gg, (v1 * v_size + v2) * dc_size + i);
+    gsl_vector_const_view xHiDHiDHix_ee_col = gsl_matrix_const_column(
+        xHiDHiDHix_all_ee, (v1 * v_size + v2) * dc_size + i);
+    gsl_vector_const_view xHiDHiDHix_ge_col = gsl_matrix_const_column(
+        xHiDHiDHix_all_ge, (v1 * v_size + v2) * dc_size + i);
+
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d);
+    tPDPD_gg -= d * 2.0;
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d);
+    tPDPD_ee -= d * 2.0;
+    gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d);
+    tPDPD_ge -= d * 2.0;
+  }
+
+  // Calculate the fourth part: trace(HixQixHiDHixQixHiD).
+  for (size_t i = 0; i < dc_size; i++) {
+
+    gsl_vector_const_view QixHiDHix_g_fullrow1 =
+        gsl_matrix_const_row(QixHiDHix_all_g, i);
+    gsl_vector_const_view QixHiDHix_e_fullrow1 =
+        gsl_matrix_const_row(QixHiDHix_all_e, i);
+    gsl_vector_const_view QixHiDHix_g_row1 = gsl_vector_const_subvector(
+        &QixHiDHix_g_fullrow1.vector, v1 * dc_size, dc_size);
+    gsl_vector_const_view QixHiDHix_e_row1 = gsl_vector_const_subvector(
+        &QixHiDHix_e_fullrow1.vector, v1 * dc_size, dc_size);
+
+    gsl_vector_const_view QixHiDHix_g_col2 =
+        gsl_matrix_const_column(QixHiDHix_all_g, v2 * dc_size + i);
+    gsl_vector_const_view QixHiDHix_e_col2 =
+        gsl_matrix_const_column(QixHiDHix_all_e, v2 * dc_size + i);
+
+    gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_g_col2.vector, &d);
+    tPDPD_gg += d;
+    gsl_blas_ddot(&QixHiDHix_e_row1.vector, &QixHiDHix_e_col2.vector, &d);
+    tPDPD_ee += d;
+    gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_e_col2.vector, &d);
+    tPDPD_ge += d;
+  }
+
+  return;
 }
 
 // Calculate (xHiDHiy) for every pair (i,j).
-void Calc_xHiDHiy_all (const gsl_vector *eval, const gsl_matrix *xHi,
-		       const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g,
-		       gsl_matrix *xHiDHiy_all_e) {
-	gsl_matrix_set_zero(xHiDHiy_all_g);
-	gsl_matrix_set_zero(xHiDHiy_all_e);
-
-	size_t d_size=Hiy->size1;
-	size_t v;
-
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<d_size; j++) {
-	    if (j<i) {continue;}
-	    v=GetIndex(i, j, d_size);
-	    
-	    gsl_vector_view xHiDHiy_g=gsl_matrix_column (xHiDHiy_all_g, v);
-	    gsl_vector_view xHiDHiy_e=gsl_matrix_column (xHiDHiy_all_e, v);
-	    
-	    Calc_xHiDHiy (eval, xHi, Hiy, i, j, &xHiDHiy_g.vector,
-			  &xHiDHiy_e.vector);
-	  }
-	}
-	return;
+void Calc_xHiDHiy_all(const gsl_vector *eval, const gsl_matrix *xHi,
+                      const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g,
+                      gsl_matrix *xHiDHiy_all_e) {
+  gsl_matrix_set_zero(xHiDHiy_all_g);
+  gsl_matrix_set_zero(xHiDHiy_all_e);
+
+  size_t d_size = Hiy->size1;
+  size_t v;
+
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) {
+        continue;
+      }
+      v = GetIndex(i, j, d_size);
+
+      gsl_vector_view xHiDHiy_g = gsl_matrix_column(xHiDHiy_all_g, v);
+      gsl_vector_view xHiDHiy_e = gsl_matrix_column(xHiDHiy_all_e, v);
+
+      Calc_xHiDHiy(eval, xHi, Hiy, i, j, &xHiDHiy_g.vector, &xHiDHiy_e.vector);
+    }
+  }
+  return;
 }
 
 // Calculate (xHiDHix) for every pair (i,j).
-void Calc_xHiDHix_all (const gsl_vector *eval, const gsl_matrix *xHi,
-		       gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) {
+void Calc_xHiDHix_all(const gsl_vector *eval, const gsl_matrix *xHi,
+                      gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) {
   gsl_matrix_set_zero(xHiDHix_all_g);
   gsl_matrix_set_zero(xHiDHix_all_e);
-  
-  size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
+
+  size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1;
   size_t v;
-  
-  for (size_t i=0; i<d_size; i++) {
-    for (size_t j=0; j<d_size; j++) {
-      if (j<i) {continue;}
-      v=GetIndex(i, j, d_size);
-      
+
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) {
+        continue;
+      }
+      v = GetIndex(i, j, d_size);
+
       gsl_matrix_view xHiDHix_g =
-	gsl_matrix_submatrix (xHiDHix_all_g, 0, v*dc_size, dc_size, dc_size);
+          gsl_matrix_submatrix(xHiDHix_all_g, 0, v * dc_size, dc_size, dc_size);
       gsl_matrix_view xHiDHix_e =
-	gsl_matrix_submatrix (xHiDHix_all_e, 0, v*dc_size, dc_size, dc_size);
-      
-      Calc_xHiDHix (eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix);
+          gsl_matrix_submatrix(xHiDHix_all_e, 0, v * dc_size, dc_size, dc_size);
+
+      Calc_xHiDHix(eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix);
     }
   }
   return;
 }
 
 // Calculate (xHiDHiy) for every pair (i,j).
-void Calc_xHiDHiDHiy_all (const size_t v_size, const gsl_vector *eval,
-			  const gsl_matrix *Hi, const gsl_matrix *xHi,
-			  const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg,
-			  gsl_matrix *xHiDHiDHiy_all_ee,
-			  gsl_matrix *xHiDHiDHiy_all_ge) {
-	gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
-	gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
-	gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
-
-	size_t d_size=Hiy->size1;
-	size_t v1, v2;
-
-	for (size_t i1=0; i1<d_size; i1++) {
-	  for (size_t j1=0; j1<d_size; j1++) {
-	    if (j1<i1) {continue;}
-	    v1=GetIndex(i1, j1, d_size);
-	    
-	    for (size_t i2=0; i2<d_size; i2++) {
-	      for (size_t j2=0; j2<d_size; j2++) {
-		if (j2<i2) {continue;}
-		v2=GetIndex(i2, j2, d_size);
-		
-		gsl_vector_view xHiDHiDHiy_gg =
-		  gsl_matrix_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
-		gsl_vector_view xHiDHiDHiy_ee =
-		  gsl_matrix_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
-		gsl_vector_view xHiDHiDHiy_ge =
-		  gsl_matrix_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-		
-		Calc_xHiDHiDHiy (eval, Hi, xHi, Hiy, i1, j1, i2, j2, &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector, &xHiDHiDHiy_ge.vector);
-	      }
-	    }
-	  }
-	}
-	return;
+void Calc_xHiDHiDHiy_all(const size_t v_size, const gsl_vector *eval,
+                         const gsl_matrix *Hi, const gsl_matrix *xHi,
+                         const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg,
+                         gsl_matrix *xHiDHiDHiy_all_ee,
+                         gsl_matrix *xHiDHiDHiy_all_ge) {
+  gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
+  gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
+  gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
+
+  size_t d_size = Hiy->size1;
+  size_t v1, v2;
+
+  for (size_t i1 = 0; i1 < d_size; i1++) {
+    for (size_t j1 = 0; j1 < d_size; j1++) {
+      if (j1 < i1) {
+        continue;
+      }
+      v1 = GetIndex(i1, j1, d_size);
+
+      for (size_t i2 = 0; i2 < d_size; i2++) {
+        for (size_t j2 = 0; j2 < d_size; j2++) {
+          if (j2 < i2) {
+            continue;
+          }
+          v2 = GetIndex(i2, j2, d_size);
+
+          gsl_vector_view xHiDHiDHiy_gg =
+              gsl_matrix_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
+          gsl_vector_view xHiDHiDHiy_ee =
+              gsl_matrix_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
+          gsl_vector_view xHiDHiDHiy_ge =
+              gsl_matrix_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
+
+          Calc_xHiDHiDHiy(eval, Hi, xHi, Hiy, i1, j1, i2, j2,
+                          &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector,
+                          &xHiDHiDHiy_ge.vector);
+        }
+      }
+    }
+  }
+  return;
 }
 
 // Calculate (xHiDHix) for every pair (i,j).
-void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval,
-			  const gsl_matrix *Hi, const gsl_matrix *xHi,
-			  gsl_matrix *xHiDHiDHix_all_gg,
-			  gsl_matrix *xHiDHiDHix_all_ee,
-			  gsl_matrix *xHiDHiDHix_all_ge) {
-	gsl_matrix_set_zero(xHiDHiDHix_all_gg);
-	gsl_matrix_set_zero(xHiDHiDHix_all_ee);
-	gsl_matrix_set_zero(xHiDHiDHix_all_ge);
-
-	size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
-	size_t v1, v2;
-
-	for (size_t i1=0; i1<d_size; i1++) {
-	  for (size_t j1=0; j1<d_size; j1++) {
-	    if (j1<i1) {continue;}
-	    v1=GetIndex(i1, j1, d_size);
-	    
-	    for (size_t i2=0; i2<d_size; i2++) {
-	      for (size_t j2=0; j2<d_size; j2++) {
-		if (j2<i2) {continue;}
-		v2=GetIndex(i2, j2, d_size);
-		
-		if (v2<v1) {continue;}
-		
-		gsl_matrix_view xHiDHiDHix_gg1 =
-		  gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0,
-					(v1*v_size+v2)*dc_size,
-					dc_size, dc_size);
-		gsl_matrix_view xHiDHiDHix_ee1 =
-		  gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0,
-					(v1*v_size+v2)*dc_size,
-					dc_size, dc_size);
-		gsl_matrix_view xHiDHiDHix_ge1 =
-		  gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0,
-					(v1*v_size+v2)*dc_size,
-					dc_size, dc_size);
-		
-		Calc_xHiDHiDHix (eval, Hi, xHi, i1, j1, i2, j2,
-				 &xHiDHiDHix_gg1.matrix,
-				 &xHiDHiDHix_ee1.matrix,
-				 &xHiDHiDHix_ge1.matrix);
-		
-		if (v2!=v1) {
-		  gsl_matrix_view xHiDHiDHix_gg2 =
-		    gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0,
-					  (v2*v_size+v1)*dc_size,
-					  dc_size, dc_size);
-		  gsl_matrix_view xHiDHiDHix_ee2 =
-		    gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0,
-					  (v2*v_size+v1)*dc_size,
-					  dc_size, dc_size);
-		  gsl_matrix_view xHiDHiDHix_ge2 =
-		    gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0,
-					  (v2*v_size+v1)*dc_size,
-					  dc_size, dc_size);
-
-		  gsl_matrix_memcpy (&xHiDHiDHix_gg2.matrix,
-				     &xHiDHiDHix_gg1.matrix);
-		  gsl_matrix_memcpy (&xHiDHiDHix_ee2.matrix,
-				     &xHiDHiDHix_ee1.matrix);
-		  gsl_matrix_memcpy (&xHiDHiDHix_ge2.matrix,
-				     &xHiDHiDHix_ge1.matrix);
-		}
-	      }
-	    }
-	  }
-	}
-
-	return;
+void Calc_xHiDHiDHix_all(const size_t v_size, const gsl_vector *eval,
+                         const gsl_matrix *Hi, const gsl_matrix *xHi,
+                         gsl_matrix *xHiDHiDHix_all_gg,
+                         gsl_matrix *xHiDHiDHix_all_ee,
+                         gsl_matrix *xHiDHiDHix_all_ge) {
+  gsl_matrix_set_zero(xHiDHiDHix_all_gg);
+  gsl_matrix_set_zero(xHiDHiDHix_all_ee);
+  gsl_matrix_set_zero(xHiDHiDHix_all_ge);
+
+  size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1;
+  size_t v1, v2;
+
+  for (size_t i1 = 0; i1 < d_size; i1++) {
+    for (size_t j1 = 0; j1 < d_size; j1++) {
+      if (j1 < i1) {
+        continue;
+      }
+      v1 = GetIndex(i1, j1, d_size);
+
+      for (size_t i2 = 0; i2 < d_size; i2++) {
+        for (size_t j2 = 0; j2 < d_size; j2++) {
+          if (j2 < i2) {
+            continue;
+          }
+          v2 = GetIndex(i2, j2, d_size);
+
+          if (v2 < v1) {
+            continue;
+          }
+
+          gsl_matrix_view xHiDHiDHix_gg1 = gsl_matrix_submatrix(
+              xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size,
+              dc_size);
+          gsl_matrix_view xHiDHiDHix_ee1 = gsl_matrix_submatrix(
+              xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size,
+              dc_size);
+          gsl_matrix_view xHiDHiDHix_ge1 = gsl_matrix_submatrix(
+              xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size,
+              dc_size);
+
+          Calc_xHiDHiDHix(eval, Hi, xHi, i1, j1, i2, j2, &xHiDHiDHix_gg1.matrix,
+                          &xHiDHiDHix_ee1.matrix, &xHiDHiDHix_ge1.matrix);
+
+          if (v2 != v1) {
+            gsl_matrix_view xHiDHiDHix_gg2 = gsl_matrix_submatrix(
+                xHiDHiDHix_all_gg, 0, (v2 * v_size + v1) * dc_size, dc_size,
+                dc_size);
+            gsl_matrix_view xHiDHiDHix_ee2 = gsl_matrix_submatrix(
+                xHiDHiDHix_all_ee, 0, (v2 * v_size + v1) * dc_size, dc_size,
+                dc_size);
+            gsl_matrix_view xHiDHiDHix_ge2 = gsl_matrix_submatrix(
+                xHiDHiDHix_all_ge, 0, (v2 * v_size + v1) * dc_size, dc_size,
+                dc_size);
+
+            gsl_matrix_memcpy(&xHiDHiDHix_gg2.matrix, &xHiDHiDHix_gg1.matrix);
+            gsl_matrix_memcpy(&xHiDHiDHix_ee2.matrix, &xHiDHiDHix_ee1.matrix);
+            gsl_matrix_memcpy(&xHiDHiDHix_ge2.matrix, &xHiDHiDHix_ge1.matrix);
+          }
+        }
+      }
+    }
+  }
+
+  return;
 }
 
 // Calculate (xHiDHix)Qi(xHiy) for every pair (i,j).
-void Calc_xHiDHixQixHiy_all (const gsl_matrix *xHiDHix_all_g,
-			     const gsl_matrix *xHiDHix_all_e,
-			     const gsl_vector *QixHiy,
-			     gsl_matrix *xHiDHixQixHiy_all_g,
-			     gsl_matrix *xHiDHixQixHiy_all_e) {
-	size_t dc_size=xHiDHix_all_g->size1;
-	size_t v_size=xHiDHix_all_g->size2/dc_size;
-
-	for (size_t i=0; i<v_size; i++) {
-		gsl_matrix_const_view xHiDHix_g =
-		  gsl_matrix_const_submatrix (xHiDHix_all_g, 0, i*dc_size,
-					      dc_size, dc_size);
-		gsl_matrix_const_view xHiDHix_e =
-		  gsl_matrix_const_submatrix (xHiDHix_all_e, 0, i*dc_size,
-					      dc_size, dc_size);
-
-		gsl_vector_view xHiDHixQixHiy_g =
-		  gsl_matrix_column (xHiDHixQixHiy_all_g, i);
-		gsl_vector_view xHiDHixQixHiy_e =
-		  gsl_matrix_column (xHiDHixQixHiy_all_e, i);
-
-		gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_g.matrix,
-				QixHiy, 0.0, &xHiDHixQixHiy_g.vector);
-		gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_e.matrix,
-				QixHiy, 0.0, &xHiDHixQixHiy_e.vector);
-	}
-
-	return;
+void Calc_xHiDHixQixHiy_all(const gsl_matrix *xHiDHix_all_g,
+                            const gsl_matrix *xHiDHix_all_e,
+                            const gsl_vector *QixHiy,
+                            gsl_matrix *xHiDHixQixHiy_all_g,
+                            gsl_matrix *xHiDHixQixHiy_all_e) {
+  size_t dc_size = xHiDHix_all_g->size1;
+  size_t v_size = xHiDHix_all_g->size2 / dc_size;
+
+  for (size_t i = 0; i < v_size; i++) {
+    gsl_matrix_const_view xHiDHix_g = gsl_matrix_const_submatrix(
+        xHiDHix_all_g, 0, i * dc_size, dc_size, dc_size);
+    gsl_matrix_const_view xHiDHix_e = gsl_matrix_const_submatrix(
+        xHiDHix_all_e, 0, i * dc_size, dc_size, dc_size);
+
+    gsl_vector_view xHiDHixQixHiy_g = gsl_matrix_column(xHiDHixQixHiy_all_g, i);
+    gsl_vector_view xHiDHixQixHiy_e = gsl_matrix_column(xHiDHixQixHiy_all_e, i);
+
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_g.matrix, QixHiy, 0.0,
+                   &xHiDHixQixHiy_g.vector);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_e.matrix, QixHiy, 0.0,
+                   &xHiDHixQixHiy_e.vector);
+  }
+
+  return;
 }
 
 // Calculate Qi(xHiDHiy) and Qi(xHiDHix)Qi(xHiy) for each pair of i,j (i<=j).
-void Calc_QiVec_all (const gsl_matrix *Qi, const gsl_matrix *vec_all_g,
-		     const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g,
-		     gsl_matrix *Qivec_all_e) {
-	for (size_t i=0; i<vec_all_g->size2; i++) {
-	  gsl_vector_const_view vec_g=gsl_matrix_const_column (vec_all_g, i);
-	  gsl_vector_const_view vec_e=gsl_matrix_const_column (vec_all_e, i);
-	  
-	  gsl_vector_view Qivec_g=gsl_matrix_column (Qivec_all_g, i);
-	  gsl_vector_view Qivec_e=gsl_matrix_column (Qivec_all_e, i);
-	  
-	  gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_g.vector,0.0,
-			 &Qivec_g.vector);
-	  gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_e.vector,0.0,
-			 &Qivec_e.vector);
-	}
-
-	return;
+void Calc_QiVec_all(const gsl_matrix *Qi, const gsl_matrix *vec_all_g,
+                    const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g,
+                    gsl_matrix *Qivec_all_e) {
+  for (size_t i = 0; i < vec_all_g->size2; i++) {
+    gsl_vector_const_view vec_g = gsl_matrix_const_column(vec_all_g, i);
+    gsl_vector_const_view vec_e = gsl_matrix_const_column(vec_all_e, i);
+
+    gsl_vector_view Qivec_g = gsl_matrix_column(Qivec_all_g, i);
+    gsl_vector_view Qivec_e = gsl_matrix_column(Qivec_all_e, i);
+
+    gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_g.vector, 0.0, &Qivec_g.vector);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_e.vector, 0.0, &Qivec_e.vector);
+  }
+
+  return;
 }
 
 // Calculate Qi(xHiDHix) for each pair of i,j (i<=j).
-void Calc_QiMat_all (const gsl_matrix *Qi, const gsl_matrix *mat_all_g,
-		     const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g,
-		     gsl_matrix *Qimat_all_e) {
-	size_t dc_size=Qi->size1;
-	size_t v_size=mat_all_g->size2/mat_all_g->size1;
-
-	for (size_t i=0; i<v_size; i++) {
-		gsl_matrix_const_view mat_g =
-		  gsl_matrix_const_submatrix (mat_all_g, 0, i*dc_size,
-					      dc_size, dc_size);
-		gsl_matrix_const_view mat_e =
-		  gsl_matrix_const_submatrix (mat_all_e, 0, i*dc_size,
-					      dc_size, dc_size);
-
-		gsl_matrix_view Qimat_g =
-		  gsl_matrix_submatrix (Qimat_all_g, 0, i*dc_size, dc_size,
-					dc_size);
-		gsl_matrix_view Qimat_e =
-		  gsl_matrix_submatrix (Qimat_all_e, 0, i*dc_size, dc_size,
-					dc_size);
-
-		gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi,
-				&mat_g.matrix, 0.0, &Qimat_g.matrix);
-		gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi,
-				&mat_e.matrix, 0.0, &Qimat_e.matrix);
-	}
-
-	return;
+void Calc_QiMat_all(const gsl_matrix *Qi, const gsl_matrix *mat_all_g,
+                    const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g,
+                    gsl_matrix *Qimat_all_e) {
+  size_t dc_size = Qi->size1;
+  size_t v_size = mat_all_g->size2 / mat_all_g->size1;
+
+  for (size_t i = 0; i < v_size; i++) {
+    gsl_matrix_const_view mat_g =
+        gsl_matrix_const_submatrix(mat_all_g, 0, i * dc_size, dc_size, dc_size);
+    gsl_matrix_const_view mat_e =
+        gsl_matrix_const_submatrix(mat_all_e, 0, i * dc_size, dc_size, dc_size);
+
+    gsl_matrix_view Qimat_g =
+        gsl_matrix_submatrix(Qimat_all_g, 0, i * dc_size, dc_size, dc_size);
+    gsl_matrix_view Qimat_e =
+        gsl_matrix_submatrix(Qimat_all_e, 0, i * dc_size, dc_size, dc_size);
+
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_g.matrix, 0.0,
+                   &Qimat_g.matrix);
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_e.matrix, 0.0,
+                   &Qimat_e.matrix);
+  }
+
+  return;
 }
 
 // Calculate yPDPy
 // yPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)y
 //       = ytHiDHiy - (yHix)Qi(xHiDHiy) - (yHiDHix)Qi(xHiy)
 //         + (yHix)Qi(xHiDHix)Qi(xtHiy)
-void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy,
-		 const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
-		 const gsl_matrix *xHiDHiy_all_e,
-		 const gsl_matrix *xHiDHixQixHiy_all_g,
-		 const gsl_matrix *xHiDHixQixHiy_all_e,
-		 const size_t i, const size_t j,
-		 double &yPDPy_g, double &yPDPy_e) {
-	size_t d_size=Hiy->size1;
-	size_t v=GetIndex(i, j, d_size);
-
-	double d;
-
-	// First part: ytHiDHiy.
-	Calc_yHiDHiy (eval, Hiy, i, j, yPDPy_g, yPDPy_e);
-
-	// Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
-	gsl_vector_const_view xHiDHiy_g =
-	  gsl_matrix_const_column (xHiDHiy_all_g, v);
-	gsl_vector_const_view xHiDHiy_e =
-	  gsl_matrix_const_column (xHiDHiy_all_e, v);
-
-	gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
-	yPDPy_g-=d*2.0;
-	gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
-	yPDPy_e-=d*2.0;
-
-	// Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy).
-	gsl_vector_const_view xHiDHixQixHiy_g =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_g, v);
-	gsl_vector_const_view xHiDHixQixHiy_e =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_e, v);
-
-	gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
-	yPDPy_g+=d;
-	gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
-	yPDPy_e+=d;
-
-	return;
+void Calc_yPDPy(const gsl_vector *eval, const gsl_matrix *Hiy,
+                const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
+                const gsl_matrix *xHiDHiy_all_e,
+                const gsl_matrix *xHiDHixQixHiy_all_g,
+                const gsl_matrix *xHiDHixQixHiy_all_e, const size_t i,
+                const size_t j, double &yPDPy_g, double &yPDPy_e) {
+  size_t d_size = Hiy->size1;
+  size_t v = GetIndex(i, j, d_size);
+
+  double d;
+
+  // First part: ytHiDHiy.
+  Calc_yHiDHiy(eval, Hiy, i, j, yPDPy_g, yPDPy_e);
+
+  // Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
+  gsl_vector_const_view xHiDHiy_g = gsl_matrix_const_column(xHiDHiy_all_g, v);
+  gsl_vector_const_view xHiDHiy_e = gsl_matrix_const_column(xHiDHiy_all_e, v);
+
+  gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
+  yPDPy_g -= d * 2.0;
+  gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
+  yPDPy_e -= d * 2.0;
+
+  // Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy).
+  gsl_vector_const_view xHiDHixQixHiy_g =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_g, v);
+  gsl_vector_const_view xHiDHixQixHiy_e =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_e, v);
+
+  gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
+  yPDPy_g += d;
+  gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
+  yPDPy_e += d;
+
+  return;
 }
 
 // calculate yPDPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)D(Hi-HixQixHi)y
@@ -1912,3445 +1897,3503 @@ void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy,
 //                     + (yHiDHix)Qi(xHiDHix)Qi(xHiy)
 //                     + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
 //                     - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy)
-void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi,
-		   const gsl_matrix *xHi, const gsl_matrix *Hiy,
-		   const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
-		   const gsl_matrix *xHiDHiy_all_e,
-		   const gsl_matrix *QixHiDHiy_all_g,
-		   const gsl_matrix *QixHiDHiy_all_e,
-		   const gsl_matrix *xHiDHixQixHiy_all_g,
-		   const gsl_matrix *xHiDHixQixHiy_all_e,
-		   const gsl_matrix *QixHiDHixQixHiy_all_g,
-		   const gsl_matrix *QixHiDHixQixHiy_all_e,
-		   const gsl_matrix *xHiDHiDHiy_all_gg,
-		   const gsl_matrix *xHiDHiDHiy_all_ee,
-		   const gsl_matrix *xHiDHiDHiy_all_ge,
-		   const gsl_matrix *xHiDHiDHix_all_gg,
-		   const gsl_matrix *xHiDHiDHix_all_ee,
-		   const gsl_matrix *xHiDHiDHix_all_ge,
-		   const size_t i1, const size_t j1, const size_t i2,
-		   const size_t j2, double &yPDPDPy_gg, double &yPDPDPy_ee,
-		   double &yPDPDPy_ge) {
-	size_t d_size=Hi->size1, dc_size=xHi->size1;
-	size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
-	size_t v_size=d_size*(d_size+1)/2;
-
-	double d;
-
-	gsl_vector *xHiDHiDHixQixHiy=gsl_vector_alloc (dc_size);
-
-	// First part: yHiDHiDHiy.
-	Calc_yHiDHiDHiy (eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg,
-			 yPDPDPy_ee, yPDPDPy_ge);
-
-	// Second and third parts:
-	// -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy).
-	gsl_vector_const_view xHiDHiDHiy_gg1 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
-	gsl_vector_const_view xHiDHiDHiy_ee1 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
-	gsl_vector_const_view xHiDHiDHiy_ge1 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-
-	gsl_vector_const_view xHiDHiDHiy_gg2 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_gg, v2*v_size+v1);
-	gsl_vector_const_view xHiDHiDHiy_ee2 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ee, v2*v_size+v1);
-	gsl_vector_const_view xHiDHiDHiy_ge2 =
-	  gsl_matrix_const_column (xHiDHiDHiy_all_ge, v2*v_size+v1);
-
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
-	yPDPDPy_ge-=d;
-
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
-	yPDPDPy_ge-=d;
-
-	// Fourth part: - (yHiDHix)Qi(xHiDHiy).
-	gsl_vector_const_view xHiDHiy_g1 =
-	  gsl_matrix_const_column (xHiDHiy_all_g, v1);
-	gsl_vector_const_view xHiDHiy_e1 =
-	  gsl_matrix_const_column (xHiDHiy_all_e, v1);
-	gsl_vector_const_view QixHiDHiy_g2 =
-	  gsl_matrix_const_column (QixHiDHiy_all_g, v2);
-	gsl_vector_const_view QixHiDHiy_e2 =
-	  gsl_matrix_const_column (QixHiDHiy_all_e, v2);
-
-	gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ge-=d;
-
-	// Fifth and sixth parts:
-	//   + (yHix)Qi(xHiDHix)Qi(xHiDHiy) +
-	//   (yHiDHix)Qi(xHiDHix)Qi(xHiy)
-	gsl_vector_const_view QixHiDHiy_g1 =
-	  gsl_matrix_const_column (QixHiDHiy_all_g, v1);
-	gsl_vector_const_view QixHiDHiy_e1 =
-	  gsl_matrix_const_column (QixHiDHiy_all_e, v1);
-
-	gsl_vector_const_view xHiDHixQixHiy_g1 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_g, v1);
-	gsl_vector_const_view xHiDHixQixHiy_e1 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_e, v1);
-	gsl_vector_const_view xHiDHixQixHiy_g2 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_g, v2);
-	gsl_vector_const_view xHiDHixQixHiy_e2 =
-	  gsl_matrix_const_column (xHiDHixQixHiy_all_e, v2);
-
-	gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
-	yPDPDPy_gg+=d;
-	gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
-	yPDPDPy_gg+=d;
-
-	gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ee+=d;
-	gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
-	yPDPDPy_ee+=d;
-
-	gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
-	yPDPDPy_ge+=d;
-	gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
-	yPDPDPy_ge+=d;
-
-	// Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
-	gsl_matrix_const_view xHiDHiDHix_gg =
-	  gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0,
-				      (v1*v_size+v2)*dc_size,
-				      dc_size, dc_size);
-	gsl_matrix_const_view xHiDHiDHix_ee =
-	  gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0,
-				      (v1*v_size+v2)*dc_size,
-				      dc_size, dc_size);
-	gsl_matrix_const_view xHiDHiDHix_ge =
-	  gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0,
-				      (v1*v_size+v2)*dc_size,
-				      dc_size, dc_size);
-
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix,
-			QixHiy, 0.0, xHiDHiDHixQixHiy);
-	gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
-	yPDPDPy_gg+=d;
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix,
-			QixHiy, 0.0, xHiDHiDHixQixHiy);
-	gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
-	yPDPDPy_ee+=d;
-	gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix,
-			QixHiy, 0.0, xHiDHiDHixQixHiy);
-	gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
-	yPDPDPy_ge+=d;
-
-	// Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy).
-	gsl_vector_const_view QixHiDHixQixHiy_g1 =
-	  gsl_matrix_const_column (QixHiDHixQixHiy_all_g, v1);
-	gsl_vector_const_view QixHiDHixQixHiy_e1 =
-	  gsl_matrix_const_column (QixHiDHixQixHiy_all_e, v1);
-
-	gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_g2.vector,&d);
-	yPDPDPy_gg-=d;
-	gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector,&xHiDHixQixHiy_e2.vector,&d);
-	yPDPDPy_ee-=d;
-	gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_e2.vector,&d);
-	yPDPDPy_ge-=d;
-
-	// Free memory.
-	gsl_vector_free(xHiDHiDHixQixHiy);
-
-	return;
+void Calc_yPDPDPy(
+    const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matrix *xHi,
+    const gsl_matrix *Hiy, const gsl_vector *QixHiy,
+    const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e,
+    const gsl_matrix *QixHiDHiy_all_g, const gsl_matrix *QixHiDHiy_all_e,
+    const gsl_matrix *xHiDHixQixHiy_all_g,
+    const gsl_matrix *xHiDHixQixHiy_all_e,
+    const gsl_matrix *QixHiDHixQixHiy_all_g,
+    const gsl_matrix *QixHiDHixQixHiy_all_e,
+    const gsl_matrix *xHiDHiDHiy_all_gg, const gsl_matrix *xHiDHiDHiy_all_ee,
+    const gsl_matrix *xHiDHiDHiy_all_ge, const gsl_matrix *xHiDHiDHix_all_gg,
+    const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge,
+    const size_t i1, const size_t j1, const size_t i2, const size_t j2,
+    double &yPDPDPy_gg, double &yPDPDPy_ee, double &yPDPDPy_ge) {
+  size_t d_size = Hi->size1, dc_size = xHi->size1;
+  size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
+  size_t v_size = d_size * (d_size + 1) / 2;
+
+  double d;
+
+  gsl_vector *xHiDHiDHixQixHiy = gsl_vector_alloc(dc_size);
+
+  // First part: yHiDHiDHiy.
+  Calc_yHiDHiDHiy(eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee,
+                  yPDPDPy_ge);
+
+  // Second and third parts:
+  // -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy).
+  gsl_vector_const_view xHiDHiDHiy_gg1 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
+  gsl_vector_const_view xHiDHiDHiy_ee1 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
+  gsl_vector_const_view xHiDHiDHiy_ge1 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
+
+  gsl_vector_const_view xHiDHiDHiy_gg2 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_gg, v2 * v_size + v1);
+  gsl_vector_const_view xHiDHiDHiy_ee2 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ee, v2 * v_size + v1);
+  gsl_vector_const_view xHiDHiDHiy_ge2 =
+      gsl_matrix_const_column(xHiDHiDHiy_all_ge, v2 * v_size + v1);
+
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
+  yPDPDPy_ge -= d;
+
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
+  yPDPDPy_ge -= d;
+
+  // Fourth part: - (yHiDHix)Qi(xHiDHiy).
+  gsl_vector_const_view xHiDHiy_g1 = gsl_matrix_const_column(xHiDHiy_all_g, v1);
+  gsl_vector_const_view xHiDHiy_e1 = gsl_matrix_const_column(xHiDHiy_all_e, v1);
+  gsl_vector_const_view QixHiDHiy_g2 =
+      gsl_matrix_const_column(QixHiDHiy_all_g, v2);
+  gsl_vector_const_view QixHiDHiy_e2 =
+      gsl_matrix_const_column(QixHiDHiy_all_e, v2);
+
+  gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ge -= d;
+
+  // Fifth and sixth parts:
+  //   + (yHix)Qi(xHiDHix)Qi(xHiDHiy) +
+  //   (yHiDHix)Qi(xHiDHix)Qi(xHiy)
+  gsl_vector_const_view QixHiDHiy_g1 =
+      gsl_matrix_const_column(QixHiDHiy_all_g, v1);
+  gsl_vector_const_view QixHiDHiy_e1 =
+      gsl_matrix_const_column(QixHiDHiy_all_e, v1);
+
+  gsl_vector_const_view xHiDHixQixHiy_g1 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_g, v1);
+  gsl_vector_const_view xHiDHixQixHiy_e1 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_e, v1);
+  gsl_vector_const_view xHiDHixQixHiy_g2 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_g, v2);
+  gsl_vector_const_view xHiDHixQixHiy_e2 =
+      gsl_matrix_const_column(xHiDHixQixHiy_all_e, v2);
+
+  gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
+  yPDPDPy_gg += d;
+  gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
+  yPDPDPy_gg += d;
+
+  gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ee += d;
+  gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
+  yPDPDPy_ee += d;
+
+  gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
+  yPDPDPy_ge += d;
+  gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
+  yPDPDPy_ge += d;
+
+  // Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
+  gsl_matrix_const_view xHiDHiDHix_gg = gsl_matrix_const_submatrix(
+      xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+  gsl_matrix_const_view xHiDHiDHix_ee = gsl_matrix_const_submatrix(
+      xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+  gsl_matrix_const_view xHiDHiDHix_ge = gsl_matrix_const_submatrix(
+      xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, QixHiy, 0.0,
+                 xHiDHiDHixQixHiy);
+  gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+  yPDPDPy_gg += d;
+  gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix, QixHiy, 0.0,
+                 xHiDHiDHixQixHiy);
+  gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+  yPDPDPy_ee += d;
+  gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, QixHiy, 0.0,
+                 xHiDHiDHixQixHiy);
+  gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+  yPDPDPy_ge += d;
+
+  // Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy).
+  gsl_vector_const_view QixHiDHixQixHiy_g1 =
+      gsl_matrix_const_column(QixHiDHixQixHiy_all_g, v1);
+  gsl_vector_const_view QixHiDHixQixHiy_e1 =
+      gsl_matrix_const_column(QixHiDHixQixHiy_all_e, v1);
+
+  gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_g2.vector, &d);
+  yPDPDPy_gg -= d;
+  gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector, &xHiDHixQixHiy_e2.vector, &d);
+  yPDPDPy_ee -= d;
+  gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_e2.vector, &d);
+  yPDPDPy_ge -= d;
+
+  // Free memory.
+  gsl_vector_free(xHiDHiDHixQixHiy);
+
+  return;
 }
 
 // Calculate Edgeworth correctation factors for small samples notation
 // and method follows Thomas J. Rothenberg, Econometirca 1984; 52 (4)
 // M=xHiDHix
-void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi,
-	      const gsl_matrix *QixHiDHix_all_g,
-	      const gsl_matrix *QixHiDHix_all_e,
-	      const gsl_matrix *xHiDHiDHix_all_gg,
-	      const gsl_matrix *xHiDHiDHix_all_ee,
-	      const gsl_matrix *xHiDHiDHix_all_ge,
-	      const size_t d_size, double &crt_a,
-	      double &crt_b, double &crt_c) {
-	crt_a=0.0; crt_b=0.0; crt_c=0.0;
-
-	size_t dc_size=Qi->size1, v_size=Hessian_inv->size1/2;
-	size_t c_size=dc_size/d_size;
-	double h_gg, h_ge, h_ee, d, B=0.0, C=0.0, D=0.0;
-	double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee;
-	double trCC_gg, trCC_ge, trCC_ee, trD_gg=0.0, trD_ge=0.0, trD_ee=0.0;
-
-	gsl_matrix *QiMQi_g1=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQi_e1=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQi_g2=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQi_e2=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_matrix *QiMQisQisi_g1=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *QiMQisQisi_e1=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *QiMQisQisi_g2=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *QiMQisQisi_e2=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix *QiMQiMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQiMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMQiMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_matrix *QiMMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *QiMMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
-	gsl_matrix *Qi_si=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix *M_dd=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *M_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
-	// Invert Qi_sub to Qi_si.
-	gsl_matrix *Qi_sub=gsl_matrix_alloc (d_size, d_size);
-
-	gsl_matrix_const_view Qi_s =
-	  gsl_matrix_const_submatrix (Qi, (c_size-1)*d_size,
-				      (c_size-1)*d_size, d_size, d_size);
-
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (d_size);
-
-	gsl_matrix_memcpy (Qi_sub, &Qi_s.matrix);
-	LUDecomp (Qi_sub, pmt, &sig);
-	LUInvert (Qi_sub, pmt, Qi_si);
-
-	gsl_permutation_free(pmt);
-	gsl_matrix_free(Qi_sub);
-
-	// Calculate correction factors.
-	for (size_t v1=0; v1<v_size; v1++) {
-	  
-	  // Calculate Qi(xHiDHix)Qi, and subpart of it.
-	  gsl_matrix_const_view QiM_g1 =
-	    gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v1*dc_size,
-					dc_size, dc_size);
-	  gsl_matrix_const_view QiM_e1 =
-	    gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v1*dc_size,
-					dc_size, dc_size);
-	  
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
-			 Qi, 0.0, QiMQi_g1);
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
-			 Qi, 0.0, QiMQi_e1);
-	  
-	  gsl_matrix_view QiMQi_g1_s =
-	    gsl_matrix_submatrix (QiMQi_g1, (c_size-1)*d_size,
-				  (c_size-1)*d_size, d_size, d_size);
-	  gsl_matrix_view QiMQi_e1_s =
-	    gsl_matrix_submatrix (QiMQi_e1, (c_size-1)*d_size,
-				  (c_size-1)*d_size, d_size, d_size);
-
-	  // Calculate trCg1 and trCe1.
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix,
-			 Qi_si, 0.0, QiMQisQisi_g1);
-	  trCg1=0.0;
-	  for (size_t k=0; k<d_size; k++) {
-	    trCg1-=gsl_matrix_get (QiMQisQisi_g1, k, k);
-	  }
-
-	  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix,
-			 Qi_si, 0.0, QiMQisQisi_e1);
-	  trCe1=0.0;
-	  for (size_t k=0; k<d_size; k++) {
-	    trCe1-=gsl_matrix_get (QiMQisQisi_e1, k, k);
-	  }
-
-	  for (size_t v2=0; v2<v_size; v2++) {
-	    if (v2<v1) {continue;}
-	    
-	    // Calculate Qi(xHiDHix)Qi, and subpart of it.
-	    gsl_matrix_const_view QiM_g2 =
-	      gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v2*dc_size,
-					  dc_size, dc_size);
-	    gsl_matrix_const_view QiM_e2 =
-	      gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v2*dc_size,
-					  dc_size, dc_size);
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix,
-			   Qi, 0.0, QiMQi_g2);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix,
-			   Qi, 0.0, QiMQi_e2);
-	    
-	    gsl_matrix_view QiMQi_g2_s =
-	      gsl_matrix_submatrix (QiMQi_g2, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMQi_e2_s =
-	      gsl_matrix_submatrix (QiMQi_e2, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    
-	    // Calculate trCg2 and trCe2.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQi_g2_s.matrix, Qi_si, 0.0, QiMQisQisi_g2);
-	    trCg2=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCg2-=gsl_matrix_get (QiMQisQisi_g2, k, k);
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQi_e2_s.matrix, Qi_si, 0.0, QiMQisQisi_e2);
-	    trCe2=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCe2-=gsl_matrix_get (QiMQisQisi_e2, k, k);
-	    }
-	    
-	    // Calculate trCC_gg, trCC_ge, trCC_ee.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   QiMQisQisi_g1, QiMQisQisi_g2, 0.0, M_dd);
-	    trCC_gg=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCC_gg+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
-			   QiMQisQisi_e2, 0.0, M_dd);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
-			   QiMQisQisi_g2, 1.0, M_dd);
-	    trCC_ge=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCC_ge+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
-			   QiMQisQisi_e2, 0.0, M_dd);
-	    trCC_ee=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      trCC_ee+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
-			   QiMQi_g2, 0.0, QiMQiMQi_gg);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
-			   QiMQi_e2, 0.0, QiMQiMQi_ge);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
-			   QiMQi_g2, 1.0, QiMQiMQi_ge);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
-			   QiMQi_e2, 0.0, QiMQiMQi_ee);
-	    
-	    gsl_matrix_view QiMQiMQi_gg_s =
-	      gsl_matrix_submatrix (QiMQiMQi_gg, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMQiMQi_ge_s =
-	      gsl_matrix_submatrix (QiMQiMQi_ge, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMQiMQi_ee_s =
-	      gsl_matrix_submatrix (QiMQiMQi_ee, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    
-	    // and part of trB_gg, trB_ge, trB_ee.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQiMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
-	    trB_gg=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      d=gsl_matrix_get (M_dd, k, k);
-	      trB_gg-=d;
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQiMQi_ge_s.matrix, Qi_si, 0.0, M_dd);
-	    trB_ge=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      d=gsl_matrix_get (M_dd, k, k);
-	      trB_ge-=d;
-	    }
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMQiMQi_ee_s.matrix, Qi_si, 0.0, M_dd);
-	    trB_ee=0.0;
-	    for (size_t k=0; k<d_size; k++) {
-	      d=gsl_matrix_get (M_dd, k, k);
-	      trB_ee-=d;
-	    }
-	    
-	    // Calculate Qi(xHiDHiDHix)Qi, and subpart of it.
-	    gsl_matrix_const_view MM_gg =
-	      gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0,
-					  (v1*v_size+v2)*dc_size, dc_size,
-					  dc_size);
-	    gsl_matrix_const_view MM_ge =
-	      gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0,
-					  (v1*v_size+v2)*dc_size, dc_size,
-					  dc_size);
-	    gsl_matrix_const_view MM_ee =
-	      gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0,
-					  (v1*v_size+v2)*dc_size, dc_size,
-					  dc_size);
-	    
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
-			   &MM_gg.matrix, 0.0, M_dcdc);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
-			   QiMMQi_gg);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
-			   &MM_ge.matrix, 0.0, M_dcdc);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc,
-			   Qi, 0.0, QiMMQi_ge);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
-			   &MM_ee.matrix, 0.0, M_dcdc);
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi,
-			   0.0, QiMMQi_ee);
-	    
-	    gsl_matrix_view QiMMQi_gg_s =
-	      gsl_matrix_submatrix (QiMMQi_gg, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMMQi_ge_s =
-	      gsl_matrix_submatrix (QiMMQi_ge, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    gsl_matrix_view QiMMQi_ee_s =
-	      gsl_matrix_submatrix (QiMMQi_ee, (c_size-1)*d_size,
-				    (c_size-1)*d_size, d_size, d_size);
-	    
-	    // Calculate the other part of trB_gg, trB_ge, trB_ee.
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
-	    for (size_t k=0; k<d_size; k++) {
-	      trB_gg+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMMQi_ge_s.matrix, Qi_si, 0.0, M_dd);
-	    for (size_t k=0; k<d_size; k++) {
-	      trB_ge+=2.0*gsl_matrix_get (M_dd, k, k);
-	    }
-	    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
-			   &QiMMQi_ee_s.matrix, Qi_si, 0.0, M_dd);
-	    for (size_t k=0; k<d_size; k++) {
-	      trB_ee+=gsl_matrix_get (M_dd, k, k);
-	    }
-	    
-	    // Calculate trD_gg, trD_ge, trD_ee.
-	    trD_gg=2.0*trB_gg;
-	    trD_ge=2.0*trB_ge;
-	    trD_ee=2.0*trB_ee;
-	    
-	    //calculate B, C and D
-	    h_gg=-1.0*gsl_matrix_get (Hessian_inv, v1, v2);
-	    h_ge=-1.0*gsl_matrix_get (Hessian_inv, v1, v2+v_size);
-	    h_ee=-1.0*gsl_matrix_get (Hessian_inv, v1+v_size, v2+v_size);
-	    
-	    B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
-	    C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) +
-	      h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) +
-	      h_ee*(trCC_ee+0.5*trCe1*trCe2);
-	    D+=h_gg*(trCC_gg+0.5*trD_gg) +
-	      h_ge*(trCC_ge+0.5*trD_ge) + h_ee*(trCC_ee+0.5*trD_ee);
-	    
-	    if (v1!=v2) {
-	      B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
-	      C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) +
-		h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) +
-		h_ee*(trCC_ee+0.5*trCe1*trCe2);
-	      D+=h_gg*(trCC_gg+0.5*trD_gg) +
-		h_ge*(trCC_ge+0.5*trD_ge) +
-		h_ee*(trCC_ee+0.5*trD_ee);
-	    }
-	  }
-	}
-
-	// Calculate a, b, c from B C D.
-	crt_a=2.0*D-C;
-	crt_b=2.0*B;
-	crt_c=C;
-
-	// Free matrix memory.
-	gsl_matrix_free(QiMQi_g1);
-	gsl_matrix_free(QiMQi_e1);
-	gsl_matrix_free(QiMQi_g2);
-	gsl_matrix_free(QiMQi_e2);
-
-	gsl_matrix_free(QiMQisQisi_g1);
-	gsl_matrix_free(QiMQisQisi_e1);
-	gsl_matrix_free(QiMQisQisi_g2);
-	gsl_matrix_free(QiMQisQisi_e2);
-
-	gsl_matrix_free(QiMQiMQi_gg);
-	gsl_matrix_free(QiMQiMQi_ge);
-	gsl_matrix_free(QiMQiMQi_ee);
-
-	gsl_matrix_free(QiMMQi_gg);
-	gsl_matrix_free(QiMMQi_ge);
-	gsl_matrix_free(QiMMQi_ee);
-
-	gsl_matrix_free(Qi_si);
-
-	gsl_matrix_free(M_dd);
-	gsl_matrix_free(M_dcdc);
-
-	return;
+void CalcCRT(const gsl_matrix *Hessian_inv, const gsl_matrix *Qi,
+             const gsl_matrix *QixHiDHix_all_g,
+             const gsl_matrix *QixHiDHix_all_e,
+             const gsl_matrix *xHiDHiDHix_all_gg,
+             const gsl_matrix *xHiDHiDHix_all_ee,
+             const gsl_matrix *xHiDHiDHix_all_ge, const size_t d_size,
+             double &crt_a, double &crt_b, double &crt_c) {
+  crt_a = 0.0;
+  crt_b = 0.0;
+  crt_c = 0.0;
+
+  size_t dc_size = Qi->size1, v_size = Hessian_inv->size1 / 2;
+  size_t c_size = dc_size / d_size;
+  double h_gg, h_ge, h_ee, d, B = 0.0, C = 0.0, D = 0.0;
+  double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee;
+  double trCC_gg, trCC_ge, trCC_ee, trD_gg = 0.0, trD_ge = 0.0, trD_ee = 0.0;
+
+  gsl_matrix *QiMQi_g1 = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQi_e1 = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQi_g2 = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQi_e2 = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_matrix *QiMQisQisi_g1 = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *QiMQisQisi_e1 = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *QiMQisQisi_g2 = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *QiMQisQisi_e2 = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix *QiMQiMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQiMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMQiMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_matrix *QiMMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *QiMMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
+
+  gsl_matrix *Qi_si = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix *M_dd = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *M_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+
+  // Invert Qi_sub to Qi_si.
+  gsl_matrix *Qi_sub = gsl_matrix_alloc(d_size, d_size);
+
+  gsl_matrix_const_view Qi_s = gsl_matrix_const_submatrix(
+      Qi, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(d_size);
+
+  gsl_matrix_memcpy(Qi_sub, &Qi_s.matrix);
+  LUDecomp(Qi_sub, pmt, &sig);
+  LUInvert(Qi_sub, pmt, Qi_si);
+
+  gsl_permutation_free(pmt);
+  gsl_matrix_free(Qi_sub);
+
+  // Calculate correction factors.
+  for (size_t v1 = 0; v1 < v_size; v1++) {
+
+    // Calculate Qi(xHiDHix)Qi, and subpart of it.
+    gsl_matrix_const_view QiM_g1 = gsl_matrix_const_submatrix(
+        QixHiDHix_all_g, 0, v1 * dc_size, dc_size, dc_size);
+    gsl_matrix_const_view QiM_e1 = gsl_matrix_const_submatrix(
+        QixHiDHix_all_e, 0, v1 * dc_size, dc_size, dc_size);
+
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, Qi, 0.0,
+                   QiMQi_g1);
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, Qi, 0.0,
+                   QiMQi_e1);
+
+    gsl_matrix_view QiMQi_g1_s = gsl_matrix_submatrix(
+        QiMQi_g1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+    gsl_matrix_view QiMQi_e1_s = gsl_matrix_submatrix(
+        QiMQi_e1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+
+    // Calculate trCg1 and trCe1.
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix, Qi_si,
+                   0.0, QiMQisQisi_g1);
+    trCg1 = 0.0;
+    for (size_t k = 0; k < d_size; k++) {
+      trCg1 -= gsl_matrix_get(QiMQisQisi_g1, k, k);
+    }
+
+    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix, Qi_si,
+                   0.0, QiMQisQisi_e1);
+    trCe1 = 0.0;
+    for (size_t k = 0; k < d_size; k++) {
+      trCe1 -= gsl_matrix_get(QiMQisQisi_e1, k, k);
+    }
+
+    for (size_t v2 = 0; v2 < v_size; v2++) {
+      if (v2 < v1) {
+        continue;
+      }
+
+      // Calculate Qi(xHiDHix)Qi, and subpart of it.
+      gsl_matrix_const_view QiM_g2 = gsl_matrix_const_submatrix(
+          QixHiDHix_all_g, 0, v2 * dc_size, dc_size, dc_size);
+      gsl_matrix_const_view QiM_e2 = gsl_matrix_const_submatrix(
+          QixHiDHix_all_e, 0, v2 * dc_size, dc_size, dc_size);
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix, Qi, 0.0,
+                     QiMQi_g2);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix, Qi, 0.0,
+                     QiMQi_e2);
+
+      gsl_matrix_view QiMQi_g2_s =
+          gsl_matrix_submatrix(QiMQi_g2, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMQi_e2_s =
+          gsl_matrix_submatrix(QiMQi_e2, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+
+      // Calculate trCg2 and trCe2.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g2_s.matrix, Qi_si,
+                     0.0, QiMQisQisi_g2);
+      trCg2 = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCg2 -= gsl_matrix_get(QiMQisQisi_g2, k, k);
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e2_s.matrix, Qi_si,
+                     0.0, QiMQisQisi_e2);
+      trCe2 = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCe2 -= gsl_matrix_get(QiMQisQisi_e2, k, k);
+      }
+
+      // Calculate trCC_gg, trCC_ge, trCC_ee.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
+                     QiMQisQisi_g2, 0.0, M_dd);
+      trCC_gg = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCC_gg += gsl_matrix_get(M_dd, k, k);
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
+                     QiMQisQisi_e2, 0.0, M_dd);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
+                     QiMQisQisi_g2, 1.0, M_dd);
+      trCC_ge = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCC_ge += gsl_matrix_get(M_dd, k, k);
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
+                     QiMQisQisi_e2, 0.0, M_dd);
+      trCC_ee = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        trCC_ee += gsl_matrix_get(M_dd, k, k);
+      }
+
+      // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_g2,
+                     0.0, QiMQiMQi_gg);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_e2,
+                     0.0, QiMQiMQi_ge);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_g2,
+                     1.0, QiMQiMQi_ge);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_e2,
+                     0.0, QiMQiMQi_ee);
+
+      gsl_matrix_view QiMQiMQi_gg_s =
+          gsl_matrix_submatrix(QiMQiMQi_gg, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMQiMQi_ge_s =
+          gsl_matrix_submatrix(QiMQiMQi_ge, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMQiMQi_ee_s =
+          gsl_matrix_submatrix(QiMQiMQi_ee, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+
+      // and part of trB_gg, trB_ge, trB_ee.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_gg_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      trB_gg = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        d = gsl_matrix_get(M_dd, k, k);
+        trB_gg -= d;
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ge_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      trB_ge = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        d = gsl_matrix_get(M_dd, k, k);
+        trB_ge -= d;
+      }
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ee_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      trB_ee = 0.0;
+      for (size_t k = 0; k < d_size; k++) {
+        d = gsl_matrix_get(M_dd, k, k);
+        trB_ee -= d;
+      }
+
+      // Calculate Qi(xHiDHiDHix)Qi, and subpart of it.
+      gsl_matrix_const_view MM_gg = gsl_matrix_const_submatrix(
+          xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+      gsl_matrix_const_view MM_ge = gsl_matrix_const_submatrix(
+          xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+      gsl_matrix_const_view MM_ee = gsl_matrix_const_submatrix(
+          xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_gg.matrix, 0.0,
+                     M_dcdc);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+                     QiMMQi_gg);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ge.matrix, 0.0,
+                     M_dcdc);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+                     QiMMQi_ge);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ee.matrix, 0.0,
+                     M_dcdc);
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+                     QiMMQi_ee);
+
+      gsl_matrix_view QiMMQi_gg_s =
+          gsl_matrix_submatrix(QiMMQi_gg, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMMQi_ge_s =
+          gsl_matrix_submatrix(QiMMQi_ge, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+      gsl_matrix_view QiMMQi_ee_s =
+          gsl_matrix_submatrix(QiMMQi_ee, (c_size - 1) * d_size,
+                               (c_size - 1) * d_size, d_size, d_size);
+
+      // Calculate the other part of trB_gg, trB_ge, trB_ee.
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_gg_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      for (size_t k = 0; k < d_size; k++) {
+        trB_gg += gsl_matrix_get(M_dd, k, k);
+      }
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ge_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      for (size_t k = 0; k < d_size; k++) {
+        trB_ge += 2.0 * gsl_matrix_get(M_dd, k, k);
+      }
+      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ee_s.matrix,
+                     Qi_si, 0.0, M_dd);
+      for (size_t k = 0; k < d_size; k++) {
+        trB_ee += gsl_matrix_get(M_dd, k, k);
+      }
+
+      // Calculate trD_gg, trD_ge, trD_ee.
+      trD_gg = 2.0 * trB_gg;
+      trD_ge = 2.0 * trB_ge;
+      trD_ee = 2.0 * trB_ee;
+
+      // calculate B, C and D
+      h_gg = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2);
+      h_ge = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2 + v_size);
+      h_ee = -1.0 * gsl_matrix_get(Hessian_inv, v1 + v_size, v2 + v_size);
+
+      B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
+      C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
+           h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
+           h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
+      D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
+           h_ee * (trCC_ee + 0.5 * trD_ee);
+
+      if (v1 != v2) {
+        B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
+        C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
+             h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
+             h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
+        D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
+             h_ee * (trCC_ee + 0.5 * trD_ee);
+      }
+    }
+  }
+
+  // Calculate a, b, c from B C D.
+  crt_a = 2.0 * D - C;
+  crt_b = 2.0 * B;
+  crt_c = C;
+
+  // Free matrix memory.
+  gsl_matrix_free(QiMQi_g1);
+  gsl_matrix_free(QiMQi_e1);
+  gsl_matrix_free(QiMQi_g2);
+  gsl_matrix_free(QiMQi_e2);
+
+  gsl_matrix_free(QiMQisQisi_g1);
+  gsl_matrix_free(QiMQisQisi_e1);
+  gsl_matrix_free(QiMQisQisi_g2);
+  gsl_matrix_free(QiMQisQisi_e2);
+
+  gsl_matrix_free(QiMQiMQi_gg);
+  gsl_matrix_free(QiMQiMQi_ge);
+  gsl_matrix_free(QiMQiMQi_ee);
+
+  gsl_matrix_free(QiMMQi_gg);
+  gsl_matrix_free(QiMMQi_ge);
+  gsl_matrix_free(QiMMQi_ee);
+
+  gsl_matrix_free(Qi_si);
+
+  gsl_matrix_free(M_dd);
+  gsl_matrix_free(M_dcdc);
+
+  return;
 }
 
 // Calculate first-order and second-order derivatives.
-void CalcDev (const char func_name, const gsl_vector *eval,
-	      const gsl_matrix *Qi, const gsl_matrix *Hi,
-	      const gsl_matrix *xHi, const gsl_matrix *Hiy,
-	      const gsl_vector *QixHiy, gsl_vector *gradient,
-	      gsl_matrix *Hessian_inv, double &crt_a, double &crt_b,
-	      double &crt_c) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' &&
-	    func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for " <<
-	    "log-restricted likelihood, 'L' for log-likelihood."<<endl;
-	  return;
-	}
-
-	size_t dc_size=Qi->size1, d_size=Hi->size1;
-	size_t c_size=dc_size/d_size;
-	size_t v_size=d_size*(d_size+1)/2;
-	size_t v1, v2;
-	double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
-
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_matrix *xHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *xHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *xHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *xHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *xHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *xHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
-	gsl_matrix *QixHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *QixHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *QixHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *QixHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
-	gsl_matrix *QixHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
-	gsl_matrix *QixHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
-	gsl_matrix *xHiDHiDHiy_all_gg =
-	  gsl_matrix_alloc (dc_size, v_size*v_size);
-	gsl_matrix *xHiDHiDHiy_all_ee =
-	  gsl_matrix_alloc (dc_size, v_size*v_size);
-	gsl_matrix *xHiDHiDHiy_all_ge =
-	  gsl_matrix_alloc (dc_size, v_size*v_size);
-	gsl_matrix *xHiDHiDHix_all_gg =
-	  gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-	gsl_matrix *xHiDHiDHix_all_ee =
-	  gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-	gsl_matrix *xHiDHiDHix_all_ge =
-	  gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-
-	// Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all.
-	Calc_xHiDHiy_all (eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
-	Calc_xHiDHix_all (eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
-	Calc_xHiDHixQixHiy_all (xHiDHix_all_g, xHiDHix_all_e, QixHiy,
-				xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
-
-	Calc_xHiDHiDHiy_all (v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg,
-			     xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
-	Calc_xHiDHiDHix_all (v_size, eval, Hi, xHi, xHiDHiDHix_all_gg,
-			     xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
-
-	// Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all.
-	Calc_QiVec_all (Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g,
-			QixHiDHiy_all_e);
-	Calc_QiVec_all (Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
-			QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
-	Calc_QiMat_all (Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g,
-			QixHiDHix_all_e);
-
-	double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee;
-	double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
-	double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
-
-	// Calculate gradient and Hessian for Vg.
-	for (size_t i1=0; i1<d_size; i1++) {
-	  for (size_t j1=0; j1<d_size; j1++) {
-	    if (j1<i1) {continue;}
-	    v1=GetIndex (i1, j1, d_size);
-	    
-	    Calc_yPDPy (eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
-			xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1,
-			yPDPy_g, yPDPy_e);
-	    
-	    if (func_name=='R' || func_name=='r') {
-	      Calc_tracePD (eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e,
-			    i1, j1, tPD_g, tPD_e);
-
-	      dev1_g=-0.5*tPD_g+0.5*yPDPy_g;
-	      dev1_e=-0.5*tPD_e+0.5*yPDPy_e;
-	    } else {
-	      Calc_traceHiD (eval, Hi, i1, j1, tHiD_g, tHiD_e);
-	      
-	      dev1_g=-0.5*tHiD_g+0.5*yPDPy_g;
-	      dev1_e=-0.5*tHiD_e+0.5*yPDPy_e;
-	    }
-	    
-	    gsl_vector_set (gradient, v1, dev1_g);
-	    gsl_vector_set (gradient, v1+v_size, dev1_e);
-	    
-	    for (size_t i2=0; i2<d_size; i2++) {
-	      for (size_t j2=0; j2<d_size; j2++) {
-		if (j2<i2) {continue;}
-		v2=GetIndex (i2, j2, d_size);
-		
-		if (v2<v1) {continue;}
-
-		Calc_yPDPDPy (eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g,
-			      xHiDHiy_all_e, QixHiDHiy_all_g, QixHiDHiy_all_e,
-			      xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
-			      QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e,
-			      xHiDHiDHiy_all_gg, xHiDHiDHiy_all_ee,
-			      xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg,
-			      xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1,
-			      i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
-		
-		// AI for REML.
-		if (func_name=='R' || func_name=='r') {
-		  Calc_tracePDPD (eval, Qi, Hi, xHi, QixHiDHix_all_g,
-				  QixHiDHix_all_e, xHiDHiDHix_all_gg,
-				  xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1,
-				  i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
-		  
-		  dev2_gg=0.5*tPDPD_gg-yPDPDPy_gg;
-		  dev2_ee=0.5*tPDPD_ee-yPDPDPy_ee;
-		  dev2_ge=0.5*tPDPD_ge-yPDPDPy_ge;
-		} else {
-		  Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tHiDHiD_gg,
-				    tHiDHiD_ee, tHiDHiD_ge);
-		  
-		  dev2_gg=0.5*tHiDHiD_gg-yPDPDPy_gg;
-		  dev2_ee=0.5*tHiDHiD_ee-yPDPDPy_ee;
-		  dev2_ge=0.5*tHiDHiD_ge-yPDPDPy_ge;
-		}
-		
-		// Set up Hessian.
-		gsl_matrix_set (Hessian, v1, v2, dev2_gg);
-		gsl_matrix_set (Hessian, v1+v_size, v2+v_size, dev2_ee);
-		gsl_matrix_set (Hessian, v1, v2+v_size, dev2_ge);
-		gsl_matrix_set (Hessian, v2+v_size, v1, dev2_ge);
-		
-		if (v1!=v2) {
-		  gsl_matrix_set (Hessian, v2, v1, dev2_gg);
-		  gsl_matrix_set (Hessian, v2+v_size, v1+v_size, dev2_ee);
-		  gsl_matrix_set (Hessian, v2, v1+v_size, dev2_ge);
-		  gsl_matrix_set (Hessian, v1+v_size, v2, dev2_ge);
-		}
-	      }
-	    }
-	  }
-	}
-	
-	// Invert Hessian.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (v_size*2);
-
-	LUDecomp (Hessian, pmt, &sig);
-	LUInvert (Hessian, pmt, Hessian_inv);
-	
-	gsl_permutation_free(pmt);
-	gsl_matrix_free(Hessian);
-
-	// Calculate Edgeworth correction factors after inverting
-	// Hessian.
-	if (c_size>1) {
-	  CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e,
-		  xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge,
-		  d_size, crt_a, crt_b, crt_c);
-	} else {
-	  crt_a=0.0; crt_b=0.0; crt_c=0.0;
-	}
-	
-	gsl_matrix_free(xHiDHiy_all_g);
-	gsl_matrix_free(xHiDHiy_all_e);
-	gsl_matrix_free(xHiDHix_all_g);
-	gsl_matrix_free(xHiDHix_all_e);
-	gsl_matrix_free(xHiDHixQixHiy_all_g);
-	gsl_matrix_free(xHiDHixQixHiy_all_e);
-
-	gsl_matrix_free(QixHiDHiy_all_g);
-	gsl_matrix_free(QixHiDHiy_all_e);
-	gsl_matrix_free(QixHiDHix_all_g);
-	gsl_matrix_free(QixHiDHix_all_e);
-	gsl_matrix_free(QixHiDHixQixHiy_all_g);
-	gsl_matrix_free(QixHiDHixQixHiy_all_e);
-
-	gsl_matrix_free(xHiDHiDHiy_all_gg);
-	gsl_matrix_free(xHiDHiDHiy_all_ee);
-	gsl_matrix_free(xHiDHiDHiy_all_ge);
-	gsl_matrix_free(xHiDHiDHix_all_gg);
-	gsl_matrix_free(xHiDHiDHix_all_ee);
-	gsl_matrix_free(xHiDHiDHix_all_ge);
-
-	return;
+void CalcDev(const char func_name, const gsl_vector *eval, const gsl_matrix *Qi,
+             const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy,
+             const gsl_vector *QixHiy, gsl_vector *gradient,
+             gsl_matrix *Hessian_inv, double &crt_a, double &crt_b,
+             double &crt_c) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for "
+         << "log-restricted likelihood, 'L' for log-likelihood." << endl;
+    return;
+  }
+
+  size_t dc_size = Qi->size1, d_size = Hi->size1;
+  size_t c_size = dc_size / d_size;
+  size_t v_size = d_size * (d_size + 1) / 2;
+  size_t v1, v2;
+  double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
+
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_matrix *xHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *xHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *xHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *xHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *xHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *xHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+
+  gsl_matrix *QixHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *QixHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *QixHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *QixHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
+  gsl_matrix *QixHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+  gsl_matrix *QixHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+
+  gsl_matrix *xHiDHiDHiy_all_gg = gsl_matrix_alloc(dc_size, v_size * v_size);
+  gsl_matrix *xHiDHiDHiy_all_ee = gsl_matrix_alloc(dc_size, v_size * v_size);
+  gsl_matrix *xHiDHiDHiy_all_ge = gsl_matrix_alloc(dc_size, v_size * v_size);
+  gsl_matrix *xHiDHiDHix_all_gg =
+      gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+  gsl_matrix *xHiDHiDHix_all_ee =
+      gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+  gsl_matrix *xHiDHiDHix_all_ge =
+      gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+
+  // Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all.
+  Calc_xHiDHiy_all(eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
+  Calc_xHiDHix_all(eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
+  Calc_xHiDHixQixHiy_all(xHiDHix_all_g, xHiDHix_all_e, QixHiy,
+                         xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
+
+  Calc_xHiDHiDHiy_all(v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg,
+                      xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
+  Calc_xHiDHiDHix_all(v_size, eval, Hi, xHi, xHiDHiDHix_all_gg,
+                      xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
+
+  // Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all.
+  Calc_QiVec_all(Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g,
+                 QixHiDHiy_all_e);
+  Calc_QiVec_all(Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
+                 QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
+  Calc_QiMat_all(Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g,
+                 QixHiDHix_all_e);
+
+  double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee;
+  double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
+  double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
+
+  // Calculate gradient and Hessian for Vg.
+  for (size_t i1 = 0; i1 < d_size; i1++) {
+    for (size_t j1 = 0; j1 < d_size; j1++) {
+      if (j1 < i1) {
+        continue;
+      }
+      v1 = GetIndex(i1, j1, d_size);
+
+      Calc_yPDPy(eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
+                 xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1, yPDPy_g,
+                 yPDPy_e);
+
+      if (func_name == 'R' || func_name == 'r') {
+        Calc_tracePD(eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, i1, j1, tPD_g,
+                     tPD_e);
+
+        dev1_g = -0.5 * tPD_g + 0.5 * yPDPy_g;
+        dev1_e = -0.5 * tPD_e + 0.5 * yPDPy_e;
+      } else {
+        Calc_traceHiD(eval, Hi, i1, j1, tHiD_g, tHiD_e);
+
+        dev1_g = -0.5 * tHiD_g + 0.5 * yPDPy_g;
+        dev1_e = -0.5 * tHiD_e + 0.5 * yPDPy_e;
+      }
+
+      gsl_vector_set(gradient, v1, dev1_g);
+      gsl_vector_set(gradient, v1 + v_size, dev1_e);
+
+      for (size_t i2 = 0; i2 < d_size; i2++) {
+        for (size_t j2 = 0; j2 < d_size; j2++) {
+          if (j2 < i2) {
+            continue;
+          }
+          v2 = GetIndex(i2, j2, d_size);
+
+          if (v2 < v1) {
+            continue;
+          }
+
+          Calc_yPDPDPy(eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
+                       QixHiDHiy_all_g, QixHiDHiy_all_e, xHiDHixQixHiy_all_g,
+                       xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g,
+                       QixHiDHixQixHiy_all_e, xHiDHiDHiy_all_gg,
+                       xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg,
+                       xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, i2, j2,
+                       yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
+
+          // AI for REML.
+          if (func_name == 'R' || func_name == 'r') {
+            Calc_tracePDPD(eval, Qi, Hi, xHi, QixHiDHix_all_g, QixHiDHix_all_e,
+                           xHiDHiDHix_all_gg, xHiDHiDHix_all_ee,
+                           xHiDHiDHix_all_ge, i1, j1, i2, j2, tPDPD_gg,
+                           tPDPD_ee, tPDPD_ge);
+
+            dev2_gg = 0.5 * tPDPD_gg - yPDPDPy_gg;
+            dev2_ee = 0.5 * tPDPD_ee - yPDPDPy_ee;
+            dev2_ge = 0.5 * tPDPD_ge - yPDPDPy_ge;
+          } else {
+            Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tHiDHiD_gg, tHiDHiD_ee,
+                             tHiDHiD_ge);
+
+            dev2_gg = 0.5 * tHiDHiD_gg - yPDPDPy_gg;
+            dev2_ee = 0.5 * tHiDHiD_ee - yPDPDPy_ee;
+            dev2_ge = 0.5 * tHiDHiD_ge - yPDPDPy_ge;
+          }
+
+          // Set up Hessian.
+          gsl_matrix_set(Hessian, v1, v2, dev2_gg);
+          gsl_matrix_set(Hessian, v1 + v_size, v2 + v_size, dev2_ee);
+          gsl_matrix_set(Hessian, v1, v2 + v_size, dev2_ge);
+          gsl_matrix_set(Hessian, v2 + v_size, v1, dev2_ge);
+
+          if (v1 != v2) {
+            gsl_matrix_set(Hessian, v2, v1, dev2_gg);
+            gsl_matrix_set(Hessian, v2 + v_size, v1 + v_size, dev2_ee);
+            gsl_matrix_set(Hessian, v2, v1 + v_size, dev2_ge);
+            gsl_matrix_set(Hessian, v1 + v_size, v2, dev2_ge);
+          }
+        }
+      }
+    }
+  }
+
+  // Invert Hessian.
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(v_size * 2);
+
+  LUDecomp(Hessian, pmt, &sig);
+  LUInvert(Hessian, pmt, Hessian_inv);
+
+  gsl_permutation_free(pmt);
+  gsl_matrix_free(Hessian);
+
+  // Calculate Edgeworth correction factors after inverting
+  // Hessian.
+  if (c_size > 1) {
+    CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e,
+            xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, d_size,
+            crt_a, crt_b, crt_c);
+  } else {
+    crt_a = 0.0;
+    crt_b = 0.0;
+    crt_c = 0.0;
+  }
+
+  gsl_matrix_free(xHiDHiy_all_g);
+  gsl_matrix_free(xHiDHiy_all_e);
+  gsl_matrix_free(xHiDHix_all_g);
+  gsl_matrix_free(xHiDHix_all_e);
+  gsl_matrix_free(xHiDHixQixHiy_all_g);
+  gsl_matrix_free(xHiDHixQixHiy_all_e);
+
+  gsl_matrix_free(QixHiDHiy_all_g);
+  gsl_matrix_free(QixHiDHiy_all_e);
+  gsl_matrix_free(QixHiDHix_all_g);
+  gsl_matrix_free(QixHiDHix_all_e);
+  gsl_matrix_free(QixHiDHixQixHiy_all_g);
+  gsl_matrix_free(QixHiDHixQixHiy_all_e);
+
+  gsl_matrix_free(xHiDHiDHiy_all_gg);
+  gsl_matrix_free(xHiDHiDHiy_all_ee);
+  gsl_matrix_free(xHiDHiDHiy_all_ge);
+  gsl_matrix_free(xHiDHiDHix_all_gg);
+  gsl_matrix_free(xHiDHiDHix_all_ee);
+  gsl_matrix_free(xHiDHiDHix_all_ge);
+
+  return;
 }
 
 // Update Vg, Ve.
-void UpdateVgVe (const gsl_matrix *Hessian_inv, const gsl_vector *gradient,
-		 const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) {
-	size_t v_size=gradient->size/2, d_size=V_g->size1;
-	size_t v;
+void UpdateVgVe(const gsl_matrix *Hessian_inv, const gsl_vector *gradient,
+                const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) {
+  size_t v_size = gradient->size / 2, d_size = V_g->size1;
+  size_t v;
 
-	gsl_vector *vec_v=gsl_vector_alloc (v_size*2);
+  gsl_vector *vec_v = gsl_vector_alloc(v_size * 2);
 
-	double d;
+  double d;
 
-	// Vectorize Vg and Ve.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<d_size; j++) {
-			if (j<i) {continue;}
-			v=GetIndex(i, j, d_size);
+  // Vectorize Vg and Ve.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) {
+        continue;
+      }
+      v = GetIndex(i, j, d_size);
 
-			d=gsl_matrix_get (V_g, i, j);
-			gsl_vector_set (vec_v, v, d);
+      d = gsl_matrix_get(V_g, i, j);
+      gsl_vector_set(vec_v, v, d);
 
-			d=gsl_matrix_get (V_e, i, j);
-			gsl_vector_set (vec_v, v+v_size, d);
-		}
-	}
+      d = gsl_matrix_get(V_e, i, j);
+      gsl_vector_set(vec_v, v + v_size, d);
+    }
+  }
 
-	gsl_blas_dgemv (CblasNoTrans, -1.0*step_scale, Hessian_inv,
-			gradient, 1.0, vec_v);
+  gsl_blas_dgemv(CblasNoTrans, -1.0 * step_scale, Hessian_inv, gradient, 1.0,
+                 vec_v);
 
-	// Save Vg and Ve.
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<d_size; j++) {
-			if (j<i) {continue;}
-			v=GetIndex(i, j, d_size);
+  // Save Vg and Ve.
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j < d_size; j++) {
+      if (j < i) {
+        continue;
+      }
+      v = GetIndex(i, j, d_size);
 
-			d=gsl_vector_get (vec_v, v);
-			gsl_matrix_set (V_g, i, j, d);
-			gsl_matrix_set (V_g, j, i, d);
+      d = gsl_vector_get(vec_v, v);
+      gsl_matrix_set(V_g, i, j, d);
+      gsl_matrix_set(V_g, j, i, d);
 
-			d=gsl_vector_get (vec_v, v+v_size);
-			gsl_matrix_set (V_e, i, j, d);
-			gsl_matrix_set (V_e, j, i, d);
-		}
-	}
+      d = gsl_vector_get(vec_v, v + v_size);
+      gsl_matrix_set(V_e, i, j, d);
+      gsl_matrix_set(V_e, j, i, d);
+    }
+  }
 
-	gsl_vector_free(vec_v);
+  gsl_vector_free(vec_v);
 
-	return;
+  return;
 }
 
-double MphNR (const char func_name, const size_t max_iter,
-	      const double max_prec, const gsl_vector *eval,
-	      const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *Hi_all,
-	      gsl_matrix *xHi_all, gsl_matrix *Hiy_all, gsl_matrix *V_g,
-	      gsl_matrix *V_e, gsl_matrix *Hessian_inv, double &crt_a,
-	      double &crt_b, double &crt_c) {
-	if (func_name!='R' && func_name!='L' && func_name!='r' &&
-	    func_name!='l') {
-	  cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
-	    "likelihood, 'L' for log-likelihood."<<endl;
-	  return 0.0;
-	}
-	size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
-	size_t dc_size=d_size*c_size;
-	size_t v_size=d_size*(d_size+1)/2;
-
-	double logdet_H, logdet_Q, yPy, logl_const;
-	double logl_old=0.0, logl_new=0.0, step_scale;
-	int sig;
-	size_t step_iter, flag_pd;
-
-	gsl_matrix *Vg_save=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Ve_save=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_temp=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *U_temp=gsl_matrix_alloc (d_size, d_size);
-	gsl_vector *D_temp=gsl_vector_alloc (d_size);
-	gsl_vector *xHiy=gsl_vector_alloc (dc_size);
-	gsl_vector *QixHiy=gsl_vector_alloc (dc_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
-	gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
-
-	gsl_vector *gradient=gsl_vector_alloc (v_size*2);
-
-	// Calculate |XXt| and (XXt)^{-1}.
-	gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
-	for (size_t i=0; i<c_size; ++i) {
-	  for (size_t j=0; j<i; ++j) {
-	    gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i));
-	  }
-	}
-
-	gsl_permutation * pmt=gsl_permutation_alloc (c_size);
-	LUDecomp (XXt, pmt, &sig);
-	gsl_permutation_free (pmt);
-
-	// Calculate the constant for logl.
-	if (func_name=='R' || func_name=='r') {
-	  logl_const=-0.5*(double)(n_size-c_size) *
-	    (double)d_size*log(2.0*M_PI) +
-	    0.5*(double)d_size*LULndet (XXt);
-	} else {
-	  logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
-	}
-	
-	// Optimization iterations.
-	for (size_t t=0; t<max_iter; t++) {
-		gsl_matrix_memcpy (Vg_save, V_g);
-		gsl_matrix_memcpy (Ve_save, V_e);
-
-		step_scale=1.0; step_iter=0;
-		do {
-		  gsl_matrix_memcpy (V_g, Vg_save);
-		  gsl_matrix_memcpy (V_e, Ve_save);
-		  
-		  // Update Vg, Ve, and invert Hessian.
-		  if (t!=0) {
-		    UpdateVgVe (Hessian_inv, gradient, step_scale, V_g, V_e);
-		  }
-		  
-		  // Check if both Vg and Ve are positive definite.
-		  flag_pd=1;
-		  gsl_matrix_memcpy (V_temp, V_e);
-		  EigenDecomp(V_temp, U_temp, D_temp, 0);
-		  for (size_t i=0; i<d_size; i++) {
-		    if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
-		  }
-		  gsl_matrix_memcpy (V_temp, V_g);
-		  EigenDecomp(V_temp, U_temp, D_temp, 0);
-		  for (size_t i=0; i<d_size; i++) {
-		    if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
-		  }
-		  
-		  // If flag_pd==1, continue to calculate quantities
-		  // and logl.
-		  if (flag_pd==1) {
-		    CalcHiQi(eval,X,V_g,V_e,Hi_all,Qi,logdet_H,logdet_Q);
-		    Calc_Hiy_all (Y, Hi_all, Hiy_all);
-		    Calc_xHi_all (X, Hi_all, xHi_all);
-		    
-		    // Calculate QixHiy and yPy.
-		    Calc_xHiy (Y, xHi_all, xHiy);
-		    gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
-		    
-		    gsl_blas_ddot (QixHiy, xHiy, &yPy);
-		    yPy=Calc_yHiy (Y, Hiy_all)-yPy;
-		    
-		    // Calculate log likelihood/restricted likelihood value.
-		    if (func_name=='R' || func_name=='r') {
-		      logl_new=logl_const-0.5*logdet_H-0.5*logdet_Q-0.5*yPy;
-		    } else {
-		      logl_new=logl_const-0.5*logdet_H-0.5*yPy;
-		    }
-		  }
-		  
-		  step_scale/=2.0;
-		  step_iter++;
-		  
-		} while ( (flag_pd==0 || logl_new<logl_old ||
-			   logl_new-logl_old>10 ) && step_iter<10 && t!=0);
-
-		// Terminate if change is small.
-		if (t!=0) {
-			if (logl_new<logl_old || flag_pd==0) {
-				gsl_matrix_memcpy (V_g, Vg_save);
-				gsl_matrix_memcpy (V_e, Ve_save);
-				break;
-			}
-
-			if (logl_new-logl_old<max_prec) {
-				break;
-			}
-		}
-
-		logl_old=logl_new;
-
-		CalcDev (func_name, eval, Qi, Hi_all, xHi_all, Hiy_all,
-			 QixHiy, gradient, Hessian_inv, crt_a, crt_b, crt_c);
-	}
-
-	// Mutiply Hessian_inv with -1.0.
-	// Now Hessian_inv is the variance matrix.
-	gsl_matrix_scale (Hessian_inv, -1.0);
-
-	gsl_matrix_free(Vg_save);
-	gsl_matrix_free(Ve_save);
-	gsl_matrix_free(V_temp);
-	gsl_matrix_free(U_temp);
-	gsl_vector_free(D_temp);
-	gsl_vector_free(xHiy);
-	gsl_vector_free(QixHiy);
-
-	gsl_matrix_free(Qi);
-	gsl_matrix_free(XXt);
-
-	gsl_vector_free(gradient);
-
-	return logl_new;
+double MphNR(const char func_name, const size_t max_iter, const double max_prec,
+             const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
+             gsl_matrix *Hi_all, gsl_matrix *xHi_all, gsl_matrix *Hiy_all,
+             gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *Hessian_inv,
+             double &crt_a, double &crt_b, double &crt_c) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+         << "likelihood, 'L' for log-likelihood." << endl;
+    return 0.0;
+  }
+  size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+  size_t dc_size = d_size * c_size;
+  size_t v_size = d_size * (d_size + 1) / 2;
+
+  double logdet_H, logdet_Q, yPy, logl_const;
+  double logl_old = 0.0, logl_new = 0.0, step_scale;
+  int sig;
+  size_t step_iter, flag_pd;
+
+  gsl_matrix *Vg_save = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Ve_save = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_temp = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *U_temp = gsl_matrix_alloc(d_size, d_size);
+  gsl_vector *D_temp = gsl_vector_alloc(d_size);
+  gsl_vector *xHiy = gsl_vector_alloc(dc_size);
+  gsl_vector *QixHiy = gsl_vector_alloc(dc_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+  gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
+
+  gsl_vector *gradient = gsl_vector_alloc(v_size * 2);
+
+  // Calculate |XXt| and (XXt)^{-1}.
+  gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
+  for (size_t i = 0; i < c_size; ++i) {
+    for (size_t j = 0; j < i; ++j) {
+      gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
+    }
+  }
+
+  gsl_permutation *pmt = gsl_permutation_alloc(c_size);
+  LUDecomp(XXt, pmt, &sig);
+  gsl_permutation_free(pmt);
+
+  // Calculate the constant for logl.
+  if (func_name == 'R' || func_name == 'r') {
+    logl_const =
+        -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) +
+        0.5 * (double)d_size * LULndet(XXt);
+  } else {
+    logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI);
+  }
+
+  // Optimization iterations.
+  for (size_t t = 0; t < max_iter; t++) {
+    gsl_matrix_memcpy(Vg_save, V_g);
+    gsl_matrix_memcpy(Ve_save, V_e);
+
+    step_scale = 1.0;
+    step_iter = 0;
+    do {
+      gsl_matrix_memcpy(V_g, Vg_save);
+      gsl_matrix_memcpy(V_e, Ve_save);
+
+      // Update Vg, Ve, and invert Hessian.
+      if (t != 0) {
+        UpdateVgVe(Hessian_inv, gradient, step_scale, V_g, V_e);
+      }
+
+      // Check if both Vg and Ve are positive definite.
+      flag_pd = 1;
+      gsl_matrix_memcpy(V_temp, V_e);
+      EigenDecomp(V_temp, U_temp, D_temp, 0);
+      for (size_t i = 0; i < d_size; i++) {
+        if (gsl_vector_get(D_temp, i) <= 0) {
+          flag_pd = 0;
+        }
+      }
+      gsl_matrix_memcpy(V_temp, V_g);
+      EigenDecomp(V_temp, U_temp, D_temp, 0);
+      for (size_t i = 0; i < d_size; i++) {
+        if (gsl_vector_get(D_temp, i) <= 0) {
+          flag_pd = 0;
+        }
+      }
+
+      // If flag_pd==1, continue to calculate quantities
+      // and logl.
+      if (flag_pd == 1) {
+        CalcHiQi(eval, X, V_g, V_e, Hi_all, Qi, logdet_H, logdet_Q);
+        Calc_Hiy_all(Y, Hi_all, Hiy_all);
+        Calc_xHi_all(X, Hi_all, xHi_all);
+
+        // Calculate QixHiy and yPy.
+        Calc_xHiy(Y, xHi_all, xHiy);
+        gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
+
+        gsl_blas_ddot(QixHiy, xHiy, &yPy);
+        yPy = Calc_yHiy(Y, Hiy_all) - yPy;
+
+        // Calculate log likelihood/restricted likelihood value.
+        if (func_name == 'R' || func_name == 'r') {
+          logl_new = logl_const - 0.5 * logdet_H - 0.5 * logdet_Q - 0.5 * yPy;
+        } else {
+          logl_new = logl_const - 0.5 * logdet_H - 0.5 * yPy;
+        }
+      }
+
+      step_scale /= 2.0;
+      step_iter++;
+
+    } while (
+        (flag_pd == 0 || logl_new < logl_old || logl_new - logl_old > 10) &&
+        step_iter < 10 && t != 0);
+
+    // Terminate if change is small.
+    if (t != 0) {
+      if (logl_new < logl_old || flag_pd == 0) {
+        gsl_matrix_memcpy(V_g, Vg_save);
+        gsl_matrix_memcpy(V_e, Ve_save);
+        break;
+      }
+
+      if (logl_new - logl_old < max_prec) {
+        break;
+      }
+    }
+
+    logl_old = logl_new;
+
+    CalcDev(func_name, eval, Qi, Hi_all, xHi_all, Hiy_all, QixHiy, gradient,
+            Hessian_inv, crt_a, crt_b, crt_c);
+  }
+
+  // Mutiply Hessian_inv with -1.0.
+  // Now Hessian_inv is the variance matrix.
+  gsl_matrix_scale(Hessian_inv, -1.0);
+
+  gsl_matrix_free(Vg_save);
+  gsl_matrix_free(Ve_save);
+  gsl_matrix_free(V_temp);
+  gsl_matrix_free(U_temp);
+  gsl_vector_free(D_temp);
+  gsl_vector_free(xHiy);
+  gsl_vector_free(QixHiy);
+
+  gsl_matrix_free(Qi);
+  gsl_matrix_free(XXt);
+
+  gsl_vector_free(gradient);
+
+  return logl_new;
 }
 
 // Initialize Vg, Ve and B.
 void MphInitial(const size_t em_iter, const double em_prec,
-		const size_t nr_iter, const double nr_prec,
-		const gsl_vector *eval, const gsl_matrix *X,
-		const gsl_matrix *Y, const double l_min, const double l_max,
-		const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e,
-		gsl_matrix *B) {
-  
-	gsl_matrix_set_zero (V_g);
-	gsl_matrix_set_zero (V_e);
-	gsl_matrix_set_zero (B);
-
-	size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
-	double a, b, c;
-	double lambda, logl, vg, ve;
-
-	// Initialize the diagonal elements of Vg and Ve using univariate
-	// LMM and REML estimates.
-	gsl_matrix *Xt=gsl_matrix_alloc (n_size, c_size);
-	gsl_vector *beta_temp=gsl_vector_alloc(c_size);
-	gsl_vector *se_beta_temp=gsl_vector_alloc(c_size);
-
-	gsl_matrix_transpose_memcpy (Xt, X);
-
-	for (size_t i=0; i<d_size; i++) {
-	  gsl_vector_const_view Y_row=gsl_matrix_const_row (Y, i);
-	  CalcLambda ('R', eval, Xt, &Y_row.vector, l_min, l_max,
-		      n_region, lambda, logl);
-	  CalcLmmVgVeBeta (eval, Xt, &Y_row.vector, lambda, vg, ve,
-			   beta_temp, se_beta_temp);
-	  
-	  gsl_matrix_set(V_g, i, i, vg);
-	  gsl_matrix_set(V_e, i, i, ve);
-	}
-	
-	gsl_matrix_free (Xt);
-	gsl_vector_free (beta_temp);
-	gsl_vector_free (se_beta_temp);
-
-	// If number of phenotypes is above four, then obtain the off
-	// diagonal elements with two trait models.
-	if (d_size>4) {
-	  
-	  // First obtain good initial values.
-	  // Large matrices for EM.
-	  gsl_matrix *U_hat=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *E_hat=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *OmegaU=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *OmegaE=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiY=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiBX=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiU=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *UltVehiE=gsl_matrix_alloc (2, n_size);
-	  
-	  // Large matrices for NR. Each dxd block is H_k^{-1}.
-	  gsl_matrix *Hi_all=gsl_matrix_alloc (2, 2*n_size);
-
-	  // Each column is H_k^{-1}y_k.
-	  gsl_matrix *Hiy_all=gsl_matrix_alloc (2, n_size);
-
-	  // Each dcxdc block is x_k\otimes H_k^{-1}.
-	  gsl_matrix *xHi_all=gsl_matrix_alloc (2*c_size, 2*n_size);
-	  gsl_matrix *Hessian=gsl_matrix_alloc (6, 6);
-	  
-	  // 2 by n matrix of Y.
-	  gsl_matrix *Y_sub=gsl_matrix_alloc (2, n_size);
-	  gsl_matrix *Vg_sub=gsl_matrix_alloc (2, 2);
-	  gsl_matrix *Ve_sub=gsl_matrix_alloc (2, 2);
-	  gsl_matrix *B_sub=gsl_matrix_alloc (2, c_size);
-	  
-	  for (size_t i=0; i<d_size; i++) {
-	    gsl_vector_view Y_sub1=gsl_matrix_row (Y_sub, 0);
-	    gsl_vector_const_view Y_1=gsl_matrix_const_row (Y, i);
-	    gsl_vector_memcpy (&Y_sub1.vector, &Y_1.vector);
-	    
-	    for (size_t j=i+1; j<d_size; j++) {
-	      gsl_vector_view Y_sub2=gsl_matrix_row (Y_sub, 1);
-	      gsl_vector_const_view Y_2=gsl_matrix_const_row (Y, j);
-	      gsl_vector_memcpy (&Y_sub2.vector, &Y_2.vector);
-	      
-	      gsl_matrix_set_zero (Vg_sub);
-	      gsl_matrix_set_zero (Ve_sub);
-	      gsl_matrix_set (Vg_sub, 0, 0, gsl_matrix_get (V_g, i, i));
-	      gsl_matrix_set (Ve_sub, 0, 0, gsl_matrix_get (V_e, i, i));
-	      gsl_matrix_set (Vg_sub, 1, 1, gsl_matrix_get (V_g, j, j));
-	      gsl_matrix_set (Ve_sub, 1, 1, gsl_matrix_get (V_e, j, j));
-	      
-	      logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat,
-			  E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
-			  UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub);
-	      logl=MphNR ('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all,
-			  xHi_all, Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
-	      
-	      gsl_matrix_set(V_g, i, j, gsl_matrix_get (Vg_sub, 0, 1));
-	      gsl_matrix_set(V_g, j, i, gsl_matrix_get (Vg_sub, 0, 1));
-	      
-	      gsl_matrix_set(V_e, i, j, ve=gsl_matrix_get (Ve_sub, 0, 1));
-	      gsl_matrix_set(V_e, j, i, ve=gsl_matrix_get (Ve_sub, 0, 1));
-	    }
-	  }
-	  
-	  // Free matrices.
-	  gsl_matrix_free(U_hat);
-	  gsl_matrix_free(E_hat);
-	  gsl_matrix_free(OmegaU);
-	  gsl_matrix_free(OmegaE);
-	  gsl_matrix_free(UltVehiY);
-	  gsl_matrix_free(UltVehiBX);
-	  gsl_matrix_free(UltVehiU);
-	  gsl_matrix_free(UltVehiE);
-	  
-	  gsl_matrix_free(Hi_all);
-	  gsl_matrix_free(Hiy_all);
-	  gsl_matrix_free(xHi_all);
-	  gsl_matrix_free(Hessian);
-	  
-	  gsl_matrix_free(Y_sub);
-	  gsl_matrix_free(Vg_sub);
-	  gsl_matrix_free(Ve_sub);
-	  gsl_matrix_free(B_sub);
-	}
-
-	// Calculate B hat using GSL estimate.
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-
-	gsl_vector *D_l=gsl_vector_alloc (d_size);
-	gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *Qi=gsl_matrix_alloc (d_size*c_size, d_size*c_size);
-	gsl_vector *XHiy=gsl_vector_alloc (d_size*c_size);
-	gsl_vector *beta=gsl_vector_alloc (d_size*c_size);
-
-	gsl_vector_set_zero (XHiy);
-
-	double logdet_Ve, logdet_Q, dl, d, delta, dx, dy;
-
-	// Eigen decomposition and calculate log|Ve|.
-	logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
-	// Calculate Qi and log|Q|.
-	logdet_Q=CalcQi (eval, D_l, X, Qi);
-
-	// Calculate UltVehiY.
-	gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,UltVehi,Y,0.0,UltVehiY);
-
-	//calculate XHiy
-	for (size_t i=0; i<d_size; i++) {
-	  dl=gsl_vector_get(D_l, i);
-	  
-	  for (size_t j=0; j<c_size; j++) {
-	    d=0.0;
-	    for (size_t k=0; k<n_size; k++) {
-	      delta=gsl_vector_get(eval, k);
-	      dx=gsl_matrix_get(X, j, k);
-	      dy=gsl_matrix_get(UltVehiY, i, k);
-	      d+=dy*dx/(delta*dl+1.0);
-	    }
-	    gsl_vector_set(XHiy, j*d_size+i, d);
-	  }
-	}
-
-	gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta);
-
-	// Multiply beta by UltVeh and save to B.
-	for (size_t i=0; i<c_size; i++) {
-	  gsl_vector_view B_col=gsl_matrix_column (B, i);
-	  gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size);
-	  gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0,
-			 &B_col.vector);
-	}
-
-	// Free memory.
-	gsl_matrix_free(UltVehiY);
-
-	gsl_vector_free(D_l);
-	gsl_matrix_free(UltVeh);
-	gsl_matrix_free(UltVehi);
-	gsl_matrix_free(Qi);
-	gsl_vector_free(XHiy);
-	gsl_vector_free(beta);
-
-	return;
+                const size_t nr_iter, const double nr_prec,
+                const gsl_vector *eval, const gsl_matrix *X,
+                const gsl_matrix *Y, const double l_min, const double l_max,
+                const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e,
+                gsl_matrix *B) {
+
+  gsl_matrix_set_zero(V_g);
+  gsl_matrix_set_zero(V_e);
+  gsl_matrix_set_zero(B);
+
+  size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+  double a, b, c;
+  double lambda, logl, vg, ve;
+
+  // Initialize the diagonal elements of Vg and Ve using univariate
+  // LMM and REML estimates.
+  gsl_matrix *Xt = gsl_matrix_alloc(n_size, c_size);
+  gsl_vector *beta_temp = gsl_vector_alloc(c_size);
+  gsl_vector *se_beta_temp = gsl_vector_alloc(c_size);
+
+  gsl_matrix_transpose_memcpy(Xt, X);
+
+  for (size_t i = 0; i < d_size; i++) {
+    gsl_vector_const_view Y_row = gsl_matrix_const_row(Y, i);
+    CalcLambda('R', eval, Xt, &Y_row.vector, l_min, l_max, n_region, lambda,
+               logl);
+    CalcLmmVgVeBeta(eval, Xt, &Y_row.vector, lambda, vg, ve, beta_temp,
+                    se_beta_temp);
+
+    gsl_matrix_set(V_g, i, i, vg);
+    gsl_matrix_set(V_e, i, i, ve);
+  }
+
+  gsl_matrix_free(Xt);
+  gsl_vector_free(beta_temp);
+  gsl_vector_free(se_beta_temp);
+
+  // If number of phenotypes is above four, then obtain the off
+  // diagonal elements with two trait models.
+  if (d_size > 4) {
+
+    // First obtain good initial values.
+    // Large matrices for EM.
+    gsl_matrix *U_hat = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *E_hat = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *OmegaU = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *OmegaE = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiY = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiBX = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiU = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *UltVehiE = gsl_matrix_alloc(2, n_size);
+
+    // Large matrices for NR. Each dxd block is H_k^{-1}.
+    gsl_matrix *Hi_all = gsl_matrix_alloc(2, 2 * n_size);
+
+    // Each column is H_k^{-1}y_k.
+    gsl_matrix *Hiy_all = gsl_matrix_alloc(2, n_size);
+
+    // Each dcxdc block is x_k\otimes H_k^{-1}.
+    gsl_matrix *xHi_all = gsl_matrix_alloc(2 * c_size, 2 * n_size);
+    gsl_matrix *Hessian = gsl_matrix_alloc(6, 6);
+
+    // 2 by n matrix of Y.
+    gsl_matrix *Y_sub = gsl_matrix_alloc(2, n_size);
+    gsl_matrix *Vg_sub = gsl_matrix_alloc(2, 2);
+    gsl_matrix *Ve_sub = gsl_matrix_alloc(2, 2);
+    gsl_matrix *B_sub = gsl_matrix_alloc(2, c_size);
+
+    for (size_t i = 0; i < d_size; i++) {
+      gsl_vector_view Y_sub1 = gsl_matrix_row(Y_sub, 0);
+      gsl_vector_const_view Y_1 = gsl_matrix_const_row(Y, i);
+      gsl_vector_memcpy(&Y_sub1.vector, &Y_1.vector);
+
+      for (size_t j = i + 1; j < d_size; j++) {
+        gsl_vector_view Y_sub2 = gsl_matrix_row(Y_sub, 1);
+        gsl_vector_const_view Y_2 = gsl_matrix_const_row(Y, j);
+        gsl_vector_memcpy(&Y_sub2.vector, &Y_2.vector);
+
+        gsl_matrix_set_zero(Vg_sub);
+        gsl_matrix_set_zero(Ve_sub);
+        gsl_matrix_set(Vg_sub, 0, 0, gsl_matrix_get(V_g, i, i));
+        gsl_matrix_set(Ve_sub, 0, 0, gsl_matrix_get(V_e, i, i));
+        gsl_matrix_set(Vg_sub, 1, 1, gsl_matrix_get(V_g, j, j));
+        gsl_matrix_set(Ve_sub, 1, 1, gsl_matrix_get(V_e, j, j));
+
+        logl = MphEM('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat,
+                     OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                     Vg_sub, Ve_sub, B_sub);
+        logl = MphNR('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, xHi_all,
+                     Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
+
+        gsl_matrix_set(V_g, i, j, gsl_matrix_get(Vg_sub, 0, 1));
+        gsl_matrix_set(V_g, j, i, gsl_matrix_get(Vg_sub, 0, 1));
+
+        gsl_matrix_set(V_e, i, j, ve = gsl_matrix_get(Ve_sub, 0, 1));
+        gsl_matrix_set(V_e, j, i, ve = gsl_matrix_get(Ve_sub, 0, 1));
+      }
+    }
+
+    // Free matrices.
+    gsl_matrix_free(U_hat);
+    gsl_matrix_free(E_hat);
+    gsl_matrix_free(OmegaU);
+    gsl_matrix_free(OmegaE);
+    gsl_matrix_free(UltVehiY);
+    gsl_matrix_free(UltVehiBX);
+    gsl_matrix_free(UltVehiU);
+    gsl_matrix_free(UltVehiE);
+
+    gsl_matrix_free(Hi_all);
+    gsl_matrix_free(Hiy_all);
+    gsl_matrix_free(xHi_all);
+    gsl_matrix_free(Hessian);
+
+    gsl_matrix_free(Y_sub);
+    gsl_matrix_free(Vg_sub);
+    gsl_matrix_free(Ve_sub);
+    gsl_matrix_free(B_sub);
+  }
+
+  // Calculate B hat using GSL estimate.
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+
+  gsl_vector *D_l = gsl_vector_alloc(d_size);
+  gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *Qi = gsl_matrix_alloc(d_size * c_size, d_size * c_size);
+  gsl_vector *XHiy = gsl_vector_alloc(d_size * c_size);
+  gsl_vector *beta = gsl_vector_alloc(d_size * c_size);
+
+  gsl_vector_set_zero(XHiy);
+
+  double logdet_Ve, logdet_Q, dl, d, delta, dx, dy;
+
+  // Eigen decomposition and calculate log|Ve|.
+  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+  // Calculate Qi and log|Q|.
+  logdet_Q = CalcQi(eval, D_l, X, Qi);
+
+  // Calculate UltVehiY.
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+  // calculate XHiy
+  for (size_t i = 0; i < d_size; i++) {
+    dl = gsl_vector_get(D_l, i);
+
+    for (size_t j = 0; j < c_size; j++) {
+      d = 0.0;
+      for (size_t k = 0; k < n_size; k++) {
+        delta = gsl_vector_get(eval, k);
+        dx = gsl_matrix_get(X, j, k);
+        dy = gsl_matrix_get(UltVehiY, i, k);
+        d += dy * dx / (delta * dl + 1.0);
+      }
+      gsl_vector_set(XHiy, j * d_size + i, d);
+    }
+  }
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta);
+
+  // Multiply beta by UltVeh and save to B.
+  for (size_t i = 0; i < c_size; i++) {
+    gsl_vector_view B_col = gsl_matrix_column(B, i);
+    gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
+    gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0,
+                   &B_col.vector);
+  }
+
+  // Free memory.
+  gsl_matrix_free(UltVehiY);
+
+  gsl_vector_free(D_l);
+  gsl_matrix_free(UltVeh);
+  gsl_matrix_free(UltVehi);
+  gsl_matrix_free(Qi);
+  gsl_vector_free(XHiy);
+  gsl_vector_free(beta);
+
+  return;
 }
 
 // p-value correction
 // mode=1 Wald; mode=2 LRT; mode=3 SCORE;
-double PCRT (const size_t mode, const size_t d_size, const double p_value,
-	     const double crt_a, const double crt_b, const double crt_c) {
-	double p_crt=0.0, chisq_crt=0.0, q=(double)d_size;
-	double chisq=gsl_cdf_chisq_Qinv(p_value, (double)d_size );
-
-	if (mode==1) {
-		double a=crt_c/(2.0*q*(q+2.0));
-		double b=1.0+(crt_a+crt_b)/(2.0*q);
-		chisq_crt=(-1.0*b+sqrt(b*b+4.0*a*chisq))/(2.0*a);
-	} else if (mode==2) {
-		chisq_crt=chisq/(1.0+crt_a/(2.0*q) );
-	} else {
-		chisq_crt=chisq;
-	}
-
-	p_crt=gsl_cdf_chisq_Q (chisq_crt, (double)d_size );
-
-	return p_crt;
+double PCRT(const size_t mode, const size_t d_size, const double p_value,
+            const double crt_a, const double crt_b, const double crt_c) {
+  double p_crt = 0.0, chisq_crt = 0.0, q = (double)d_size;
+  double chisq = gsl_cdf_chisq_Qinv(p_value, (double)d_size);
+
+  if (mode == 1) {
+    double a = crt_c / (2.0 * q * (q + 2.0));
+    double b = 1.0 + (crt_a + crt_b) / (2.0 * q);
+    chisq_crt = (-1.0 * b + sqrt(b * b + 4.0 * a * chisq)) / (2.0 * a);
+  } else if (mode == 2) {
+    chisq_crt = chisq / (1.0 + crt_a / (2.0 * q));
+  } else {
+    chisq_crt = chisq;
+  }
+
+  p_crt = gsl_cdf_chisq_Q(chisq_crt, (double)d_size);
+
+  return p_crt;
 }
 
 // WJA added.
-void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
-			 const gsl_matrix *UtW, const gsl_matrix *UtY) {
-	string file_bgen=file_oxford+".bgen";
-	ifstream infile (file_bgen.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bgen file:"<<file_bgen<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	string line;
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_miss, c_phen;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR. Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-	gsl_vector *x=gsl_vector_alloc (n_size);
-	gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub =
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
-	gsl_vector_view X_row=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row.vector);
-	gsl_vector_view B_col=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size,
-						     c+v_size) );
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<=i; j++) {
-			cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-		}
-		cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<=i; j++) {
-			c=GetIndex(i, j, d_size);
-			cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-		}
-		cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-		for (size_t j=0; j<=i; j++) {
-			cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-		}
-		cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Read in header.
-	uint32_t bgen_snp_block_offset;
-	uint32_t bgen_header_length;
-	uint32_t bgen_nsamples;
-	uint32_t bgen_nsnps;
-	uint32_t bgen_flags;
-	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
-	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
-	bgen_snp_block_offset-=4;
-	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
-	bgen_snp_block_offset-=4;
-	infile.ignore(4+bgen_header_length-20);
-	bgen_snp_block_offset-=4+bgen_header_length-20;
-	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
-	bgen_snp_block_offset-=4;
-	bool CompressedSNPBlocks=bgen_flags&0x1;
-
-	infile.ignore(bgen_snp_block_offset);
-
-	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
-	double bgen_geno_prob_non_miss;
-
-	uint32_t bgen_N;
-	uint16_t bgen_LS;
-	uint16_t bgen_LR;
-	uint16_t bgen_LC;
-	uint32_t bgen_SNP_pos;
-	uint32_t bgen_LA;
-	std::string bgen_A_allele;
-	uint32_t bgen_LB;
-	std::string bgen_B_allele;
-	uint32_t bgen_P;
-	size_t unzipped_data_size;
-	string id;
-	string rs;
-	string chr;
-	std::cout<<"Warning: WJA hard coded SNP missingness threshold "<<
-	  "of 10%"<<std::endl;
-
-	// Start reading genotypes and analyze.
-	size_t csnp=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (t%d_pace==0 || t==(ns_total-1)) {
-	    ProgressBar ("Reading SNPs  ", t, ns_total-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  // Read SNP header.
-	  id.clear();
-	  rs.clear();
-	  chr.clear();
-	  bgen_A_allele.clear();
-	  bgen_B_allele.clear();
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_N),4);
-	  infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-	  
-	  id.resize(bgen_LS);
-	  infile.read(&id[0], bgen_LS);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_LR),2);
-	  rs.resize(bgen_LR);
-	  infile.read(&rs[0], bgen_LR);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_LC),2);
-	  chr.resize(bgen_LC);
-	  infile.read(&chr[0], bgen_LC);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-	  
-	  infile.read(reinterpret_cast<char*>(&bgen_LA),4);
-	  bgen_A_allele.resize(bgen_LA);
-	  infile.read(&bgen_A_allele[0], bgen_LA);
-		
-	  infile.read(reinterpret_cast<char*>(&bgen_LB),4);
-	  bgen_B_allele.resize(bgen_LB);
-	  infile.read(&bgen_B_allele[0], bgen_LB);
-	  
-	  uint16_t unzipped_data[3*bgen_N];
-	  
-	  if (indicator_snp[t]==0) {
-	    if(CompressedSNPBlocks)
-	      infile.read(reinterpret_cast<char*>(&bgen_P),4);
-	    else
-	      bgen_P=6*bgen_N;
-	    
-	    infile.ignore(static_cast<size_t>(bgen_P));
-	    
-	    continue;
-	  }
-
-	  if(CompressedSNPBlocks) {
-
-	    infile.read(reinterpret_cast<char*>(&bgen_P),4);
-	    uint8_t zipped_data[bgen_P];
-	    
-	    unzipped_data_size=6*bgen_N;
-	    
-	    infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
-	    
-	    int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data),
-	      reinterpret_cast<uLongf*>(&unzipped_data_size),
-	      reinterpret_cast<Bytef*>(zipped_data),
-              static_cast<uLong> (bgen_P));
-	    assert(result == Z_OK);
-	    
-	  } else {
-	    
-	    bgen_P=6*bgen_N;
-	    infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
-	  }
-	  
-	  x_mean=0.0; c_phen=0; n_miss=0;
-	  gsl_vector_set_zero(x_miss);
-	  for (size_t i=0; i<bgen_N; ++i) {
-	    if (indicator_idv[i]==0) {continue;}
-	    
-	    bgen_geno_prob_AA =
-	      static_cast<double>(unzipped_data[i*3])/32768.0;
-	    bgen_geno_prob_AB =
-	      static_cast<double>(unzipped_data[i*3+1])/32768.0;
-	    bgen_geno_prob_BB =
-	      static_cast<double>(unzipped_data[i*3+2])/32768.0;
-	    
-	    // WJA.
-	    bgen_geno_prob_non_miss=bgen_geno_prob_AA +
-	      bgen_geno_prob_AB+bgen_geno_prob_BB;
-	    if (bgen_geno_prob_non_miss<0.9) {
-	      gsl_vector_set(x_miss, c_phen, 0.0);
-	      n_miss++;
-	    }
-	    else {
-	      
-	      bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
-	      bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
-	      bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-	      
-	      geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-	      
-	      gsl_vector_set(x, c_phen, geno);
-	      gsl_vector_set(x_miss, c_phen, 1.0);
-	      x_mean+=geno;
-	    }
-	    c_phen++;
-	  }
-	  
-	  x_mean/=static_cast<double>(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
-	  }
-
-	  gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
-	  gsl_vector_memcpy (&Xlarge_col.vector, x);
-	  csnp++;
-
-	  if (csnp%msize==0 || csnp==t_last ) {
-	    size_t l=0;
-	    if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-	    
-	    gsl_matrix_view Xlarge_sub =
-	      gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-	    gsl_matrix_view UtXlarge_sub =
-	      gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-	    
-	    time_start=clock();
-	    eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-			    &UtXlarge_sub.matrix);
-	    time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    
-	    gsl_matrix_set_zero (Xlarge);
-	    
-	    for (size_t i=0; i<l; i++) {
-	      gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
-	      gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-	      
-	      // Initial values.
-	      gsl_matrix_memcpy (V_g, V_g_null);
-	      gsl_matrix_memcpy (V_e, V_e_null);
-	      gsl_matrix_memcpy (B, B_null);
-	      
-	      time_start=clock();
-		    
-	      // 3 is before 1.
-	      if (a_mode==3 || a_mode==4) {
-		p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		if (p_score<p_nr && crt==1) {
-		  logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
-				 xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
-				 crt_b, crt_c);
-		  p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		}
-	      }
-	      
-	      if (a_mode==2 || a_mode==4) {
-		logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		
-		// Calculate beta and Vbeta.
-		p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				V_e, UltVehiY, beta, Vbeta);
-		p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-		
-		if (p_lrt<p_nr) {
-		  logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				  V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
-					 (double)d_size );
-		  
-		  if (crt==1) {
-		    p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      if (a_mode==1 || a_mode==4) {
-		logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat,
-			       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
-			       UltVehiU, UltVehiE, V_g, V_e, B);
-		p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				 V_e, UltVehiY, beta, Vbeta);
-		
-		if (p_wald<p_nr) {
-		  logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (crt==1) {
-		    p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-
-	      time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      // Store summary data.
-	      for (size_t i=0; i<d_size; i++) {
-		v_beta[i]=gsl_vector_get (beta, i);
-	      }
-	      
-	      c=0;
-	      for (size_t i=0; i<d_size; i++) {
-		for (size_t j=i; j<d_size; j++) {
-		  v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		  v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		  v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		  c++;
-		}
-	      }
-	      
-	      MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve,
-			       v_Vbeta};
-	      sumStat.push_back(SNPs);
-	    }
-	  }
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	return;
+void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+                        const gsl_matrix *UtW, const gsl_matrix *UtY) {
+  string file_bgen = file_oxford + ".bgen";
+  ifstream infile(file_bgen.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bgen file:" << file_bgen << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  string line;
+
+  // Create a large matrix.
+  size_t msize = LMM_BATCH_SIZE;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_miss, c_phen;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR. Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+  gsl_vector *x = gsl_vector_alloc(n_size);
+  gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+
+  gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+  gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row.vector);
+  gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub.matrix);
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Read in header.
+  uint32_t bgen_snp_block_offset;
+  uint32_t bgen_header_length;
+  uint32_t bgen_nsamples;
+  uint32_t bgen_nsnps;
+  uint32_t bgen_flags;
+  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+  bgen_snp_block_offset -= 4;
+  infile.ignore(4 + bgen_header_length - 20);
+  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+  bgen_snp_block_offset -= 4;
+  bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+  infile.ignore(bgen_snp_block_offset);
+
+  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
+  double bgen_geno_prob_non_miss;
+
+  uint32_t bgen_N;
+  uint16_t bgen_LS;
+  uint16_t bgen_LR;
+  uint16_t bgen_LC;
+  uint32_t bgen_SNP_pos;
+  uint32_t bgen_LA;
+  std::string bgen_A_allele;
+  uint32_t bgen_LB;
+  std::string bgen_B_allele;
+  uint32_t bgen_P;
+  size_t unzipped_data_size;
+  string id;
+  string rs;
+  string chr;
+  std::cout << "Warning: WJA hard coded SNP missingness threshold "
+            << "of 10%" << std::endl;
+
+  // Start reading genotypes and analyze.
+  size_t csnp = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // Read SNP header.
+    id.clear();
+    rs.clear();
+    chr.clear();
+    bgen_A_allele.clear();
+    bgen_B_allele.clear();
+
+    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+    id.resize(bgen_LS);
+    infile.read(&id[0], bgen_LS);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+    rs.resize(bgen_LR);
+    infile.read(&rs[0], bgen_LR);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+    chr.resize(bgen_LC);
+    infile.read(&chr[0], bgen_LC);
+
+    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+    bgen_A_allele.resize(bgen_LA);
+    infile.read(&bgen_A_allele[0], bgen_LA);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+    bgen_B_allele.resize(bgen_LB);
+    infile.read(&bgen_B_allele[0], bgen_LB);
+
+    uint16_t unzipped_data[3 * bgen_N];
+
+    if (indicator_snp[t] == 0) {
+      if (CompressedSNPBlocks)
+        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      else
+        bgen_P = 6 * bgen_N;
+
+      infile.ignore(static_cast<size_t>(bgen_P));
+
+      continue;
+    }
+
+    if (CompressedSNPBlocks) {
+
+      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      uint8_t zipped_data[bgen_P];
+
+      unzipped_data_size = 6 * bgen_N;
+
+      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+
+      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+                              reinterpret_cast<uLongf *>(&unzipped_data_size),
+                              reinterpret_cast<Bytef *>(zipped_data),
+                              static_cast<uLong>(bgen_P));
+      assert(result == Z_OK);
+
+    } else {
+
+      bgen_P = 6 * bgen_N;
+      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+    }
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < bgen_N; ++i) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+      bgen_geno_prob_AB =
+          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+      bgen_geno_prob_BB =
+          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+
+      // WJA.
+      bgen_geno_prob_non_miss =
+          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+      if (bgen_geno_prob_non_miss < 0.9) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+
+        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= static_cast<double>(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    csnp++;
+
+    if (csnp % msize == 0 || csnp == t_last) {
+      size_t l = 0;
+      if (csnp % msize == 0) {
+        l = msize;
+      } else {
+        l = csnp % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+        // Initial values.
+        gsl_matrix_memcpy(V_g, V_g_null);
+        gsl_matrix_memcpy(V_e, V_e_null);
+        gsl_matrix_memcpy(B, B_null);
+
+        time_start = clock();
+
+        // 3 is before 1.
+        if (a_mode == 3 || a_mode == 4) {
+          p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+                             V_e_null, UltVehiY, beta, Vbeta);
+          if (p_score < p_nr && crt == 1) {
+            logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                            Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+          }
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+
+          // Calculate beta and Vbeta.
+          p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                           UltVehiY, beta, Vbeta);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+          if (p_lrt < p_nr) {
+            logl_H1 =
+                MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+            // Calculate beta and Vbeta.
+            p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                             UltVehiY, beta, Vbeta);
+            p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+            if (crt == 1) {
+              p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+          p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                            UltVehiY, beta, Vbeta);
+
+          if (p_wald < p_nr) {
+            logl_H1 =
+                MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                              UltVehiY, beta, Vbeta);
+
+            if (crt == 1) {
+              p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        for (size_t i = 0; i < d_size; i++) {
+          v_beta[i] = gsl_vector_get(beta, i);
+        }
+
+        c = 0;
+        for (size_t i = 0; i < d_size; i++) {
+          for (size_t j = i; j < d_size; j++) {
+            v_Vg[c] = gsl_matrix_get(V_g, i, j);
+            v_Ve[c] = gsl_matrix_get(V_e, i, j);
+            v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+            c++;
+          }
+        }
+
+        MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  return;
 }
 
-void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
-			   const gsl_matrix *UtW, const gsl_matrix *UtY) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	string line;
-	char *ch_ptr;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_miss, c_phen;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k \otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-	gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub =
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
-	gsl_vector_view X_row=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row.vector);
-	gsl_vector_view B_col=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
-		       &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
-		       crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size,
-						     c+v_size) );
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Start reading genotypes and analyze.
-	size_t csnp=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  !safeGetline(infile, line).eof();
-	  if (t%d_pace==0 || t==(ns_total-1)) {
-	    ProgressBar ("Reading SNPs  ", t, ns_total-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  ch_ptr=strtok ((char *)line.c_str(), " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  
-	  x_mean=0.0; c_phen=0; n_miss=0;
-	  gsl_vector_set_zero(x_miss);
-	  for (size_t i=0; i<ni_total; ++i) {
-	    ch_ptr=strtok (NULL, " , \t");
-	    if (indicator_idv[i]==0) {continue;}
-	    
-	    if (strcmp(ch_ptr, "NA")==0) {
-	      gsl_vector_set(x_miss, c_phen, 0.0);
-	      n_miss++;
-	    }
-	    else {
-	      geno=atof(ch_ptr);
-	      
-	      gsl_vector_set(x, c_phen, geno);
-	      gsl_vector_set(x_miss, c_phen, 1.0);
-	      x_mean+=geno;
-	    }
-	    c_phen++;
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
-	    geno=gsl_vector_get(x, i);
-	  }
-
-	  gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
-	  gsl_vector_memcpy (&Xlarge_col.vector, x);
-	  csnp++;
-	  
-	  if (csnp%msize==0 || csnp==t_last ) {
-	    size_t l=0;
-	    if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-	    
-	    gsl_matrix_view Xlarge_sub =
-	      gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-	    gsl_matrix_view UtXlarge_sub =
-	      gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-	    
-	    time_start=clock();
-	    eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-			    &UtXlarge_sub.matrix);
-	    time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    
-	    gsl_matrix_set_zero (Xlarge);
-	    
-	    for (size_t i=0; i<l; i++) {
-	      gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
-	      gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-	      
-	      // Initial values.
-	      gsl_matrix_memcpy (V_g, V_g_null);
-	      gsl_matrix_memcpy (V_e, V_e_null);
-	      gsl_matrix_memcpy (B, B_null);
-	      
-	      time_start=clock();
-	      
-	      // 3 is before 1.
-	      if (a_mode==3 || a_mode==4) {
-		p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		if (p_score<p_nr && crt==1) {
-		  logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
-				 xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
-				 crt_b, crt_c);
-		  p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		}
-	      }
-
-	      if (a_mode==2 || a_mode==4) {
-		logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		
-		// Calculate beta and Vbeta.
-		p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				V_g, V_e, UltVehiY, beta, Vbeta);
-		p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-		
-		if (p_lrt<p_nr) {
-		  logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g, V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
-					 (double)d_size );
-
-		  if (crt==1) {
-		    p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      if (a_mode==1 || a_mode==4) {
-		logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				 V_e, UltVehiY, beta, Vbeta);
-		
-		if (p_wald<p_nr) {
-		  logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e,
-				 Hessian, crt_a, crt_b, crt_c);
-		  p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (crt==1) {
-		    p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      // Store summary data.
-	      for (size_t i=0; i<d_size; i++) {
-		v_beta[i]=gsl_vector_get (beta, i);
-	      }
-	      
-	      c=0;
-	      for (size_t i=0; i<d_size; i++) {
-		for (size_t j=i; j<d_size; j++) {
-		  v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		  v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		  v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		  c++;
-		}
-	      }
-	      
-	      MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
-			       v_Ve, v_Vbeta};
-	      sumStat.push_back(SNPs);
-	    }
-	  }
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	return;
+void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+                          const gsl_matrix *UtW, const gsl_matrix *UtY) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  string line;
+  char *ch_ptr;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_miss, c_phen;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Create a large matrix.
+  size_t msize = LMM_BATCH_SIZE;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k \otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+  gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+
+  gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+  gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row.vector);
+  gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub.matrix);
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Start reading genotypes and analyze.
+  size_t csnp = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i);
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    csnp++;
+
+    if (csnp % msize == 0 || csnp == t_last) {
+      size_t l = 0;
+      if (csnp % msize == 0) {
+        l = msize;
+      } else {
+        l = csnp % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+        // Initial values.
+        gsl_matrix_memcpy(V_g, V_g_null);
+        gsl_matrix_memcpy(V_e, V_e_null);
+        gsl_matrix_memcpy(B, B_null);
+
+        time_start = clock();
+
+        // 3 is before 1.
+        if (a_mode == 3 || a_mode == 4) {
+          p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+                             V_e_null, UltVehiY, beta, Vbeta);
+          if (p_score < p_nr && crt == 1) {
+            logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                            Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+          }
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+
+          // Calculate beta and Vbeta.
+          p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                           UltVehiY, beta, Vbeta);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+          if (p_lrt < p_nr) {
+            logl_H1 =
+                MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+            // Calculate beta and Vbeta.
+            p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                             UltVehiY, beta, Vbeta);
+            p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+            if (crt == 1) {
+              p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+          p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                            UltVehiY, beta, Vbeta);
+
+          if (p_wald < p_nr) {
+            logl_H1 =
+                MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                              UltVehiY, beta, Vbeta);
+
+            if (crt == 1) {
+              p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        for (size_t i = 0; i < d_size; i++) {
+          v_beta[i] = gsl_vector_get(beta, i);
+        }
+
+        c = 0;
+        for (size_t i = 0; i < d_size; i++) {
+          for (size_t j = i; j < d_size; j++) {
+            v_Vg[c] = gsl_matrix_get(V_g, i, j);
+            v_Ve[c] = gsl_matrix_get(V_e, i, j);
+            v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+            c++;
+          }
+        }
+
+        MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  return;
 }
 
-void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
-			  const gsl_matrix *UtW, const gsl_matrix *UtY) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	char ch[1];
-	bitset<8> b;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_bit, n_miss, ci_total, ci_test;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Create a large matrix.
-	size_t msize=10000;
-	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
-	gsl_matrix_set_zero(Xlarge);
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub =
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-	gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
-	gsl_vector_view X_row=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row.vector);
-	gsl_vector_view B_col=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-		       UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
-		       &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
-		       crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
-						    c+v_size));
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-		cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y,
-		       U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
-		       UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
-		       Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
-		     &B_sub.matrix, se_B_null);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null->size1; i++) {
-	  for (size_t j=0; j<se_B_null->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-	
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-	
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-	
-	// Start reading genotypes and analyze.
-	// Calculate n_bit and c, the number of bit for each snp.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-	  infile.read(ch,1);
-	  b=ch[0];
-	}
-
-	size_t csnp=0, t_last=0;
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  if (indicator_snp[t]==0) {continue;}
-	  t_last++;
-	}
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-	  if (t%d_pace==0 || t==snpInfo.size()-1) {
-	    ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-
-	  // n_bit, and 3 is the number of magic numbers.
-	  infile.seekg(t*n_bit+3);		
-
-	  //read genotypes
-	  x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0;
-	  for (int i=0; i<n_bit; ++i) {
-	    infile.read(ch,1);
-	    b=ch[0];
-
-	    // Minor allele homozygous: 2.0; major: 0.0;
-	    for (size_t j=0; j<4; ++j) {                
-	      if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
-	      if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-	      
-	      if (b[2*j]==0) {
-		if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
-		else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
-	      }
-	      else {
-		if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
-		else {gsl_vector_set(x, ci_test, -9); n_miss++; }
-	      }
-	      
-	      ci_total++;
-	      ci_test++;
-			}
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    geno=gsl_vector_get(x,i);
-	    if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
-	  }
-
-	  gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
-	  gsl_vector_memcpy (&Xlarge_col.vector, x);
-	  csnp++;
-	  
-	  if (csnp%msize==0 || csnp==t_last ) {
-	    size_t l=0;
-	    if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-	    
-	    gsl_matrix_view Xlarge_sub =
-	      gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-	    gsl_matrix_view UtXlarge_sub =
-	      gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-	    
-	    time_start=clock();
-	    eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-			    &UtXlarge_sub.matrix);
-	    time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	    
-	    gsl_matrix_set_zero (Xlarge);
-	    
-	    for (size_t i=0; i<l; i++) {
-	      gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
-	      gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-	      
-	      // Initial values.
-	      gsl_matrix_memcpy (V_g, V_g_null);
-	      gsl_matrix_memcpy (V_e, V_e_null);
-	      gsl_matrix_memcpy (B, B_null);
-	      
-	      time_start=clock();
-	      
-	      // 3 is before 1.
-	      if (a_mode==3 || a_mode==4) {
-		p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				  V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		
-		if (p_score<p_nr && crt==1) {
-		  logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
-				 xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
-				 crt_b, crt_c);
-		  p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		}
-	      }
-	      
-	      if (a_mode==2 || a_mode==4) {
-		logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		
-		// Calculate beta and Vbeta.
-		p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				V_e, UltVehiY, beta, Vbeta);
-		p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-		
-		if (p_lrt<p_nr) {
-		  logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				  V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
-					 (double)d_size );
-		  if (crt==1) {
-		    p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      if (a_mode==1 || a_mode==4) {
-		logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
-			       U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
-			       UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
-				 V_e, UltVehiY, beta, Vbeta);
-		
-		if (p_wald<p_nr) {
-		  logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
-				 Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
-				 crt_a, crt_b, crt_c);
-		  p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (crt==1) {
-		    p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		  }
-		}
-	      }
-	      
-	      time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	      // Store summary data.
-	      for (size_t i=0; i<d_size; i++) {
-		v_beta[i]=gsl_vector_get (beta, i);
-	      }
-	      
-	      c=0;
-	      for (size_t i=0; i<d_size; i++) {
-		for (size_t j=i; j<d_size; j++) {
-		  v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		  v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		  v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		  c++;
-		}
-	      }
-	      
-	      MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
-			       v_Ve, v_Vbeta};
-	      sumStat.push_back(SNPs);
-	    }
-	  }
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null);
-
-	gsl_matrix_free(Xlarge);
-	gsl_matrix_free(UtXlarge);
-
-	return;
+void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                         const gsl_matrix *UtW, const gsl_matrix *UtY) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  char ch[1];
+  bitset<8> b;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_bit, n_miss, ci_total, ci_test;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Create a large matrix.
+  size_t msize = LMM_BATCH_SIZE;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+  gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+  gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row.vector);
+  gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub.matrix);
+
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+                  crt_c);
+  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+              se_B_null);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null->size1; i++) {
+    for (size_t j = 0; j < se_B_null->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Start reading genotypes and analyze.
+  // Calculate n_bit and c, the number of bit for each snp.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Print the first three magic numbers.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  size_t csnp = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // read genotypes
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0;
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(x, ci_test, 2);
+            x_mean += 2.0;
+          } else {
+            gsl_vector_set(x, ci_test, 1);
+            x_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(x, ci_test, 0);
+          } else {
+            gsl_vector_set(x, ci_test, -9);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+        ci_test++;
+      }
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    csnp++;
+
+    if (csnp % msize == 0 || csnp == t_last) {
+      size_t l = 0;
+      if (csnp % msize == 0) {
+        l = msize;
+      } else {
+        l = csnp % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+        // Initial values.
+        gsl_matrix_memcpy(V_g, V_g_null);
+        gsl_matrix_memcpy(V_e, V_e_null);
+        gsl_matrix_memcpy(B, B_null);
+
+        time_start = clock();
+
+        // 3 is before 1.
+        if (a_mode == 3 || a_mode == 4) {
+          p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+                             V_e_null, UltVehiY, beta, Vbeta);
+
+          if (p_score < p_nr && crt == 1) {
+            logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                            Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+          }
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+
+          // Calculate beta and Vbeta.
+          p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                           UltVehiY, beta, Vbeta);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+          if (p_lrt < p_nr) {
+            logl_H1 =
+                MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+            // Calculate beta and Vbeta.
+            p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                             UltVehiY, beta, Vbeta);
+            p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+            if (crt == 1) {
+              p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+                          UltVehiE, V_g, V_e, B);
+          p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                            UltVehiY, beta, Vbeta);
+
+          if (p_wald < p_nr) {
+            logl_H1 =
+                MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+            p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+                              UltVehiY, beta, Vbeta);
+
+            if (crt == 1) {
+              p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+            }
+          }
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        for (size_t i = 0; i < d_size; i++) {
+          v_beta[i] = gsl_vector_get(beta, i);
+        }
+
+        c = 0;
+        for (size_t i = 0; i < d_size; i++) {
+          for (size_t j = i; j < d_size; j++) {
+            v_Vg[c] = gsl_matrix_get(V_g, i, j);
+            v_Ve[c] = gsl_matrix_get(V_e, i, j);
+            v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+            c++;
+          }
+        }
+
+        MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  return;
 }
 
 // Calculate Vg, Ve, B, se(B) in the null mvLMM model.
 // Both B and se_B are d by c matrices.
-void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
-			const gsl_matrix *UtY, const size_t em_iter,
-			const size_t nr_iter, const double em_prec,
-			const double nr_prec, const double l_min,
-			const double l_max, const size_t n_region,
-			gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
-			gsl_matrix *se_B) {
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-	size_t dc_size=d_size*c_size, v_size=d_size*(d_size+1)/2;
-
-	double logl, crt_a, crt_b, crt_c;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-	
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);		
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);	
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	// Transpose matrices.
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *W=gsl_matrix_alloc (c_size, n_size);
-	gsl_matrix_transpose_memcpy (Y, UtY);
-	gsl_matrix_transpose_memcpy (W, UtW);
-
-	// Initial, EM, NR, and calculate B.
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, 
-		   l_min, l_max, n_region, V_g, V_e, B);
-	logl=MphEM ('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, 
-		    OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-		    UltVehiE, V_g, V_e, B);
-	logl=MphNR ('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, 
-		    Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
-
-	// Free matrices.
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(W);
-
-	return;
+void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+                       const gsl_matrix *UtY, const size_t em_iter,
+                       const size_t nr_iter, const double em_prec,
+                       const double nr_prec, const double l_min,
+                       const double l_max, const size_t n_region,
+                       gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
+                       gsl_matrix *se_B) {
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+  size_t dc_size = d_size * c_size, v_size = d_size * (d_size + 1) / 2;
+
+  double logl, crt_a, crt_b, crt_c;
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  // Transpose matrices.
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *W = gsl_matrix_alloc(c_size, n_size);
+  gsl_matrix_transpose_memcpy(Y, UtY);
+  gsl_matrix_transpose_memcpy(W, UtW);
+
+  // Initial, EM, NR, and calculate B.
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max,
+             n_region, V_g, V_e, B);
+  logl = MphEM('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE,
+               UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+  logl = MphNR('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g,
+               V_e, Hessian, crt_a, crt_b, crt_c);
+  MphCalcBeta(eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
+
+  // Free matrices.
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(W);
+
+  return;
 }
 
-void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, 
-			      const gsl_matrix *UtW, const gsl_matrix *UtY, 
-			      const gsl_vector *env) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl; 
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	string line;
-	char *ch_ptr;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_miss, c_phen;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
-	// Each column is H_k^{-1}y_k.
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-	gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values; including env but not
-	// including x.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
-	gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
-	gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
-	gsl_matrix_view xHi_all_sub1=
-	  gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
-
-	gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub2=
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
-	gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
-	gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
-	gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
-
-	gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
-	gsl_vector_set_zero(&X_row1.vector);
-	gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row2.vector);
-
-	gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
-	gsl_vector_set_zero(&B_col1.vector);
-	gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col2.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, 
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, 
-		       U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-		       UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
-						    c+v_size));
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-	
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, 
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-		       UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-	
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Start reading genotypes and analyze.
-	for (size_t t=0; t<indicator_snp.size(); ++t) {
-	  !safeGetline(infile, line).eof();
-	  if (t%d_pace==0 || t==(ns_total-1)) {
-	    ProgressBar ("Reading SNPs  ", t, ns_total-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  ch_ptr=strtok ((char *)line.c_str(), " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  ch_ptr=strtok (NULL, " , \t");
-	  
-	  x_mean=0.0; c_phen=0; n_miss=0;
-	  gsl_vector_set_zero(x_miss);
-	  for (size_t i=0; i<ni_total; ++i) {
-	    ch_ptr=strtok (NULL, " , \t");
-	    if (indicator_idv[i]==0) {continue;}
-	    
-	    if (strcmp(ch_ptr, "NA")==0) {
-	      gsl_vector_set(x_miss, c_phen, 0.0); 
-	      n_miss++;
-	    }
-	    else {
-	      geno=atof(ch_ptr);
-	      
-	      gsl_vector_set(x, c_phen, geno);
-	      gsl_vector_set(x_miss, c_phen, 1.0);
-	      x_mean+=geno;
-	    }
-	    c_phen++;
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
-	    geno=gsl_vector_get(x, i);
-	    if (x_mean>1) {
-	      gsl_vector_set(x, i, 2-geno);
-	    }
-	  }
-	  
-	  // Calculate statistics.
-	  time_start=clock();
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
-	  gsl_vector_mul (x, env);
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
-	  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	  
-		//initial values
-		gsl_matrix_memcpy (V_g, V_g_null);
-		gsl_matrix_memcpy (V_e, V_e_null);
-		gsl_matrix_memcpy (B, B_null);
-
-		if (a_mode==2 || a_mode==3 || a_mode==4) {
-		  if (a_mode==3 || a_mode==4) {
-		    logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, 
-				   &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, 
-				   OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-				   UltVehiE, V_g, V_e, &B_sub2.matrix);
-		    logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, 
-				   &X_sub2.matrix, Y, Hi_all, 
-				   &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-				 &B_sub2.matrix, se_B_null2);
-		  }
-
-		  if (a_mode==2 || a_mode==4) {
-		    logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, 
-				   &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, 
-				   OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-				   UltVehiE, V_g, V_e, &B_sub2.matrix);
-		    logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, 
-				   &X_sub2.matrix, Y, Hi_all, 
-				   &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-				 &B_sub2.matrix, se_B_null2);
-		  }
-		}
-
-		time_start=clock();
-
-		// 3 is before 1.
-		if (a_mode==3 || a_mode==4) {
-		  p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				    V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-		  if (p_score<p_nr && crt==1) {
-		    logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, 
-				   xHi_all, Hiy_all, V_g, V_e, Hessian, 
-				   crt_a, crt_b, crt_c);
-		    p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-		  }
-		}
-		
-		if (a_mode==2 || a_mode==4) {
-		  logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, 
-				 U_hat, E_hat, OmegaU, OmegaE, UltVehiY, 
-				 UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-
-		  // Calculate beta and Vbeta.
-		  p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				  V_g, V_e, UltVehiY, beta, Vbeta);
-		  p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),(double)d_size);
-		  
-		  if (p_lrt<p_nr) {
-		    logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, 
-				   Hi_all, xHi_all, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    
-		    // Calculate beta and Vbeta.
-		    p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				    V_g, V_e, UltVehiY, beta, Vbeta);
-		    p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),
-					  (double)d_size );
-		    
-		    if (crt==1) {
-		      p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-		    }
-		  }
-		}
-		
-		if (a_mode==1 || a_mode==4) {
-		  logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, 
-				 U_hat, E_hat, OmegaU, OmegaE, UltVehiY, 
-				 UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-		  p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				   V_g, V_e, UltVehiY, beta, Vbeta);
-		  
-		  if (p_wald<p_nr) {
-		    logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, 
-				   Hi_all, xHi_all, Hiy_all, V_g, V_e, 
-				   Hessian, crt_a, crt_b, crt_c);
-		    p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-				     V_g, V_e, UltVehiY, beta, Vbeta);
-		    
-		    if (crt==1) {
-		      p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-		    }
-		  }
-		}
-
-		if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-		// Store summary data.
-		for (size_t i=0; i<d_size; i++) {
-		  v_beta[i]=gsl_vector_get (beta, i);
-		}
-
-		c=0;
-		for (size_t i=0; i<d_size; i++) {
-		  for (size_t j=i; j<d_size; j++) {
-		    v_Vg[c]=gsl_matrix_get (V_g, i, j);
-		    v_Ve[c]=gsl_matrix_get (V_e, i, j);
-		    v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-		    c++;
-		  }
-		}
-
-		MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, 
-				 v_Ve, v_Vbeta};
-		sumStat.push_back(SNPs);
-	}
-	cout<<endl;
-
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-	gsl_vector_free(x_miss);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null1);
-	gsl_matrix_free(se_B_null2);
-
-	return;
+void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+                             const gsl_matrix *UtW, const gsl_matrix *UtY,
+                             const gsl_vector *env) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  string line;
+  char *ch_ptr;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_miss, c_phen;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k.
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+  gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values; including env but not
+  // including x.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
+  gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
+  gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
+  gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
+      xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
+
+  gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub2 =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+
+  gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
+  gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
+  gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
+  gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+
+  gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
+  gsl_vector_set_zero(&X_row1.vector);
+  gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row2.vector);
+
+  gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
+  gsl_vector_set_zero(&B_col1.vector);
+  gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col2.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub1.matrix);
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Start reading genotypes and analyze.
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+        geno = atof(ch_ptr);
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+      geno = gsl_vector_get(x, i);
+      if (x_mean > 1) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+    gsl_vector_mul(x, env);
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // initial values
+    gsl_matrix_memcpy(V_g, V_g_null);
+    gsl_matrix_memcpy(V_e, V_e_null);
+    gsl_matrix_memcpy(B, B_null);
+
+    if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
+      if (a_mode == 3 || a_mode == 4) {
+        logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+
+      if (a_mode == 2 || a_mode == 4) {
+        logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+    }
+
+    time_start = clock();
+
+    // 3 is before 1.
+    if (a_mode == 3 || a_mode == 4) {
+      p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
+                         V_e_null, UltVehiY, beta, Vbeta);
+      if (p_score < p_nr && crt == 1) {
+        logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                        Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+      }
+    }
+
+    if (a_mode == 2 || a_mode == 4) {
+      logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+
+      // Calculate beta and Vbeta.
+      p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                       UltVehiY, beta, Vbeta);
+      p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+      if (p_lrt < p_nr) {
+        logl_H1 =
+            MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+        // Calculate beta and Vbeta.
+        p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                         UltVehiY, beta, Vbeta);
+        p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+        if (crt == 1) {
+          p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (a_mode == 1 || a_mode == 4) {
+      logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+      p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                        UltVehiY, beta, Vbeta);
+
+      if (p_wald < p_nr) {
+        logl_H1 =
+            MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                          UltVehiY, beta, Vbeta);
+
+        if (crt == 1) {
+          p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (x_mean > 1) {
+      gsl_vector_scale(beta, -1.0);
+    }
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    for (size_t i = 0; i < d_size; i++) {
+      v_beta[i] = gsl_vector_get(beta, i);
+    }
+
+    c = 0;
+    for (size_t i = 0; i < d_size; i++) {
+      for (size_t j = i; j < d_size; j++) {
+        v_Vg[c] = gsl_matrix_get(V_g, i, j);
+        v_Ve[c] = gsl_matrix_get(V_e, i, j);
+        v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+        c++;
+      }
+    }
+
+    MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null1);
+  gsl_matrix_free(se_B_null2);
+
+  return;
 }
 
-void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, 
-			     const gsl_matrix *UtW, const gsl_matrix *UtY, 
-			     const gsl_vector *env) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl; 
-	  return;
-	}
-
-	clock_t time_start=clock();
-	time_UtX=0; time_opt=0;
-
-	char ch[1];
-	bitset<8> b;
-
-	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
-	double crt_a, crt_b, crt_c;
-	int n_bit, n_miss, ci_total, ci_test;
-	double geno, x_mean;
-	size_t c=0;
-	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
-	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
-	// Large matrices for EM.
-	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
-	// Large matrices for NR.
-	// Each dxd block is H_k^{-1}.
-	gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-	
-	// Each column is H_k^{-1}y_k
-	gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-	
-	// Each dcxdc block is x_k\otimes H_k^{-1}.
-	gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);	
-	gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
-	gsl_vector *x=gsl_vector_alloc (n_size);
-
-	gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
-	gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
-	gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_vector *beta=gsl_vector_alloc (d_size);
-	gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
-	// Null estimates for initial values.
-	gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
-	gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
-	gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
-	gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
-
-	gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
-	gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
-	gsl_matrix_view xHi_all_sub1=
-	  gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
-
-	gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
-	gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
-	gsl_matrix_view xHi_all_sub2=
-	  gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
-	gsl_matrix_transpose_memcpy (Y, UtY);
-
-	gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
-	gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
-	gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
-	gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
-
-	gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
-	gsl_vector_set_zero(&X_row1.vector);
-	gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
-	gsl_vector_set_zero(&X_row2.vector);
-
-	gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
-	gsl_vector_set_zero(&B_col1.vector);
-	gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
-	gsl_vector_set_zero(&B_col2.vector);
-
-	MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, 
-		   Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
-
-	logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, 
-		       E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, 
-		       UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_remle_null.clear();
-	Ve_remle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
-						    c+v_size));
-	    c++;
-	  }
-	}
-	beta_remle_null.clear();
-	se_beta_remle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_remle_H0=logl_H0;
-
-	cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-	cout.precision(4);
-	cout<<"REMLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-	logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, 
-		       U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-		       UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
-	logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, 
-		       Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, 
-		       Hessian, crt_a, crt_b, crt_c);
-	MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, 
-		     &B_sub1.matrix, se_B_null1);
-
-	c=0;
-	Vg_mle_null.clear();
-	Ve_mle_null.clear();
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
-	    Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
-	    VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
-	    VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
-	    c++;
-	  }
-	}
-	beta_mle_null.clear();
-	se_beta_mle_null.clear();
-	for (size_t i=0; i<se_B_null1->size1; i++) {
-	  for (size_t j=0; j<se_B_null1->size2; j++) {
-	    beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
-	    se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
-	  }
-	}
-	logl_mle_H0=logl_H0;
-
-	cout<<"MLE estimate for Vg in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_g, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Vg): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE estimate for Ve in the null model: "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    cout<<gsl_matrix_get(V_e, i, j)<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"se(Ve): "<<endl;
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=0; j<=i; j++) {
-	    c=GetIndex(i, j, d_size);
-	    cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
-	  }
-	  cout<<endl;
-	}
-	cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-	vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-	for (size_t i=0; i<d_size; i++) {
-	  v_beta.push_back(0.0);
-	}
-	for (size_t i=0; i<d_size; i++) {
-	  for (size_t j=i; j<d_size; j++) {
-	    v_Vg.push_back(0.0);
-	    v_Ve.push_back(0.0);
-	    v_Vbeta.push_back(0.0);
-	  }
-	}
-
-	gsl_matrix_memcpy (V_g_null, V_g);
-	gsl_matrix_memcpy (V_e_null, V_e);
-	gsl_matrix_memcpy (B_null, B);
-
-	// Start reading genotypes and analyze.
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (ni_total%4==0) {n_bit=ni_total/4;}
-	else {n_bit=ni_total/4+1; }
-
-	// Print the first three magic numbers.
-	for (int i=0; i<3; ++i) {
-	  infile.read(ch,1);
-	  b=ch[0];
-	}
-
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-	  if (t%d_pace==0 || t==snpInfo.size()-1) {
-	    ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-	  }
-	  if (indicator_snp[t]==0) {continue;}
-	  
-	  // n_bit, and 3 is the number of magic numbers.
-	  infile.seekg(t*n_bit+3);
-
-	  // Read genotypes.
-	  x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0;
-	  for (int i=0; i<n_bit; ++i) {
-	    infile.read(ch,1);
-	    b=ch[0];
-
-	    // Minor allele homozygous: 2.0; major: 0.0.
-	    for (size_t j=0; j<4; ++j) {                
-	      
-	      if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
-	      if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-	      
-	      if (b[2*j]==0) {
-		if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
-		else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
-	      }
-	      else {
-		if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
-		else {gsl_vector_set(x, ci_test, -9); n_miss++; }
-	      }
-	      
-	      ci_total++;
-	      ci_test++;
-	    }
-	  }
-	  
-	  x_mean/=(double)(ni_test-n_miss);
-	  
-	  for (size_t i=0; i<ni_test; ++i) {
-	    geno=gsl_vector_get(x,i);
-	    if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
-	    if (x_mean>1) {
-	      gsl_vector_set(x, i, 2-geno);
-	    }
-	  }
-	  
-	  // Calculate statistics.
-	  time_start=clock();
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
-	  gsl_vector_mul (x, env);
-	  gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
-	  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-	  
-	  // Initial values.
-	  gsl_matrix_memcpy (V_g, V_g_null);
-	  gsl_matrix_memcpy (V_e, V_e_null);
-	  gsl_matrix_memcpy (B, B_null);
-	  
-	  if (a_mode==2 || a_mode==3 || a_mode==4) {
-	    if (a_mode==3 || a_mode==4) {
-	      logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, 
-			     &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, 
-			     UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, 
-			     V_e, &B_sub2.matrix);
-	      logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, 
-			     &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, 
-			     Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	      MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-			   &B_sub2.matrix, se_B_null2);
-	    }
-	    
-	    if (a_mode==2 || a_mode==4) {
-	      logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, 
-			     &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, 
-			     UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, 
-			     V_e, &B_sub2.matrix);
-	      logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, 
-			     &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, 
-			     Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	      MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, 
-			   &B_sub2.matrix, se_B_null2);
-	    }
-	  }
-	  
-	  time_start=clock();
-	  
-	  // 3 is before 1.
-	  if (a_mode==3 || a_mode==4) {
-	    p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, 
-			      V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-	    
-	    if (p_score<p_nr && crt==1) {
-	      logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, 
-			     Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-	      p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-	    }
-	  }
-	  
-	  if (a_mode==2 || a_mode==4) {
-	    logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, 
-			   E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-			   UltVehiU, UltVehiE, V_g, V_e, B);
-
-	    // Calculate beta and Vbeta.
-	    p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			    V_e, UltVehiY, beta, Vbeta);
-	    p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-	    
-	    if (p_lrt<p_nr) {
-	      logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, 
-			     xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, 
-			     crt_b, crt_c);
-	      
-	      // Calculate beta and Vbeta.
-	      p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			      V_e, UltVehiY, beta, Vbeta);
-	      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-	      if (crt==1) {
-		p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-	      }
-	    }
-	  }
-	  
-	  if (a_mode==1 || a_mode==4) {
-	    logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, 
-			   E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, 
-			   UltVehiU, UltVehiE, V_g, V_e, B);
-	    p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			     V_e, UltVehiY, beta, Vbeta);
-	    
-	    if (p_wald<p_nr) {
-	      logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, 
-			     xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, 
-			     crt_b, crt_c);
-	      p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, 
-			       V_e, UltVehiY, beta, Vbeta);
-	      
-	      if (crt==1) {
-		p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-	      }
-	    }
-	  }
-	  
-	  if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
-	  time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	  // Store summary data.
-	  for (size_t i=0; i<d_size; i++) {
-	    v_beta[i]=gsl_vector_get (beta, i);
-	  }
-	  
-	  c=0;
-	  for (size_t i=0; i<d_size; i++) {
-	    for (size_t j=i; j<d_size; j++) {
-	      v_Vg[c]=gsl_matrix_get (V_g, i, j);
-	      v_Ve[c]=gsl_matrix_get (V_e, i, j);
-	      v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-	      c++;
-	    }
-	  }
-	  
-	  MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, 
-			   v_Vg, v_Ve, v_Vbeta};
-	  sumStat.push_back(SNPs);
-	}
-	cout<<endl;
-
-	infile.close();
-	infile.clear();
-
-	gsl_matrix_free(U_hat);
-	gsl_matrix_free(E_hat);
-	gsl_matrix_free(OmegaU);
-	gsl_matrix_free(OmegaE);
-	gsl_matrix_free(UltVehiY);
-	gsl_matrix_free(UltVehiBX);
-	gsl_matrix_free(UltVehiU);
-	gsl_matrix_free(UltVehiE);
-
-	gsl_matrix_free(Hi_all);
-	gsl_matrix_free(Hiy_all);
-	gsl_matrix_free(xHi_all);
-	gsl_matrix_free(Hessian);
-
-	gsl_vector_free(x);
-
-	gsl_matrix_free(Y);
-	gsl_matrix_free(X);
-	gsl_matrix_free(V_g);
-	gsl_matrix_free(V_e);
-	gsl_matrix_free(B);
-	gsl_vector_free(beta);
-	gsl_matrix_free(Vbeta);
-
-	gsl_matrix_free(V_g_null);
-	gsl_matrix_free(V_e_null);
-	gsl_matrix_free(B_null);
-	gsl_matrix_free(se_B_null1);
-	gsl_matrix_free(se_B_null2);
-
-	return;
+void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+                            const gsl_matrix *UtW, const gsl_matrix *UtY,
+                            const gsl_vector *env) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  time_UtX = 0;
+  time_opt = 0;
+
+  char ch[1];
+  bitset<8> b;
+
+  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+  double crt_a, crt_b, crt_c;
+  int n_bit, n_miss, ci_total, ci_test;
+  double geno, x_mean;
+  size_t c = 0;
+  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
+  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+  // Large matrices for EM.
+  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+  // Large matrices for NR.
+  // Each dxd block is H_k^{-1}.
+  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+  // Each column is H_k^{-1}y_k
+  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+  // Each dcxdc block is x_k\otimes H_k^{-1}.
+  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+  gsl_vector *x = gsl_vector_alloc(n_size);
+
+  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_vector *beta = gsl_vector_alloc(d_size);
+  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+  // Null estimates for initial values.
+  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+  gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
+  gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
+
+  gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
+  gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
+  gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
+      xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
+
+  gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+  gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+  gsl_matrix_view xHi_all_sub2 =
+      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+  gsl_matrix_transpose_memcpy(Y, UtY);
+
+  gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
+  gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
+  gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
+  gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+
+  gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
+  gsl_vector_set_zero(&X_row1.vector);
+  gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
+  gsl_vector_set_zero(&X_row2.vector);
+
+  gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
+  gsl_vector_set_zero(&B_col1.vector);
+  gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
+  gsl_vector_set_zero(&B_col2.vector);
+
+  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
+             l_max, n_region, V_g, V_e, &B_sub1.matrix);
+
+  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+
+  c = 0;
+  Vg_remle_null.clear();
+  Ve_remle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_remle_null.clear();
+  se_beta_remle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_remle_H0 = logl_H0;
+
+  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+  cout.precision(4);
+  cout << "REMLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "REMLE likelihood = " << logl_H0 << endl;
+
+  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+                  V_e, &B_sub1.matrix);
+  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+                  &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+                  crt_b, crt_c);
+  MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+              se_B_null1);
+
+  c = 0;
+  Vg_mle_null.clear();
+  Ve_mle_null.clear();
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+      c++;
+    }
+  }
+  beta_mle_null.clear();
+  se_beta_mle_null.clear();
+  for (size_t i = 0; i < se_B_null1->size1; i++) {
+    for (size_t j = 0; j < se_B_null1->size2; j++) {
+      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+    }
+  }
+  logl_mle_H0 = logl_H0;
+
+  cout << "MLE estimate for Vg in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_g, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Vg): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE estimate for Ve in the null model: " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      cout << gsl_matrix_get(V_e, i, j) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "se(Ve): " << endl;
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = 0; j <= i; j++) {
+      c = GetIndex(i, j, d_size);
+      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+    }
+    cout << endl;
+  }
+  cout << "MLE likelihood = " << logl_H0 << endl;
+
+  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+  for (size_t i = 0; i < d_size; i++) {
+    v_beta.push_back(0.0);
+  }
+  for (size_t i = 0; i < d_size; i++) {
+    for (size_t j = i; j < d_size; j++) {
+      v_Vg.push_back(0.0);
+      v_Ve.push_back(0.0);
+      v_Vbeta.push_back(0.0);
+    }
+  }
+
+  gsl_matrix_memcpy(V_g_null, V_g);
+  gsl_matrix_memcpy(V_e_null, V_e);
+  gsl_matrix_memcpy(B_null, B);
+
+  // Start reading genotypes and analyze.
+  // Calculate n_bit and c, the number of bit for each SNP.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Print the first three magic numbers.
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+
+        if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(x, ci_test, 2);
+            x_mean += 2.0;
+          } else {
+            gsl_vector_set(x, ci_test, 1);
+            x_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(x, ci_test, 0);
+          } else {
+            gsl_vector_set(x, ci_test, -9);
+            n_miss++;
+          }
+        }
+
+        ci_total++;
+        ci_test++;
+      }
+    }
+
+    x_mean /= (double)(ni_test - n_miss);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+      if (x_mean > 1) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+
+    // Calculate statistics.
+    time_start = clock();
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+    gsl_vector_mul(x, env);
+    gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Initial values.
+    gsl_matrix_memcpy(V_g, V_g_null);
+    gsl_matrix_memcpy(V_e, V_e_null);
+    gsl_matrix_memcpy(B, B_null);
+
+    if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
+      if (a_mode == 3 || a_mode == 4) {
+        logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+
+      if (a_mode == 2 || a_mode == 4) {
+        logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+                        Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+                        UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+        logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+                        Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+                        Hessian, crt_a, crt_b, crt_c);
+        MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+                    se_B_null2);
+      }
+    }
+
+    time_start = clock();
+
+    // 3 is before 1.
+    if (a_mode == 3 || a_mode == 4) {
+      p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
+                         V_e_null, UltVehiY, beta, Vbeta);
+
+      if (p_score < p_nr && crt == 1) {
+        logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                        Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+      }
+    }
+
+    if (a_mode == 2 || a_mode == 4) {
+      logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+
+      // Calculate beta and Vbeta.
+      p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                       UltVehiY, beta, Vbeta);
+      p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+      if (p_lrt < p_nr) {
+        logl_H1 =
+            MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+        // Calculate beta and Vbeta.
+        p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                         UltVehiY, beta, Vbeta);
+        p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+        if (crt == 1) {
+          p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (a_mode == 1 || a_mode == 4) {
+      logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+                      OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+                      V_g, V_e, B);
+      p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                        UltVehiY, beta, Vbeta);
+
+      if (p_wald < p_nr) {
+        logl_H1 =
+            MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+                  Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+        p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+                          UltVehiY, beta, Vbeta);
+
+        if (crt == 1) {
+          p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+        }
+      }
+    }
+
+    if (x_mean > 1) {
+      gsl_vector_scale(beta, -1.0);
+    }
+
+    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    // Store summary data.
+    for (size_t i = 0; i < d_size; i++) {
+      v_beta[i] = gsl_vector_get(beta, i);
+    }
+
+    c = 0;
+    for (size_t i = 0; i < d_size; i++) {
+      for (size_t j = i; j < d_size; j++) {
+        v_Vg[c] = gsl_matrix_get(V_g, i, j);
+        v_Ve[c] = gsl_matrix_get(V_e, i, j);
+        v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+        c++;
+      }
+    }
+
+    MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+    sumStat.push_back(SNPs);
+  }
+  cout << endl;
+
+  infile.close();
+  infile.clear();
+
+  gsl_matrix_free(U_hat);
+  gsl_matrix_free(E_hat);
+  gsl_matrix_free(OmegaU);
+  gsl_matrix_free(OmegaE);
+  gsl_matrix_free(UltVehiY);
+  gsl_matrix_free(UltVehiBX);
+  gsl_matrix_free(UltVehiU);
+  gsl_matrix_free(UltVehiE);
+
+  gsl_matrix_free(Hi_all);
+  gsl_matrix_free(Hiy_all);
+  gsl_matrix_free(xHi_all);
+  gsl_matrix_free(Hessian);
+
+  gsl_vector_free(x);
+
+  gsl_matrix_free(Y);
+  gsl_matrix_free(X);
+  gsl_matrix_free(V_g);
+  gsl_matrix_free(V_e);
+  gsl_matrix_free(B);
+  gsl_vector_free(beta);
+  gsl_matrix_free(Vbeta);
+
+  gsl_matrix_free(V_g_null);
+  gsl_matrix_free(V_e_null);
+  gsl_matrix_free(B_null);
+  gsl_matrix_free(se_B_null1);
+  gsl_matrix_free(se_B_null2);
+
+  return;
 }