about summary refs log tree commit diff
path: root/src/prdt.cpp
diff options
context:
space:
mode:
authorPeter Carbonetto2017-08-02 13:07:23 -0500
committerGitHub2017-08-02 13:07:23 -0500
commitadaf9557f776ca274b51e921af0542ef1b84eb61 (patch)
tree8f7581bec20eb815848d02f926cb949ed038008b /src/prdt.cpp
parent84360c191f418bf8682b35e0c8235fcc3bd19a06 (diff)
parent1e49e7a27d0f4b811a87c64db1e875779766f6b0 (diff)
downloadpangemma-adaf9557f776ca274b51e921af0542ef1b84eb61.tar.gz
Merge pull request #62 from genenetwork/spacing
Spacing and coding style.
Diffstat (limited to 'src/prdt.cpp')
-rw-r--r--src/prdt.cpp988
1 files changed, 499 insertions, 489 deletions
diff --git a/src/prdt.cpp b/src/prdt.cpp
index b29d150..3e7c004 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -16,527 +16,537 @@
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
-#include <iostream>
-#include <sstream>
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+#include <bitset>
+#include <cmath>
 #include <fstream>
-#include <string>
 #include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
+#include <sstream>
 #include <stdio.h>
 #include <stdlib.h>
-#include <cmath>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
-#include "gsl/gsl_blas.h"
+#include <string>
+#include <vector>
 
-#include "io.h"
-#include "lapack.h"
 #include "gzstream.h"
 #include "io.h"
-#include "prdt.h"
+#include "io.h"
+#include "lapack.h"
 #include "mathfunc.h"
+#include "prdt.h"
 
 using namespace std;
 
-void PRDT::CopyFromParam (PARAM &cPar) {
-	a_mode=cPar.a_mode;
-	d_pace=cPar.d_pace;
+void PRDT::CopyFromParam(PARAM &cPar) {
+  a_mode = cPar.a_mode;
+  d_pace = cPar.d_pace;
 
-	file_bfile=cPar.file_bfile;
-	file_geno=cPar.file_geno;
-	file_out=cPar.file_out;
-	path_out=cPar.path_out;
+  file_bfile = cPar.file_bfile;
+  file_geno = cPar.file_geno;
+  file_out = cPar.file_out;
+  path_out = cPar.path_out;
 
-	indicator_pheno=cPar.indicator_pheno;
-	indicator_cvt=cPar.indicator_cvt;
-	indicator_idv=cPar.indicator_idv;
+  indicator_pheno = cPar.indicator_pheno;
+  indicator_cvt = cPar.indicator_cvt;
+  indicator_idv = cPar.indicator_idv;
 
-	snpInfo=cPar.snpInfo;
-	mapRS2est=cPar.mapRS2est;
+  snpInfo = cPar.snpInfo;
+  mapRS2est = cPar.mapRS2est;
 
-	time_eigen=0;
+  time_eigen = 0;
 
-	n_ph=cPar.n_ph;
-	np_obs=cPar.np_obs;
-	np_miss=cPar.np_miss;
-	ns_total=cPar.ns_total;
-	ns_test=0;
+  n_ph = cPar.n_ph;
+  np_obs = cPar.np_obs;
+  np_miss = cPar.np_miss;
+  ns_total = cPar.ns_total;
+  ns_test = 0;
 
-	return;
+  return;
 }
 
-void PRDT::CopyToParam (PARAM &cPar) {
-	cPar.ns_test=ns_test;
-	cPar.time_eigen=time_eigen;
+void PRDT::CopyToParam(PARAM &cPar) {
+  cPar.ns_test = ns_test;
+  cPar.time_eigen = time_eigen;
 
-	return;
+  return;
 }
 
-void PRDT::WriteFiles (gsl_vector *y_prdt) {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".";
-	file_str+="prdt";
-	file_str+=".txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	size_t ci_test=0;
-	for (size_t i=0; i<indicator_idv.size(); i++) {
-		if (indicator_idv[i]==1) {
-			outfile<<"NA"<<endl;
-		} else {
-			outfile<<gsl_vector_get (y_prdt, ci_test)<<endl;
-			ci_test++;
-		}
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void PRDT::WriteFiles(gsl_vector *y_prdt) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".";
+  file_str += "prdt";
+  file_str += ".txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  size_t ci_test = 0;
+  for (size_t i = 0; i < indicator_idv.size(); i++) {
+    if (indicator_idv[i] == 1) {
+      outfile << "NA" << endl;
+    } else {
+      outfile << gsl_vector_get(y_prdt, ci_test) << endl;
+      ci_test++;
+    }
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
-void PRDT::WriteFiles (gsl_matrix *Y_full)  {
-	string file_str;
-	file_str=path_out+"/"+file_out;
-	file_str+=".prdt.txt";
-
-	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {
-	  cout<<"error writing file: "<<file_str.c_str()<<endl;
-	  return;
-	}
-
-	size_t ci_test=0;
-	for (size_t i=0; i<indicator_cvt.size(); i++) {
-		if (indicator_cvt[i]==0) {
-			outfile<<"NA"<<endl;
-		} else {
-			for (size_t j=0; j<Y_full->size2; j++) {
-				outfile << gsl_matrix_get(Y_full,ci_test,j) <<
-				  "\t";
-			}
-			outfile<<endl;
-			ci_test++;
-		}
-	}
-
-	outfile.close();
-	outfile.clear();
-	return;
+void PRDT::WriteFiles(gsl_matrix *Y_full) {
+  string file_str;
+  file_str = path_out + "/" + file_out;
+  file_str += ".prdt.txt";
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str.c_str() << endl;
+    return;
+  }
+
+  size_t ci_test = 0;
+  for (size_t i = 0; i < indicator_cvt.size(); i++) {
+    if (indicator_cvt[i] == 0) {
+      outfile << "NA" << endl;
+    } else {
+      for (size_t j = 0; j < Y_full->size2; j++) {
+        outfile << gsl_matrix_get(Y_full, ci_test, j) << "\t";
+      }
+      outfile << endl;
+      ci_test++;
+    }
+  }
+
+  outfile.close();
+  outfile.clear();
+  return;
 }
 
-void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
-	size_t ni_test=u_hat->size, ni_total=G->size1;
-
-	gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test);
-	gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test);
-	gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
-	gsl_vector *eval=gsl_vector_alloc (ni_test);
-	gsl_vector *Utu=gsl_vector_alloc (ni_test);
-	gsl_vector *w=gsl_vector_alloc (ni_total);
-	gsl_permutation *pmt=gsl_permutation_alloc (ni_test);
-
-	//center matrix G based on indicator_idv
-	for (size_t i=0; i<ni_total; i++) {
-		gsl_vector_set(w, i, indicator_idv[i]);
-	}
-	CenterMatrix(G, w);
-
-	//obtain Koo and Kfo
-	size_t o_i=0, o_j=0;
-	double d;
-	for (size_t i=0; i<indicator_idv.size(); i++) {
-		o_j=0;
-		for (size_t j=0; j<indicator_idv.size(); j++) {
-			d=gsl_matrix_get(G, i, j);
-			if (indicator_idv[i]==1 && indicator_idv[j]==1) {
-				gsl_matrix_set(Goo, o_i, o_j, d);
-			}
-			if (indicator_idv[i]==0 && indicator_idv[j]==1) {
-				gsl_matrix_set(Gfo, i-o_i, o_j, d);
-			}
-			if (indicator_idv[j]==1) {o_j++;}
-		}
-		if (indicator_idv[i]==1) {o_i++;}
-	}
-
-	//matrix operations to get u_prdt
-	cout<<"Start Eigen-Decomposition..."<<endl;
-	clock_t time_start=clock();
-	EigenDecomp (Goo, U, eval, 0);
-	for (size_t i=0; i<eval->size; i++) {
-		if (gsl_vector_get(eval,i)<1e-10) {
-		  gsl_vector_set(eval, i, 0);
-		}
-	}
-
-	time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-	gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu);
-	for (size_t i=0; i<eval->size; i++) {
-		d=gsl_vector_get(eval, i);
-		if (d!=0) {
-		  d=gsl_vector_get(Utu, i)/d;
-		  gsl_vector_set(Utu, i, d);
-		}
-	}
-	gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
-
-	// Free matrices.
-	gsl_matrix_free(Goo);
-	gsl_matrix_free(Gfo);
-	gsl_matrix_free(U);
-	gsl_vector_free(eval);
-	gsl_vector_free(Utu);
-	gsl_vector_free(w);
-	gsl_permutation_free(pmt);
-
-	return;
+void PRDT::AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
+  size_t ni_test = u_hat->size, ni_total = G->size1;
+
+  gsl_matrix *Goo = gsl_matrix_alloc(ni_test, ni_test);
+  gsl_matrix *Gfo = gsl_matrix_alloc(ni_total - ni_test, ni_test);
+  gsl_matrix *U = gsl_matrix_alloc(ni_test, ni_test);
+  gsl_vector *eval = gsl_vector_alloc(ni_test);
+  gsl_vector *Utu = gsl_vector_alloc(ni_test);
+  gsl_vector *w = gsl_vector_alloc(ni_total);
+  gsl_permutation *pmt = gsl_permutation_alloc(ni_test);
+
+  // center matrix G based on indicator_idv
+  for (size_t i = 0; i < ni_total; i++) {
+    gsl_vector_set(w, i, indicator_idv[i]);
+  }
+  CenterMatrix(G, w);
+
+  // obtain Koo and Kfo
+  size_t o_i = 0, o_j = 0;
+  double d;
+  for (size_t i = 0; i < indicator_idv.size(); i++) {
+    o_j = 0;
+    for (size_t j = 0; j < indicator_idv.size(); j++) {
+      d = gsl_matrix_get(G, i, j);
+      if (indicator_idv[i] == 1 && indicator_idv[j] == 1) {
+        gsl_matrix_set(Goo, o_i, o_j, d);
+      }
+      if (indicator_idv[i] == 0 && indicator_idv[j] == 1) {
+        gsl_matrix_set(Gfo, i - o_i, o_j, d);
+      }
+      if (indicator_idv[j] == 1) {
+        o_j++;
+      }
+    }
+    if (indicator_idv[i] == 1) {
+      o_i++;
+    }
+  }
+
+  // matrix operations to get u_prdt
+  cout << "Start Eigen-Decomposition..." << endl;
+  clock_t time_start = clock();
+  EigenDecomp(Goo, U, eval, 0);
+  for (size_t i = 0; i < eval->size; i++) {
+    if (gsl_vector_get(eval, i) < 1e-10) {
+      gsl_vector_set(eval, i, 0);
+    }
+  }
+
+  time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  gsl_blas_dgemv(CblasTrans, 1.0, U, u_hat, 0.0, Utu);
+  for (size_t i = 0; i < eval->size; i++) {
+    d = gsl_vector_get(eval, i);
+    if (d != 0) {
+      d = gsl_vector_get(Utu, i) / d;
+      gsl_vector_set(Utu, i, d);
+    }
+  }
+  gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, eval);
+  gsl_blas_dgemv(CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
+
+  // Free matrices.
+  gsl_matrix_free(Goo);
+  gsl_matrix_free(Gfo);
+  gsl_matrix_free(U);
+  gsl_vector_free(eval);
+  gsl_vector_free(Utu);
+  gsl_vector_free(w);
+  gsl_permutation_free(pmt);
+
+  return;
 }
 
-void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
-	igzstream infile (file_geno.c_str(), igzstream::in);
-	if (!infile) {
-	  cout<<"error reading genotype file:"<<file_geno<<endl;
-	  return;
-	}
-
-	string line;
-	char *ch_ptr;
-	string rs;
-
-	size_t n_miss, n_train_nomiss, c_phen;
-	double geno, x_mean, x_train_mean, effect_size;
-
-	gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-	gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size);
-
-	ns_test=0;
-
-	// Start reading genotypes and analyze.
-	for (size_t t=0; t<ns_total; ++t) {
-		!safeGetline(infile, line).eof();
-		if (t%d_pace==0 || t==(ns_total-1)) {
-		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
-		}
-
-		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		rs=ch_ptr;
-		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");
-
-		if (mapRS2est.count(rs)==0) {
-		  continue;
-		} else {
-		  effect_size=mapRS2est[rs];
-		}
-
-		x_mean=0.0;
-		c_phen=0;
-		n_miss=0;
-		x_train_mean=0;
-		n_train_nomiss=0;
-
-		gsl_vector_set_zero(x_miss);
-
-		for (size_t i=0; i<indicator_idv.size(); ++i) {
-			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==1) {
-				if (strcmp(ch_ptr, "NA")!=0) {
-					geno=atof(ch_ptr);
-					x_train_mean+=geno;
-					n_train_nomiss++;
-				}
-			} else {
-				if (strcmp(ch_ptr, "NA")==0) {
-					gsl_vector_set(x_miss, c_phen, 0.0);
-					n_miss++;
-				} else {
-					geno=atof(ch_ptr);
-
-					gsl_vector_set(x, c_phen, geno);
-					gsl_vector_set(x_miss, c_phen, 1.0);
-					x_mean+=geno;
-				}
-				c_phen++;
-			}
-		}
-
-		if (x->size==n_miss) {
-		  cout << "snp " << rs << " has missing genotype for all " <<
-		    "individuals and will be ignored." << endl;
-		  continue;}
-
-
-		x_mean/=(double)(x->size-n_miss);
-		x_train_mean/=(double)(n_train_nomiss);
-
-
-		for (size_t i=0; i<x->size; ++i) {
-			geno=gsl_vector_get(x, i);
-			if (gsl_vector_get (x_miss, i)==0) {
-				gsl_vector_set(x, i, x_mean-x_train_mean);
-			} else {
-				gsl_vector_set(x, i, geno-x_train_mean);
-			}
-		}
-
-		gsl_vector_scale (x, effect_size);
-		gsl_vector_add (y_prdt, x);
-
-		ns_test++;
-	}
-	cout<<endl;
-
-	gsl_vector_free (x);
-	gsl_vector_free (x_miss);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return;
+  }
+
+  string line;
+  char *ch_ptr;
+  string rs;
+
+  size_t n_miss, n_train_nomiss, c_phen;
+  double geno, x_mean, x_train_mean, effect_size;
+
+  gsl_vector *x = gsl_vector_alloc(y_prdt->size);
+  gsl_vector *x_miss = gsl_vector_alloc(y_prdt->size);
+
+  ns_test = 0;
+
+  // Start reading genotypes and analyze.
+  for (size_t t = 0; t < ns_total; ++t) {
+    !safeGetline(infile, line).eof();
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+    }
+
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    rs = ch_ptr;
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    if (mapRS2est.count(rs) == 0) {
+      continue;
+    } else {
+      effect_size = mapRS2est[rs];
+    }
+
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    x_train_mean = 0;
+    n_train_nomiss = 0;
+
+    gsl_vector_set_zero(x_miss);
+
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 1) {
+        if (strcmp(ch_ptr, "NA") != 0) {
+          geno = atof(ch_ptr);
+          x_train_mean += geno;
+          n_train_nomiss++;
+        }
+      } else {
+        if (strcmp(ch_ptr, "NA") == 0) {
+          gsl_vector_set(x_miss, c_phen, 0.0);
+          n_miss++;
+        } else {
+          geno = atof(ch_ptr);
+
+          gsl_vector_set(x, c_phen, geno);
+          gsl_vector_set(x_miss, c_phen, 1.0);
+          x_mean += geno;
+        }
+        c_phen++;
+      }
+    }
+
+    if (x->size == n_miss) {
+      cout << "snp " << rs << " has missing genotype for all "
+           << "individuals and will be ignored." << endl;
+      continue;
+    }
+
+    x_mean /= (double)(x->size - n_miss);
+    x_train_mean /= (double)(n_train_nomiss);
+
+    for (size_t i = 0; i < x->size; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean - x_train_mean);
+      } else {
+        gsl_vector_set(x, i, geno - x_train_mean);
+      }
+    }
+
+    gsl_vector_scale(x, effect_size);
+    gsl_vector_add(y_prdt, x);
+
+    ns_test++;
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
-void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
-	string file_bed=file_bfile+".bed";
-	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {
-	  cout<<"error reading bed file:"<<file_bed<<endl;
-	  return;
-	}
-
-	char ch[1];
-	bitset<8> b;
-	string rs;
-
-	size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
-	double geno, x_mean, x_train_mean, effect_size;
-
-	gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-
-	// Calculate n_bit and c, the number of bit for each SNP.
-	if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;}
-	else {n_bit=indicator_idv.size()/4+1; }
-
-	// Print the first 3 magic numbers.
-	for (size_t i=0; i<3; ++i) {
-		infile.read(ch,1);
-		b=ch[0];
-	}
-
-	ns_test=0;
-
-	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-		if (t%d_pace==0 || t==snpInfo.size()-1) {
-		  ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
-		}
-
-		rs=snpInfo[t].rs_number;
-
-		if (mapRS2est.count(rs)==0) {
-		  continue;
-		} else {
-		  effect_size=mapRS2est[rs];
-		}
-
-		// n_bit, and 3 is the number of magic numbers.
-		infile.seekg(t*n_bit+3);
-
-		// Read genotypes.
-		x_mean=0.0;
-		n_miss=0;
-		ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0;
-		for (size_t i=0; i<n_bit; ++i) {
-			infile.read(ch,1);
-			b=ch[0];
-
-			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {
-				if ((i==(n_bit-1)) &&
-				    ci_total==indicator_idv.size()) {
-				  break;
-				}
-				if (indicator_idv[ci_total]==1) {
-					if (b[2*j]==0) {
-						if (b[2*j+1]==0) {
-						  x_train_mean+=2.0;
-						  n_train_nomiss++;
-						}
-						else {
-						  x_train_mean+=1.0;
-						  n_train_nomiss++;
-						}
-					}
-					else {
-						if (b[2*j+1]==1) {
-						  n_train_nomiss++;
-						}
-						else {}
-					}
-				} else {
-					if (b[2*j]==0) {
-						if (b[2*j+1]==0) {
-						  gsl_vector_set(x,ci_test,2);
-						  x_mean+=2.0;
-						}
-						else {
-						  gsl_vector_set(x,ci_test,1);
-						  x_mean+=1.0;
-						}
-					}
-					else {
-						if (b[2*j+1]==1) {
-						  gsl_vector_set(x,ci_test,0);
-						}
-						else {
-						  gsl_vector_set(x,ci_test,-9);
-						  n_miss++;
-						}
-					}
-					ci_test++;
-				}
-				ci_total++;
-
-			}
-		}
-
-		if (x->size==n_miss) {
-		  cout << "snp " << rs << " has missing genotype for all " <<
-		    "individuals and will be ignored."<<endl;
-		  continue;
-		}
-
-		x_mean/=(double)(x->size-n_miss);
-		x_train_mean/=(double)(n_train_nomiss);
-
-		for (size_t i=0; i<x->size; ++i) {
-			geno=gsl_vector_get(x, i);
-			if (geno==-9) {
-				gsl_vector_set(x, i, x_mean-x_train_mean);
-			} else {
-				gsl_vector_set(x, i, geno-x_train_mean);
-			}
-		}
-
-		gsl_vector_scale (x, effect_size);
-		gsl_vector_add (y_prdt, x);
-
-		ns_test++;
-	}
-	cout<<endl;
-
-	gsl_vector_free (x);
-
-	infile.close();
-	infile.clear();
-
-	return;
+void PRDT::AnalyzePlink(gsl_vector *y_prdt) {
+  string file_bed = file_bfile + ".bed";
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return;
+  }
+
+  char ch[1];
+  bitset<8> b;
+  string rs;
+
+  size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
+  double geno, x_mean, x_train_mean, effect_size;
+
+  gsl_vector *x = gsl_vector_alloc(y_prdt->size);
+
+  // Calculate n_bit and c, the number of bit for each SNP.
+  if (indicator_idv.size() % 4 == 0) {
+    n_bit = indicator_idv.size() / 4;
+  } else {
+    n_bit = indicator_idv.size() / 4 + 1;
+  }
+
+  // Print the first 3 magic numbers.
+  for (size_t i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+    b = ch[0];
+  }
+
+  ns_test = 0;
+
+  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+    }
+
+    rs = snpInfo[t].rs_number;
+
+    if (mapRS2est.count(rs) == 0) {
+      continue;
+    } else {
+      effect_size = mapRS2est[rs];
+    }
+
+    // n_bit, and 3 is the number of magic numbers.
+    infile.seekg(t * n_bit + 3);
+
+    // Read genotypes.
+    x_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    x_train_mean = 0;
+    n_train_nomiss = 0;
+    for (size_t i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0.
+      for (size_t j = 0; j < 4; ++j) {
+        if ((i == (n_bit - 1)) && ci_total == indicator_idv.size()) {
+          break;
+        }
+        if (indicator_idv[ci_total] == 1) {
+          if (b[2 * j] == 0) {
+            if (b[2 * j + 1] == 0) {
+              x_train_mean += 2.0;
+              n_train_nomiss++;
+            } else {
+              x_train_mean += 1.0;
+              n_train_nomiss++;
+            }
+          } else {
+            if (b[2 * j + 1] == 1) {
+              n_train_nomiss++;
+            } else {
+            }
+          }
+        } else {
+          if (b[2 * j] == 0) {
+            if (b[2 * j + 1] == 0) {
+              gsl_vector_set(x, ci_test, 2);
+              x_mean += 2.0;
+            } else {
+              gsl_vector_set(x, ci_test, 1);
+              x_mean += 1.0;
+            }
+          } else {
+            if (b[2 * j + 1] == 1) {
+              gsl_vector_set(x, ci_test, 0);
+            } else {
+              gsl_vector_set(x, ci_test, -9);
+              n_miss++;
+            }
+          }
+          ci_test++;
+        }
+        ci_total++;
+      }
+    }
+
+    if (x->size == n_miss) {
+      cout << "snp " << rs << " has missing genotype for all "
+           << "individuals and will be ignored." << endl;
+      continue;
+    }
+
+    x_mean /= (double)(x->size - n_miss);
+    x_train_mean /= (double)(n_train_nomiss);
+
+    for (size_t i = 0; i < x->size; ++i) {
+      geno = gsl_vector_get(x, i);
+      if (geno == -9) {
+        gsl_vector_set(x, i, x_mean - x_train_mean);
+      } else {
+        gsl_vector_set(x, i, geno - x_train_mean);
+      }
+    }
+
+    gsl_vector_scale(x, effect_size);
+    gsl_vector_add(y_prdt, x);
+
+    ns_test++;
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+
+  infile.close();
+  infile.clear();
+
+  return;
 }
 
 // Predict missing phenotypes using ridge regression.
 // Y_hat contains fixed effects
-void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
-		       gsl_matrix *Y_full) {
-	gsl_vector *y_obs=gsl_vector_alloc (np_obs);
-	gsl_vector *y_miss=gsl_vector_alloc (np_miss);
-	gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs);
-	gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs);
-	gsl_vector *Hiy=gsl_vector_alloc (np_obs);
-
-	size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0;
-
-	// Obtain H_oo, H_mo.
-	c_obs1=0; c_miss1=0;
-	for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) {
-		if (indicator_cvt[i1]==0) {continue;}
-		for (vector<int>::size_type j1=0; j1<n_ph; ++j1) {
-
-			c_obs2=0; c_miss2=0;
-			for (vector<int>::size_type i2=0;
-			     i2<indicator_pheno.size(); ++i2) {
-				if (indicator_cvt[i2]==0) {continue;}
-				for (vector<int>::size_type j2=0;
-				     j2<n_ph; j2++) {
-
-					if (indicator_pheno[i2][j2]==1) {
-					      if (indicator_pheno[i1][j1]==1) {
-						gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
-						} else {
-							gsl_matrix_set (H_mo, c_miss1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
-						}
-						c_obs2++;
-					} else {
-						c_miss2++;
-					}
-				}
-			}
-
-			if (indicator_pheno[i1][j1]==1) {
-				c_obs1++;
-			} else {
-				c_miss1++;
-			}
-		}
-
-	}
-
-	// Do LU decomposition of H_oo.
-	int sig;
-	gsl_permutation * pmt=gsl_permutation_alloc (np_obs);
-	LUDecomp (H_oo, pmt, &sig);
-
-		// Obtain y_obs=y_full-y_hat.
-		// Add the fixed effects part to y_miss: y_miss=y_hat.
-		c_obs1=0; c_miss1=0;
-		for (vector<int>::size_type i=0;
-		     i<indicator_pheno.size(); ++i) {
-			if (indicator_cvt[i]==0) {continue;}
-
-			for (vector<int>::size_type j=0; j<n_ph; ++j) {
-				if (indicator_pheno[i][j]==1) {
-					gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) );
-					c_obs1++;
-				} else {
-					gsl_vector_set (y_miss, c_miss1, gsl_matrix_get (Y_hat, i, j) );
-					c_miss1++;
-				}
-			}
-		}
-
-		LUSolve (H_oo, pmt, y_obs, Hiy);
-
-		gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
-
-		// Put back predicted y_miss to Y_full.
-		c_miss1=0;
-		for (vector<int>::size_type i=0;
-		     i<indicator_pheno.size(); ++i) {
-			if (indicator_cvt[i]==0) {continue;}
-
-			for (vector<int>::size_type j=0; j<n_ph; ++j) {
-				if (indicator_pheno[i][j]==0) {
-					gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) );
-					c_miss1++;
-				}
-			}
-		}
-
-	// Free matrices.
-	gsl_vector_free(y_obs);
-	gsl_vector_free(y_miss);
-	gsl_matrix_free(H_oo);
-	gsl_matrix_free(H_mo);
-	gsl_vector_free(Hiy);
-
-	return;
+void PRDT::MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H,
+                      gsl_matrix *Y_full) {
+  gsl_vector *y_obs = gsl_vector_alloc(np_obs);
+  gsl_vector *y_miss = gsl_vector_alloc(np_miss);
+  gsl_matrix *H_oo = gsl_matrix_alloc(np_obs, np_obs);
+  gsl_matrix *H_mo = gsl_matrix_alloc(np_miss, np_obs);
+  gsl_vector *Hiy = gsl_vector_alloc(np_obs);
+
+  size_t c_obs1 = 0, c_obs2 = 0, c_miss1 = 0, c_miss2 = 0;
+
+  // Obtain H_oo, H_mo.
+  c_obs1 = 0;
+  c_miss1 = 0;
+  for (vector<int>::size_type i1 = 0; i1 < indicator_pheno.size(); ++i1) {
+    if (indicator_cvt[i1] == 0) {
+      continue;
+    }
+    for (vector<int>::size_type j1 = 0; j1 < n_ph; ++j1) {
+
+      c_obs2 = 0;
+      c_miss2 = 0;
+      for (vector<int>::size_type i2 = 0; i2 < indicator_pheno.size(); ++i2) {
+        if (indicator_cvt[i2] == 0) {
+          continue;
+        }
+        for (vector<int>::size_type j2 = 0; j2 < n_ph; j2++) {
+
+          if (indicator_pheno[i2][j2] == 1) {
+            if (indicator_pheno[i1][j1] == 1) {
+              gsl_matrix_set(
+                  H_oo, c_obs1, c_obs2,
+                  gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2));
+            } else {
+              gsl_matrix_set(
+                  H_mo, c_miss1, c_obs2,
+                  gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2));
+            }
+            c_obs2++;
+          } else {
+            c_miss2++;
+          }
+        }
+      }
+
+      if (indicator_pheno[i1][j1] == 1) {
+        c_obs1++;
+      } else {
+        c_miss1++;
+      }
+    }
+  }
+
+  // Do LU decomposition of H_oo.
+  int sig;
+  gsl_permutation *pmt = gsl_permutation_alloc(np_obs);
+  LUDecomp(H_oo, pmt, &sig);
+
+  // Obtain y_obs=y_full-y_hat.
+  // Add the fixed effects part to y_miss: y_miss=y_hat.
+  c_obs1 = 0;
+  c_miss1 = 0;
+  for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) {
+    if (indicator_cvt[i] == 0) {
+      continue;
+    }
+
+    for (vector<int>::size_type j = 0; j < n_ph; ++j) {
+      if (indicator_pheno[i][j] == 1) {
+        gsl_vector_set(y_obs, c_obs1, gsl_matrix_get(Y_full, i, j) -
+                                          gsl_matrix_get(Y_hat, i, j));
+        c_obs1++;
+      } else {
+        gsl_vector_set(y_miss, c_miss1, gsl_matrix_get(Y_hat, i, j));
+        c_miss1++;
+      }
+    }
+  }
+
+  LUSolve(H_oo, pmt, y_obs, Hiy);
+
+  gsl_blas_dgemv(CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
+
+  // Put back predicted y_miss to Y_full.
+  c_miss1 = 0;
+  for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) {
+    if (indicator_cvt[i] == 0) {
+      continue;
+    }
+
+    for (vector<int>::size_type j = 0; j < n_ph; ++j) {
+      if (indicator_pheno[i][j] == 0) {
+        gsl_matrix_set(Y_full, i, j, gsl_vector_get(y_miss, c_miss1));
+        c_miss1++;
+      }
+    }
+  }
+
+  // Free matrices.
+  gsl_vector_free(y_obs);
+  gsl_vector_free(y_miss);
+  gsl_matrix_free(H_oo);
+  gsl_matrix_free(H_mo);
+  gsl_vector_free(Hiy);
+
+  return;
 }
-
-