about summary refs log tree commit diff
path: root/src/io.cpp
diff options
context:
space:
mode:
authorxiangzhou2015-07-11 12:57:37 -0400
committerxiangzhou2015-07-11 12:57:37 -0400
commitb3b491cd9143d33bfebd4c5b26629573afcf0970 (patch)
tree37fc935d3e11a7b28fca4a0e4033125efb870490 /src/io.cpp
parente6c7cc839136b84f9486b7baa18b6f4a163db7ac (diff)
downloadpangemma-b3b491cd9143d33bfebd4c5b26629573afcf0970.tar.gz
add GXE test
Diffstat (limited to 'src/io.cpp')
-rw-r--r--src/io.cpp2482
1 files changed, 2131 insertions, 351 deletions
diff --git a/src/io.cpp b/src/io.cpp
index 7ed95c4..d6a70dd 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -28,7 +28,7 @@
 #include <cstring>
 #include <cmath>
 #include <stdio.h>
-#include <stdlib.h> 
+#include <stdlib.h>
 
 #include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
@@ -39,6 +39,7 @@
 #include "lapack.h"
 #include "gzstream.h"
 #include "mathfunc.h"
+#include "eigenlib.h"
 
 #ifdef FORCE_FLOAT
 #include "io_float.h"
@@ -54,10 +55,10 @@ using namespace std;
 //Print process bar
 void ProgressBar (string str, double p, double total)
 {
-	double progress = (100.0 * p / total); 
-	int barsize = (int) (progress / 2.0); 
+	double progress = (100.0 * p / total);
+	int barsize = (int) (progress / 2.0);
 	char bar[51];
-	
+
 	cout<<str;
 	for (int i = 0; i <50; i++) {
 		if (i<barsize) {bar[i] = '=';}
@@ -65,7 +66,7 @@ void ProgressBar (string str, double p, double total)
 		cout<<bar[i];
 	}
 	cout<<setprecision(2)<<fixed<<progress<<"%\r"<<flush;
-	
+
 	return;
 }
 
@@ -73,10 +74,10 @@ void ProgressBar (string str, double p, double total)
 //Print process bar (with acceptance ratio)
 void ProgressBar (string str, double p, double total, double ratio)
 {
-	double progress = (100.0 * p / total); 
-	int barsize = (int) (progress / 2.0); 
+	double progress = (100.0 * p / total);
+	int barsize = (int) (progress / 2.0);
 	char bar[51];
-	
+
 	cout<<str;
 	for (int i = 0; i <50; i++) {
 		if (i<barsize) {bar[i] = '=';}
@@ -84,8 +85,8 @@ void ProgressBar (string str, double p, double total, double ratio)
 		cout<<bar[i];
 	}
 	cout<<setprecision(2)<<fixed<<progress<<"%    "<<ratio<<"\r"<<flush;
-	
-	
+
+
 	return;
 }
 
@@ -130,18 +131,18 @@ bool ReadFile_snps (const string &file_snps, set<string> &setSnps)
 
 	ifstream infile (file_snps.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open snps file: "<<file_snps<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	while (getline(infile, line)) {
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		setSnps.insert(ch_ptr); 
+		setSnps.insert(ch_ptr);
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -151,15 +152,15 @@ bool ReadFile_log (const string &file_log, double &pheno_mean)
 {
 	ifstream infile (file_log.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open log file: "<<file_log<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
 	size_t flag=0;
-	
+
 	while (getline(infile, line)) {
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		ch_ptr=strtok (NULL, " , \t");
-		
+
 		if (ch_ptr!=NULL && strcmp(ch_ptr, "estimated")==0) {
 			ch_ptr=strtok (NULL, " , \t");
 			if (ch_ptr!=NULL && strcmp(ch_ptr, "mean")==0) {
@@ -171,13 +172,13 @@ bool ReadFile_log (const string &file_log, double &pheno_mean)
 				}
 			}
 		}
-		
+
 		if (flag==1) {break;}
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -187,18 +188,18 @@ bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map
 {
 	mapRS2chr.clear();
 	mapRS2bp.clear();
-	
+
 	ifstream infile (file_anno.c_str(), ifstream::in);
 	if (!infile) {cout<<"error opening annotation file: "<<file_anno<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	string rs;
 	long int b_pos;
 	string chr;
 	double cM;
-	
+
 	while (!safeGetline(infile, line).eof()) {
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		rs=ch_ptr;
@@ -208,15 +209,15 @@ bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map
 		if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {chr="-9";} else {chr=ch_ptr;}
 		ch_ptr=strtok (NULL, " , \t");
 		if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {cM=-9;} else {cM=atof(ch_ptr);}
-		
+
 		mapRS2chr[rs]=chr;
 		mapRS2bp[rs]=b_pos;
 		mapRS2cM[rs]=cM;
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -225,28 +226,28 @@ bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, vect
 {
 	indicator_idv.clear();
 	pheno.clear();
-	
+
 	igzstream infile (file_pheno.c_str(), igzstream::in);
 //	ifstream infile (file_pheno.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	string id;
 	double p;
 	while (!safeGetline(infile, line).eof()) {
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		for (int i=0; i<(p_column-1); ++i) {
-			ch_ptr=strtok (NULL, " , \t");	
-		}		
+			ch_ptr=strtok (NULL, " , \t");
+		}
 		if (strcmp(ch_ptr, "NA")==0) {indicator_idv.push_back(0); pheno.push_back(-9);}		//pheno is different from pimass2
 		else {p=atof(ch_ptr); indicator_idv.push_back(1); pheno.push_back(p);}
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -257,48 +258,48 @@ bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_p
 {
 	indicator_pheno.clear();
 	pheno.clear();
-	
+
 	igzstream infile (file_pheno.c_str(), igzstream::in);
 //	ifstream infile (file_pheno.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;}
 
 	string line;
 	char *ch_ptr;
-  
+
 	string id;
 	double p;
-	
+
 	vector<double> pheno_row;
 	vector<int> ind_pheno_row;
-	
+
 	size_t p_max=*max_element(p_column.begin(), p_column.end() );
 	map<size_t, size_t> mapP2c;
 	for (size_t i=0; i<p_column.size(); i++) {
 		mapP2c[p_column[i]]=i;
 		pheno_row.push_back(-9);
 		ind_pheno_row.push_back(0);
-	}	
-	
+	}
+
 	while (!safeGetline(infile, line).eof()) {
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
-		
+
 		size_t i=0;
-		while (i<p_max ) {			
+		while (i<p_max ) {
 			if (mapP2c.count(i+1)!=0) {
 				if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;}
 				else {p=atof(ch_ptr); ind_pheno_row[mapP2c[i+1]]=1; pheno_row[mapP2c[i+1]]=p;}
 			}
 			i++;
-			ch_ptr=strtok (NULL, " , \t");	
+			ch_ptr=strtok (NULL, " , \t");
 		}
-		
-		indicator_pheno.push_back(ind_pheno_row);	
-		pheno.push_back(pheno_row);			
+
+		indicator_pheno.push_back(ind_pheno_row);
+		pheno.push_back(pheno_row);
 	}
- 
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -306,44 +307,44 @@ bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_p
 bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt)
 {
 	indicator_cvt.clear();
-	
+
 	ifstream infile (file_cvt.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open covariates file: "<<file_cvt<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	double d;	
-	
-	int flag_na=0;	
-	
+	double d;
+
+	int flag_na=0;
+
 	while (!safeGetline(infile, line).eof()) {
 		vector<double> v_d; flag_na=0;
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		while (ch_ptr!=NULL) {
 			if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
 			else {d=atof(ch_ptr);}
-			
+
 			v_d.push_back(d);
-			ch_ptr=strtok (NULL, " , \t");	
+			ch_ptr=strtok (NULL, " , \t");
 		}
-		if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);} 
+		if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);}
 		cvt.push_back(v_d);
 	}
-	
+
 	if (indicator_cvt.empty()) {n_cvt=0;}
 	else {
 		flag_na=0;
 		for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
 			if (indicator_cvt[i]==0) {continue;}
-			
+
 			if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
 			if (flag_na!=0 && n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;}
 		}
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -353,20 +354,20 @@ bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<ve
 bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo)
 {
 	snpInfo.clear();
-	
+
 	ifstream infile (file_bim.c_str(), ifstream::in);
 	if (!infile) {cout<<"error opening .bim file: "<<file_bim<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	string rs;
 	long int b_pos;
 	string chr;
 	double cM;
 	string major;
 	string minor;
-	
+
 	while (getline(infile, line)) {
 		ch_ptr=strtok ((char *)line.c_str(), " \t");
 		chr=ch_ptr;
@@ -380,13 +381,13 @@ bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo)
 		minor=ch_ptr;
 		ch_ptr=strtok (NULL, " \t");
 		major=ch_ptr;
-		
-		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, -9, -9, -9};
+
+		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
 		snpInfo.push_back(sInfo);
 	}
-	
+
 	infile.close();
-	infile.clear();	
+	infile.clear();
 	return true;
 }
 
@@ -396,8 +397,8 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
 {
 	indicator_pheno.clear();
 	pheno.clear();
-	mapID2num.clear();	
-	
+	mapID2num.clear();
+
 	igzstream infile (file_fam.c_str(), igzstream::in);
 	//ifstream infile (file_fam.c_str(), ifstream::in);
 	if (!infile) {cout<<"error opening .fam file: "<<file_fam<<endl; return false;}
@@ -411,15 +412,15 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
 
 	vector<double> pheno_row;
 	vector<int> ind_pheno_row;
-	
+
 	size_t p_max=*max_element(p_column.begin(), p_column.end() );
 	map<size_t, size_t> mapP2c;
 	for (size_t i=0; i<p_column.size(); i++) {
 		mapP2c[p_column[i]]=i;
 		pheno_row.push_back(-9);
 		ind_pheno_row.push_back(0);
-	}	
-	
+	}
+
 	while (!safeGetline(infile, line).eof()) {
 		ch_ptr=strtok ((char *)line.c_str(), " \t");
 		ch_ptr=strtok (NULL, " \t");
@@ -428,7 +429,7 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
 		ch_ptr=strtok (NULL, " \t");
 		ch_ptr=strtok (NULL, " \t");
 		ch_ptr=strtok (NULL, " \t");
-		
+
 		size_t i=0;
 		while (i<p_max ) {
 			if (mapP2c.count(i+1)!=0 ) {
@@ -436,23 +437,23 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
 					ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;
 				} else {
 					p=atof(ch_ptr);
-					
+
 					if (p==-9) {ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;}
 					else {ind_pheno_row[mapP2c[i+1]]=1; pheno_row[mapP2c[i+1]]=p;}
 				}
 			}
 			i++;
-			ch_ptr=strtok (NULL, " , \t");	
+			ch_ptr=strtok (NULL, " , \t");
 		}
-		
+
 		indicator_pheno.push_back(ind_pheno_row);
-		pheno.push_back(pheno_row);				
-		
+		pheno.push_back(pheno_row);
+
 		mapID2num[id]=c; c++;
 	}
- 
+
 	infile.close();
-	infile.clear();	
+	infile.clear();
 	return true;
 }
 
@@ -466,7 +467,7 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
 {
 	indicator_snp.clear();
 	snpInfo.clear();
-	
+
 	igzstream infile (file_geno.c_str(), igzstream::in);
 //	ifstream infile (file_geno.c_str(), ifstream::in);
 	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
@@ -478,112 +479,118 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
 	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
 	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
 	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-	
+
 	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;	
+	//eigenlib_dgemm("T", "N", 1.0, W, W, 0.0, WtW);
+	int sig;
 	LUDecomp (WtW, pmt, &sig);
 	LUInvert (WtW, pmt, WtWi);
-	
+
 	double v_x, v_w;
 	int c_idv=0;
-	
+
 	string line;
 	char *ch_ptr;
-		
+
 	string rs;
 	long int b_pos;
 	string chr;
 	string major;
 	string minor;
 	double cM;
-  
+	size_t file_pos;
+
 	double maf, geno, geno_old;
 	size_t n_miss;
 	size_t n_0, n_1, n_2;
 	int flag_poly;
-	
+
 	int ni_total=indicator_idv.size();
 	int ni_test=0;
 	for (int i=0; i<ni_total; ++i) {
 		ni_test+=indicator_idv[i];
 	}
 	ns_test=0;
-	
-	while (!safeGetline(infile, line).eof()) {		
+
+	file_pos=0;
+	while (!safeGetline(infile, line).eof()) {
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		rs=ch_ptr;
 		ch_ptr=strtok (NULL, " , \t");
 		minor=ch_ptr;
 		ch_ptr=strtok (NULL, " , \t");
 		major=ch_ptr;
-		
+
 		if (setSnps.size()!=0 && setSnps.count(rs)==0) {
-			SNPINFO sInfo={"-9", rs, -9, -9, minor, major, -9, -9, -9};
-			snpInfo.push_back(sInfo);
-			indicator_snp.push_back(0);
-			continue;
+		  SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9, 0, 0, file_pos};
+		  snpInfo.push_back(sInfo);
+		  indicator_snp.push_back(0);
+
+		  file_pos++;
+		  continue;
 		}
-				
+
 		if (mapRS2bp.count(rs)==0) {chr="-9"; b_pos=-9;cM=-9;}
-		else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];}		
-				
+		else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];}
+
 		maf=0; n_miss=0; flag_poly=0; geno_old=-9;
 		n_0=0; n_1=0; n_2=0;
 		c_idv=0; gsl_vector_set_zero (genotype_miss);
 		for (int i=0; i<ni_total; ++i) {
 			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[i]==0) {continue;}		
+			if (indicator_idv[i]==0) {continue;}
 
 			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}
-			
+
 			geno=atof(ch_ptr);
 			if (geno>=0 && geno<=0.5) {n_0++;}
 			if (geno>0.5 && geno<1.5) {n_1++;}
 			if (geno>=1.5 && geno<=2.0) {n_2++;}
-			
-			gsl_vector_set (genotype, c_idv, geno); 
-			
+
+			gsl_vector_set (genotype, c_idv, geno);
+
 //			if (geno<0) {n_miss++; continue;}
-			
+
 			if (flag_poly==0) {geno_old=geno; flag_poly=2;}
 			if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
-			
+
 			maf+=geno;
-			
+
 			c_idv++;
 		}
-		maf/=2.0*(double)(ni_test-n_miss);	
-		
-		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf};
+		maf/=2.0*(double)(ni_test-n_miss);
+
+		SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf, ni_test-n_miss, 0, file_pos};
 		snpInfo.push_back(sInfo);
-		
+		file_pos++;
+
 		if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
-		
+
 		if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
-		
+
 		if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
-		
+
 		if (hwe_level!=0 && maf_level!=-1) {
 			if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
 		}
-		
+
 		//filter SNP if it is correlated with W
 		//unless W has only one column, of 1s
-		for (size_t i=0; i<genotype->size; ++i) {			
-			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}		
+		for (size_t i=0; i<genotype->size; ++i) {
+			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
 		}
-		
+
 		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
 		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
 		gsl_blas_ddot (genotype, genotype, &v_x);
 		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-		
+
 		if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}
-		
-		indicator_snp.push_back(1); 
+
+		indicator_snp.push_back(1);
 		ns_test++;
 	}
-	
+
 	gsl_vector_free (genotype);
 	gsl_vector_free (genotype_miss);
 	gsl_matrix_free (WtW);
@@ -591,10 +598,10 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
 	gsl_vector_free (Wtx);
 	gsl_vector_free (WtWiWtx);
 	gsl_permutation_free (pmt);
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -602,13 +609,13 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
 
 
 
-      
+
 //Read bed file, the first time
 bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
 {
 	indicator_snp.clear();
 	size_t ns_total=snpInfo.size();
-	
+
 	ifstream infile (file_bed.c_str(), ios::binary);
 	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
 
@@ -619,25 +626,25 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
 	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
 	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
 	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-	
+
 	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-	int sig;	
+	int sig;
 	LUDecomp (WtW, pmt, &sig);
 	LUInvert (WtW, pmt, WtWi);
-	
+
 	double v_x, v_w, geno;
 	size_t c_idv=0;
-	
+
 	char ch[1];
 	bitset<8> b;
-  	
+
 	size_t ni_total=indicator_idv.size();
 	size_t ni_test=0;
 	for (size_t i=0; i<ni_total; ++i) {
 		ni_test+=indicator_idv[i];
 	}
 	ns_test=0;
-	
+
 	//calculate n_bit and c, the number of bit for each snp
 	size_t n_bit;
 	if (ni_total%4==0) {n_bit=ni_total/4;}
@@ -648,19 +655,20 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
 		infile.read(ch,1);
 		b=ch[0];
 	}
-	
+
 	double maf;
 	size_t n_miss;
-	size_t n_0, n_1, n_2, c;	
-	
+	size_t n_0, n_1, n_2, c;
+
 	//start reading snps and doing association test
 	for (size_t t=0; t<ns_total; ++t) {
 		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
-		
+
 		if (setSnps.size()!=0 && setSnps.count(snpInfo[t].rs_number)==0) {
 			snpInfo[t].n_miss=-9;
 			snpInfo[t].missingness=-9;
 			snpInfo[t].maf=-9;
+			snpInfo[t].file_position=t;
 			indicator_snp.push_back(0);
 			continue;
 		}
@@ -675,52 +683,55 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
 				if ((i==(n_bit-1)) && c==ni_total) {break;}
 				if (indicator_idv[c]==0) {c++; continue;}
 				c++;
-				
+
 				if (b[2*j]==0) {
 					if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); maf+=2.0; n_2++;}
 					else {gsl_vector_set(genotype, c_idv, 1.0); maf+=1.0; n_1++;}
 				}
 				else {
-					if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); maf+=0.0; n_0++;}                                  
+					if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); maf+=0.0; n_0++;}
 					else {gsl_vector_set(genotype_miss, c_idv, 1); n_miss++; }
 				}
 				c_idv++;
 			}
 		}
 		maf/=2.0*(double)(ni_test-n_miss);
-		
+
 		snpInfo[t].n_miss=n_miss;
 		snpInfo[t].missingness=(double)n_miss/(double)ni_test;
 		snpInfo[t].maf=maf;
-		
+		snpInfo[t].n_idv=ni_test-n_miss;
+		snpInfo[t].n_nb=0;
+		snpInfo[t].file_position=t;
+
 		if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
-		
+
 		if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
-		
+
 		if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {indicator_snp.push_back(0); continue;}
-		
+
 		if (hwe_level!=1 && maf_level!=-1) {
 			if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
 		}
-			
-		
+
+
 		//filter SNP if it is correlated with W
 		//unless W has only one column, of 1s
-		for (size_t i=0; i<genotype->size; ++i) {			
-			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}		
+		for (size_t i=0; i<genotype->size; ++i) {
+			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
 		}
-		
+
 		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
 		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
 		gsl_blas_ddot (genotype, genotype, &v_x);
 		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-		
+
 		if (W->size2!=1 && v_w/v_x > r2_level) {indicator_snp.push_back(0); continue;}
-		
-		indicator_snp.push_back(1); 
+
+		indicator_snp.push_back(1);
 		ns_test++;
 	}
-	
+
 	gsl_vector_free (genotype);
 	gsl_vector_free (genotype_miss);
 	gsl_matrix_free (WtW);
@@ -728,63 +739,177 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
 	gsl_vector_free (Wtx);
 	gsl_vector_free (WtWiWtx);
 	gsl_permutation_free (pmt);
-		  
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
 
 
-void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G) 
+
+
+//read the genotype for one SNP; remember to read empty lines
+//geno stores original genotypes without centering
+//missing values are replaced by mean
+bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, igzstream &infile, gsl_vector *geno, double &geno_mean)
+{
+  size_t ni_total=indicator_idv.size();
+
+  //  if (infile.eof()) {infile.clear();}
+  //  infile.seekg(pos);
+
+  string line;
+  char *ch_ptr;
+  bool flag=false;
+
+  for (size_t i=0; i<inc; i++) {
+    !safeGetline(infile, line).eof();
+  }
+
+  if (!safeGetline(infile, line).eof()) {
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    ch_ptr=strtok (NULL, " , \t");
+    ch_ptr=strtok (NULL, " , \t");
+
+    geno_mean=0.0;
+    double d;
+    size_t c_idv=0;
+    vector<size_t> geno_miss;
+
+    for (size_t i=0; i<ni_total; ++i) {
+      ch_ptr=strtok (NULL, " , \t");
+      if (indicator_idv[i]==0) {continue;}
+
+      if (strcmp(ch_ptr, "NA")==0) {
+	geno_miss.push_back(c_idv);
+      } else {
+	d=atof(ch_ptr);
+	gsl_vector_set (geno, c_idv, d);
+	geno_mean+=d;
+      }
+      c_idv++;
+    }
+
+    geno_mean/=(double)(c_idv-geno_miss.size() );
+
+    for (size_t i=0; i<geno_miss.size(); ++i) {
+      gsl_vector_set(geno, geno_miss[i], geno_mean);
+    }
+    flag=true;
+  }
+
+  return flag;
+}
+
+
+//for plink, store SNPs as double too
+void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, ifstream &infile, gsl_vector *geno, double &geno_mean)
+{
+  size_t ni_total=indicator_idv.size(), n_bit;
+  if (ni_total%4==0) {n_bit=ni_total/4;}
+  else {n_bit=ni_total/4+1;}
+  infile.seekg(pos*n_bit+3); //n_bit, and 3 is the number of magic numbers
+
+  //read genotypes
+  char ch[1];
+  bitset<8> b;
+
+  geno_mean=0.0;
+  size_t c=0, c_idv=0;
+  vector<size_t> geno_miss;
+
+  for (size_t i=0; i<n_bit; ++i) {
+    infile.read(ch,1);
+    b=ch[0];
+    for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+      if ((i==(n_bit-1)) && c==ni_total) {break;}
+      if (indicator_idv[c]==0) {c++; continue;}
+      c++;
+
+      if (b[2*j]==0) {
+	if (b[2*j+1]==0) {
+	  gsl_vector_set (geno, c_idv, 2);
+	  geno_mean+=2.0;
+	} else {
+	  gsl_vector_set (geno, c_idv, 1);
+	  geno_mean+=1.0;
+	}
+      } else {
+	if (b[2*j+1]==1) {
+	  gsl_vector_set (geno, c_idv, 0);
+	  geno_mean+=0.0;
+	} else {
+	  geno_miss.push_back(c_idv);
+	}
+      }
+
+      c_idv++;
+    }
+  }
+
+  geno_mean/=(double)(c_idv-geno_miss.size());
+
+  for (size_t i=0; i<geno_miss.size(); ++i) {
+    gsl_vector_set(geno, geno_miss[i], geno_mean);
+  }
+
+  return;
+}
+
+
+
+
+
+void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
 {
 	igzstream infile (file_kin.c_str(), igzstream::in);
 //	ifstream infile (file_kin.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open kinship file: "<<file_kin<<endl; error=true; return;}
-	
+
 	size_t ni_total=indicator_idv.size();
-	
+
 	gsl_matrix_set_zero (G);
-	
+
 	string line;
-	char *ch_ptr;	
+	char *ch_ptr;
 	double d;
-	
+
 	if (k_mode==1) {
 		size_t i_test=0, i_total=0, j_test=0, j_total=0;
 		while (getline(infile, line)) {
-			if (i_total==ni_total) {cout<<"error! number of rows in the kinship file is larger than the number of phentypes."<<endl; error=true;}			
-			
+			if (i_total==ni_total) {cout<<"error! number of rows in the kinship file is larger than the number of phentypes."<<endl; error=true;}
+
 			if (indicator_idv[i_total]==0) {i_total++; continue;}
-			
+
 			j_total=0; j_test=0;
 			ch_ptr=strtok ((char *)line.c_str(), " , \t");
 			while (ch_ptr!=NULL) {
 				if (j_total==ni_total) {cout<<"error! number of columns in the kinship file is larger than the number of phentypes for row = "<<i_total<<endl; error=true;}
-				
+
 				d=atof(ch_ptr);
-				if (indicator_idv[j_total]==1) {gsl_matrix_set (G, i_test, j_test, d); j_test++;}				
+				if (indicator_idv[j_total]==1) {gsl_matrix_set (G, i_test, j_test, d); j_test++;}
 				j_total++;
-				
+
 				ch_ptr=strtok (NULL, " , \t");
 			}
 			if (j_total!=ni_total) {cout<<"error! number of columns in the kinship file do not match the number of phentypes for row = "<<i_total<<endl; error=true;}
-			i_total++; i_test++;			
+			i_total++; i_test++;
 		}
 		if (i_total!=ni_total) {cout<<"error! number of rows in the kinship file do not match the number of phentypes."<<endl; error=true;}
-	}	
-	else {  
+	}
+	else {
 		map<size_t, size_t> mapID2ID;
 		size_t c=0;
 		for (size_t i=0; i<indicator_idv.size(); i++) {
 			if (indicator_idv[i]==1) {mapID2ID[i]=c; c++;}
 		}
-		
+
 		string id1, id2;
 		double Cov_d;
 		size_t n_id1, n_id2;
-		
+
 		while (getline(infile, line)) {
 			ch_ptr=strtok ((char *)line.c_str(), " , \t");
 			id1=ch_ptr;
@@ -794,10 +919,10 @@ void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<strin
 			d=atof(ch_ptr);
 			if (mapID2num.count(id1)==0 || mapID2num.count(id2)==0) {continue;}
 			if (indicator_idv[mapID2num[id1]]==0 || indicator_idv[mapID2num[id2]]==0) {continue;}
-			
+
 			n_id1=mapID2ID[mapID2num[id1]];
 			n_id2=mapID2ID[mapID2num[id2]];
-			
+
 			Cov_d=gsl_matrix_get(G, n_id1, n_id2);
 			if (Cov_d!=0 && Cov_d!=d) {cout<<"error! redundant and unequal terms in the kinship file, for id1 = "<<id1<<" and id2 = "<<id2<<endl;}
 			else {
@@ -806,15 +931,15 @@ void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<strin
 			}
 		}
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return;
 }
 
 
-void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G) 
+void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
 {
 	igzstream infile (file_mk.c_str(), igzstream::in);
 	if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl; error=true; return;}
@@ -830,101 +955,101 @@ void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string,
 	}
 
 	infile.close();
-	infile.clear();	
+	infile.clear();
 	return;
 }
 
 
-void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U) 
+void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U)
 {
 	igzstream infile (file_ku.c_str(), igzstream::in);
 //	ifstream infile (file_ku.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open the U file: "<<file_ku<<endl; error=true; return;}
-	
+
 	size_t n_row=U->size1, n_col=U->size2, i_row=0, i_col=0;
-	
+
 	gsl_matrix_set_zero (U);
-	
+
 	string line;
-	char *ch_ptr;	
+	char *ch_ptr;
 	double d;
-	
+
 	while (getline(infile, line)) {
-		if (i_row==n_row) {cout<<"error! number of rows in the U file is larger than expected."<<endl; error=true;}			
-				
+		if (i_row==n_row) {cout<<"error! number of rows in the U file is larger than expected."<<endl; error=true;}
+
 		i_col=0;
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		while (ch_ptr!=NULL) {
 			if (i_col==n_col) {cout<<"error! number of columns in the U file is larger than expected, for row = "<<i_row<<endl; error=true;}
-			
+
 			d=atof(ch_ptr);
-			gsl_matrix_set (U, i_row, i_col, d);			
+			gsl_matrix_set (U, i_row, i_col, d);
 			i_col++;
-			
+
 			ch_ptr=strtok (NULL, " , \t");
 		}
-		
+
 		i_row++;
 	}
-		
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return;
 }
 
 
 
 
-void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval) 
+void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval)
 {
 	igzstream infile (file_kd.c_str(), igzstream::in);
 //	ifstream infile (file_kd.c_str(), ifstream::in);
 	if (!infile) {cout<<"error! fail to open the D file: "<<file_kd<<endl; error=true; return;}
-	
+
 	size_t n_row=eval->size, i_row=0;
-	
+
 	gsl_vector_set_zero (eval);
-	
+
 	string line;
-	char *ch_ptr;	
+	char *ch_ptr;
 	double d;
-	
+
 	while (getline(infile, line)) {
-		if (i_row==n_row) {cout<<"error! number of rows in the D file is larger than expected."<<endl; error=true;}			
-		
+		if (i_row==n_row) {cout<<"error! number of rows in the D file is larger than expected."<<endl; error=true;}
+
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		d=atof(ch_ptr);
-		
+
 		ch_ptr=strtok (NULL, " , \t");
 		if (ch_ptr!=NULL) {cout<<"error! number of columns in the D file is larger than expected, for row = "<<i_row<<endl; error=true;}
-		
+
 		gsl_vector_set (eval, i_row, d);
-		
+
 		i_row++;
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return;
 }
 
 
 
 //read bimbam mean genotype file and calculate kinship matrix
-bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin) 
+bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
 {
 	igzstream infile (file_geno.c_str(), igzstream::in);
 	//ifstream infile (file_geno.c_str(), ifstream::in);
 	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	size_t n_miss;
 	double d, geno_mean, geno_var;
-	
+
 	size_t ni_total=matrix_kin->size1;
 	gsl_vector *geno=gsl_vector_alloc (ni_total);
 	gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
@@ -934,11 +1059,11 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
 		!safeGetline(infile, line).eof();
 		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
 		if (indicator_snp[t]==0) {continue;}
-		
+
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		ch_ptr=strtok (NULL, " , \t");
 		ch_ptr=strtok (NULL, " , \t");
-		
+
 		geno_mean=0.0; n_miss=0; geno_var=0.0;
 		gsl_vector_set_all(geno_miss, 0);
 		for (size_t i=0; i<ni_total; ++i) {
@@ -952,44 +1077,49 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
 				geno_var+=d*d;
 			}
 		}
-		
+
 		geno_mean/=(double)(ni_total-n_miss);
 		geno_var+=geno_mean*geno_mean*(double)n_miss;
 		geno_var/=(double)ni_total;
 		geno_var-=geno_mean*geno_mean;
 //		geno_var=geno_mean*(1-geno_mean*0.5);
-		
+
 		for (size_t i=0; i<ni_total; ++i) {
 			if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
-		}		
-		
+		}
+
 		gsl_vector_add_constant (geno, -1.0*geno_mean);
-		
+
 		if (geno_var!=0) {
-			if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
-			else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
-			else {cout<<"Unknown kinship mode."<<endl;}
+		  if (k_mode==1) {
+		    gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);
+		    //eigenlib_dsyr (1.0, geno, matrix_kin);
+		  } else if (k_mode==2) {
+		    gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);
+		    //eigenlib_dsyr (1.0/geno_var, geno, matrix_kin);
+		  } else {
+		    cout<<"Unknown kinship mode."<<endl;
+		  }
 		}
-		
 		ns_test++;
-    }	
+    }
 	cout<<endl;
-	
+
 	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-	
+
 	for (size_t i=0; i<ni_total; ++i) {
 		for (size_t j=0; j<i; ++j) {
 			d=gsl_matrix_get (matrix_kin, j, i);
 			gsl_matrix_set (matrix_kin, i, j, d);
 		}
 	}
-	
+
 	gsl_vector_free (geno);
 	gsl_vector_free (geno_miss);
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -999,23 +1129,23 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
 
 
 
-bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin) 
+bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
 {
 	ifstream infile (file_bed.c_str(), ios::binary);
 	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
-		
+
 	char ch[1];
 	bitset<8> b;
-	
+
 	size_t n_miss, ci_total;
 	double d, geno_mean, geno_var;
-	
+
 	size_t ni_total=matrix_kin->size1;
 	gsl_vector *geno=gsl_vector_alloc (ni_total);
 
 	size_t ns_test=0;
 	int n_bit;
-	
+
 	//calculate n_bit and c, the number of bit for each snp
 	if (ni_total%4==0) {n_bit=ni_total/4;}
 	else {n_bit=ni_total/4+1; }
@@ -1024,14 +1154,14 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
 	for (int i=0; i<3; ++i) {
 		infile.read(ch,1);
 		b=ch[0];
-	}	
-	
+	}
+
 	for (size_t t=0; t<indicator_snp.size(); ++t) {
 		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
 		if (indicator_snp[t]==0) {continue;}
-		
+
 		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
-		
+
 		//read genotypes
 		geno_mean=0.0;	n_miss=0; ci_total=0; geno_var=0.0;
 		for (int i=0; i<n_bit; ++i) {
@@ -1045,51 +1175,51 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
 					else {gsl_vector_set(geno, ci_total, 1.0); geno_mean+=1.0; geno_var+=1.0;}
 				}
 				else {
-					if (b[2*j+1]==1) {gsl_vector_set(geno, ci_total, 0.0); }      
+					if (b[2*j+1]==1) {gsl_vector_set(geno, ci_total, 0.0); }
 					else {gsl_vector_set(geno, ci_total, -9.0); n_miss++; }
 				}
 
 				ci_total++;
 			}
 		}
-				
+
 		geno_mean/=(double)(ni_total-n_miss);
 		geno_var+=geno_mean*geno_mean*(double)n_miss;
 		geno_var/=(double)ni_total;
 		geno_var-=geno_mean*geno_mean;
 //		geno_var=geno_mean*(1-geno_mean*0.5);
-		
+
 		for (size_t i=0; i<ni_total; ++i) {
 			d=gsl_vector_get(geno,i);
 			if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
-		}		
-		
+		}
+
 		gsl_vector_add_constant (geno, -1.0*geno_mean);
-		
+
 		if (geno_var!=0) {
 			if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
 			else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
 			else {cout<<"Unknown kinship mode."<<endl;}
 		}
-		
+
 		ns_test++;
-    }	
+    }
 	cout<<endl;
-	
+
 	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-	
+
 	for (size_t i=0; i<ni_total; ++i) {
 		for (size_t j=0; j<i; ++j) {
 			d=gsl_matrix_get (matrix_kin, j, i);
 			gsl_matrix_set (matrix_kin, i, j, d);
 		}
 	}
-	
+
 	gsl_vector_free (geno);
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
@@ -1103,65 +1233,65 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
 	igzstream infile (file_geno.c_str(), igzstream::in);
 //	ifstream infile (file_geno.c_str(), ifstream::in);
 	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	if (calc_K==true) {gsl_matrix_set_zero (K);}
-	
+
 	gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
 	gsl_vector *genotype_miss=gsl_vector_alloc (UtX->size1);
 	double geno, geno_mean;
 	size_t n_miss;
-	
+
 	int ni_total=(int)indicator_idv.size();
 	int ns_total=(int)indicator_snp.size();
 	int ni_test=UtX->size1;
 	int ns_test=UtX->size2;
-	
+
 	int c_idv=0, c_snp=0;
-	
+
 	for (int i=0; i<ns_total; ++i) {
 		!safeGetline(infile, line).eof();
-		if (indicator_snp[i]==0) {continue;}	
-		
+		if (indicator_snp[i]==0) {continue;}
+
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		ch_ptr=strtok (NULL, " , \t");
 		ch_ptr=strtok (NULL, " , \t");
-		
+
 		c_idv=0; geno_mean=0; n_miss=0;
 		gsl_vector_set_zero (genotype_miss);
 		for (int j=0; j<ni_total; ++j) {
 			ch_ptr=strtok (NULL, " , \t");
-			if (indicator_idv[j]==0) {continue;}			
-			
+			if (indicator_idv[j]==0) {continue;}
+
 			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;}
-			else {			
+			else {
 				geno=atof(ch_ptr);
-				gsl_vector_set (genotype, c_idv, geno); 
+				gsl_vector_set (genotype, c_idv, geno);
 				geno_mean+=geno;
 			}
 			c_idv++;
 		}
-		
+
 		geno_mean/=(double)(ni_test-n_miss);
-		
-		for (size_t i=0; i<genotype->size; ++i) {			
+
+		for (size_t i=0; i<genotype->size; ++i) {
 			if (gsl_vector_get (genotype_miss, i)==1) {geno=0;}
 			else {geno=gsl_vector_get (genotype, i); geno-=geno_mean;}
-			
+
 			gsl_vector_set (genotype, i, geno);
 			gsl_matrix_set (UtX, i, c_snp, geno);
 		}
-		
+
 		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
-		
+
 		c_snp++;
-	}	
-	
+	}
+
 	if (calc_K==true) {
 		gsl_matrix_scale (K, 1.0/(double)ns_test);
-		
+
 		for (size_t i=0; i<genotype->size; ++i) {
 			for (size_t j=0; j<i; ++j) {
 				geno=gsl_matrix_get (K, j, i);
@@ -1169,18 +1299,106 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
 			}
 		}
 	}
-	
+
 	gsl_vector_free (genotype);
 	gsl_vector_free (genotype_miss);
-	
+
 	infile.clear();
 	infile.close();
-	
+
 	return true;
 }
 
 
 
+//compact version of the above function, using uchar instead of gsl_matrix
+bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
+{
+	igzstream infile (file_geno.c_str(), igzstream::in);
+    //	ifstream infile (file_geno.c_str(), ifstream::in);
+	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+
+	Xt.clear();
+	vector<unsigned char> Xt_row;
+	for (size_t i=0; i<ni_test; i++) {
+	  Xt_row.push_back(0);
+	}
+
+	string line;
+	char *ch_ptr;
+
+	if (calc_K==true) {gsl_matrix_set_zero (K);}
+
+	gsl_vector *genotype=gsl_vector_alloc (ni_test);
+	gsl_vector *genotype_miss=gsl_vector_alloc (ni_test);
+	double geno, geno_mean;
+	size_t n_miss;
+
+	size_t ni_total= indicator_idv.size();
+	size_t ns_total= indicator_snp.size();
+
+	size_t c_idv=0, c_snp=0;
+
+	for (size_t i=0; i<ns_total; ++i) {
+		!safeGetline(infile, line).eof();
+		if (indicator_snp[i]==0) {continue;}
+
+		ch_ptr=strtok ((char *)line.c_str(), " , \t");
+		ch_ptr=strtok (NULL, " , \t");
+		ch_ptr=strtok (NULL, " , \t");
+
+		c_idv=0; geno_mean=0; n_miss=0;
+		gsl_vector_set_zero (genotype_miss);
+		for (uint j=0; j<ni_total; ++j) {
+			ch_ptr=strtok (NULL, " , \t");
+			if (indicator_idv[j]==0) {continue;}
+
+			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;} else {
+				geno=atof(ch_ptr);
+				gsl_vector_set (genotype, c_idv, geno);
+				geno_mean+=geno;
+			}
+			c_idv++;
+		}
+
+		geno_mean/=(double)(ni_test-n_miss);
+
+		for (size_t j=0; j<genotype->size; ++j) {
+			if (gsl_vector_get (genotype_miss, j)==1) {
+			  geno=geno_mean;
+			} else {
+			  geno=gsl_vector_get (genotype, j);
+			}
+
+			Xt_row[j]=Double02ToUchar(geno);
+			gsl_vector_set (genotype, j, (geno-geno_mean));
+		}
+		Xt.push_back(Xt_row);
+
+		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+
+		c_snp++;
+	}
+
+	if (calc_K==true) {
+		gsl_matrix_scale (K, 1.0/(double)ns_test);
+
+		for (size_t i=0; i<genotype->size; ++i) {
+			for (size_t j=0; j<i; ++j) {
+				geno=gsl_matrix_get (K, j, i);
+				gsl_matrix_set (K, i, j, geno);
+			}
+		}
+	}
+
+	gsl_vector_free (genotype);
+	gsl_vector_free (genotype_miss);
+
+	infile.clear();
+	infile.close();
+
+	return true;
+}
 
 
 
@@ -1190,79 +1408,79 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
 {
 	ifstream infile (file_bed.c_str(), ios::binary);
 	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
-	
+
 	char ch[1];
 	bitset<8> b;
-	
-	int ni_total=(int)indicator_idv.size();
-	int ns_total=(int)indicator_snp.size();
-	int ni_test=UtX->size1;
-	int ns_test=UtX->size2;
+
+	size_t ni_total=indicator_idv.size();
+	size_t ns_total=indicator_snp.size();
+	size_t ni_test=UtX->size1;
+	size_t ns_test=UtX->size2;
 	int n_bit;
-	
+
 	if (ni_total%4==0) {n_bit=ni_total/4;}
 	else {n_bit=ni_total/4+1;}
-	
+
 	//print the first three majic numbers
 	for (int i=0; i<3; ++i) {
 		infile.read(ch,1);
 		b=ch[0];
 	}
-	
+
 	if (calc_K==true) {gsl_matrix_set_zero (K);}
-	
-	gsl_vector *genotype=gsl_vector_alloc (UtX->size1);	
-	
+
+	gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
+
 	double geno, geno_mean;
-	size_t n_miss;	
-	int c_idv=0, c_snp=0, c=0;
-	
+	size_t n_miss;
+	size_t c_idv=0, c_snp=0, c=0;
+
 	//start reading snps and doing association test
-	for (int t=0; t<ns_total; ++t) {
-		if (indicator_snp[t]==0) {continue;}	
+	for (size_t t=0; t<ns_total; ++t) {
+		if (indicator_snp[t]==0) {continue;}
 		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
-		
+
 		//read genotypes
 		c_idv=0; geno_mean=0.0; n_miss=0; c=0;
 		for (int i=0; i<n_bit; ++i) {
 			infile.read(ch,1);
 			b=ch[0];
 			for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
-				if ((i==(n_bit-1)) && c==ni_total) {break;}				
+			  if ((i==(n_bit-1)) && c==ni_total) {break;}
 				if (indicator_idv[c]==0) {c++; continue;}
 				c++;
-				
+
 				if (b[2*j]==0) {
 					if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); geno_mean+=2.0;}
 					else {gsl_vector_set(genotype, c_idv, 1.0); geno_mean+=1.0;}
 				}
 				else {
-					if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); geno_mean+=0.0;}                               
+					if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); geno_mean+=0.0;}
 					else {gsl_vector_set(genotype, c_idv, -9.0); n_miss++;}
 				}
 				c_idv++;
 			}
 		}
-		
+
 		geno_mean/=(double)(ni_test-n_miss);
-		
-		for (size_t i=0; i<genotype->size; ++i) {		
+
+		for (size_t i=0; i<genotype->size; ++i) {
 			geno=gsl_vector_get (genotype, i);
 			if (geno==-9) {geno=0;}
 			else {geno-=geno_mean;}
-			
+
 			gsl_vector_set (genotype, i, geno);
 			gsl_matrix_set (UtX, i, c_snp, geno);
 		}
-		
+
 		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
-		
+
 		c_snp++;
-	}	
-	
+	}
+
 	if (calc_K==true) {
 		gsl_matrix_scale (K, 1.0/(double)ns_test);
-		
+
 		for (size_t i=0; i<genotype->size; ++i) {
 			for (size_t j=0; j<i; ++j) {
 				geno=gsl_matrix_get (K, j, i);
@@ -1270,39 +1488,144 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
 			}
 		}
 	}
-	
-	gsl_vector_free (genotype);		  
+
+	gsl_vector_free (genotype);
 	infile.clear();
 	infile.close();
-	
+
 	return true;
 }
 
 
 
 
+//compact version of the above function, using uchar instead of gsl_matrix
+bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
+{
+	ifstream infile (file_bed.c_str(), ios::binary);
+	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+
+	Xt.clear();
+	vector<unsigned char> Xt_row;
+	for (size_t i=0; i<ni_test; i++) {
+	  Xt_row.push_back(0);
+	}
+
+	char ch[1];
+	bitset<8> b;
+
+	size_t ni_total=indicator_idv.size();
+	size_t ns_total=indicator_snp.size();
+	int n_bit;
+
+	if (ni_total%4==0) {n_bit=ni_total/4;}
+	else {n_bit=ni_total/4+1;}
+
+	//print the first three majic numbers
+	for (int i=0; i<3; ++i) {
+		infile.read(ch,1);
+		b=ch[0];
+	}
+
+	if (calc_K==true) {gsl_matrix_set_zero (K);}
+
+	gsl_vector *genotype=gsl_vector_alloc (ni_test);
+
+	double geno, geno_mean;
+	size_t n_miss;
+	size_t c_idv=0, c_snp=0, c=0;
+
+	//start reading snps and doing association test
+	for (size_t t=0; t<ns_total; ++t) {
+		if (indicator_snp[t]==0) {continue;}
+		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
+
+		//read genotypes
+		c_idv=0; geno_mean=0.0; n_miss=0; c=0;
+		for (int i=0; i<n_bit; ++i) {
+			infile.read(ch,1);
+			b=ch[0];
+			for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
+			  if ((i==(n_bit-1)) && c==ni_total) {break;}
+				if (indicator_idv[c]==0) {c++; continue;}
+				c++;
+
+				if (b[2*j]==0) {
+					if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); geno_mean+=2.0;}
+					else {gsl_vector_set(genotype, c_idv, 1.0); geno_mean+=1.0;}
+				}
+				else {
+					if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); geno_mean+=0.0;}
+					else {gsl_vector_set(genotype, c_idv, -9.0); n_miss++;}
+				}
+				c_idv++;
+			}
+		}
+
+		geno_mean/=(double)(ni_test-n_miss);
+
+		for (size_t i=0; i<genotype->size; ++i) {
+			geno=gsl_vector_get (genotype, i);
+			if (geno==-9) {geno=geno_mean;}
+
+			Xt_row[i]=Double02ToUchar(geno);
+
+			geno-=geno_mean;
+
+			gsl_vector_set (genotype, i, geno);
+		}
+		Xt.push_back(Xt_row);
+
+		if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+
+		c_snp++;
+	}
+
+	if (calc_K==true) {
+		gsl_matrix_scale (K, 1.0/(double)ns_test);
+
+		for (size_t i=0; i<genotype->size; ++i) {
+			for (size_t j=0; j<i; ++j) {
+				geno=gsl_matrix_get (K, j, i);
+				gsl_matrix_set (K, i, j, geno);
+			}
+		}
+	}
+
+	gsl_vector_free (genotype);
+	infile.clear();
+	infile.close();
+
+	return true;
+}
+
+
+
+
+
+
 
 bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map<string, double> &mapRS2est)
 {
 	mapRS2est.clear();
-	
+
 	ifstream infile (file_est.c_str(), ifstream::in);
 	if (!infile) {cout<<"error opening estimated parameter file: "<<file_est<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	string rs;
 	double alpha, beta, gamma, d;
-	
+
 	//header
 	getline(infile, line);
-	
+
 	size_t n=*max_element(est_column.begin(), est_column.end());
-	
+
 	while (getline(infile, line)) {
-		ch_ptr=strtok ((char *)line.c_str(), " \t");		
-		
+		ch_ptr=strtok ((char *)line.c_str(), " \t");
+
 		alpha=0.0; beta=0.0; gamma=1.0;
 		for (size_t i=0; i<n+1; ++i) {
 			if (i==est_column[0]-1) {rs=ch_ptr;}
@@ -1311,9 +1634,9 @@ bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map
 			if (i==est_column[3]-1) {gamma=atof(ch_ptr);}
 			if (i<n) {ch_ptr=strtok (NULL, " \t");}
 		}
-		
+
 		d=alpha+beta*gamma;
-		
+
 		if (mapRS2est.count(rs)==0) {
 			mapRS2est[rs]=d;
 		}
@@ -1321,7 +1644,7 @@ bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map
 			cout<<"the same SNP occurs more than once in estimated parameter file: "<<rs<<endl; return false;
 		}
 	}
-	
+
 	infile.clear();
 	infile.close();
 	return true;
@@ -1337,7 +1660,7 @@ bool CountFileLines (const string &file_input, size_t &n_lines)
 
 	n_lines=count(istreambuf_iterator<char>(infile), istreambuf_iterator<char>(), '\n');
 	infile.seekg (0, ios::beg);
-	
+
 	return true;
 }
 
@@ -1348,25 +1671,25 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SN
 {
 	vec_read.clear();
 	ng_total=0;
-	
-	ifstream infile (file_gene.c_str(), ifstream::in);
+
+	igzstream infile (file_gene.c_str(), igzstream::in);
 	if (!infile) {cout<<"error! fail to open gene expression file: "<<file_gene<<endl; return false;}
-	
+
 	string line;
 	char *ch_ptr;
 	string rs;
-	
+
 	size_t n_idv=0, t=0;
-	
+
 	//header
 	getline(infile, line);
-	
+
 	while (getline(infile, line)) {
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		rs=ch_ptr;
-		
-		ch_ptr=strtok (NULL, " , \t");	
-		
+
+		ch_ptr=strtok (NULL, " , \t");
+
 		t=0;
 		while (ch_ptr!=NULL) {
 			if (ng_total==0) {
@@ -1374,25 +1697,1482 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SN
 				t++;
 				n_idv++;
 			} else {
-				vec_read[t]+=atof(ch_ptr);		
+				vec_read[t]+=atof(ch_ptr);
 				t++;
 			}
-			
-			ch_ptr=strtok (NULL, " , \t");	
+
+			ch_ptr=strtok (NULL, " , \t");
 		}
-		
+
 		if (t!=n_idv) {cout<<"error! number of columns doesn't match in row: "<<ng_total<<endl; return false;}
-		
-		SNPINFO sInfo={"-9", rs, -9, -9, "-9", "-9", -9, -9, -9};
+
+		SNPINFO sInfo={"-9", rs, -9, -9, "-9", "-9", 0, -9, -9, 0, 0, 0};
 		snpInfo.push_back(sInfo);
-		
+
 		ng_total++;
 	}
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return true;
 }
 
 
+
+
+
+
+
+// WJA Added
+//Read Oxford sample file
+bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, const vector<size_t> &p_column, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt)
+{
+	indicator_pheno.clear();
+	pheno.clear();
+	indicator_cvt.clear();
+
+	igzstream infile (file_sample.c_str(), igzstream::in);
+
+	if (!infile) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}
+
+	string line;
+	char *ch_ptr;
+
+
+	string id;
+	double p,d;
+
+	vector<double> pheno_row;
+	vector<int> ind_pheno_row;
+	int flag_na=0;
+
+	size_t num_cols=0;
+	size_t num_p_in_file=0;
+	size_t num_cvt_in_file=0;
+
+//	size_t p_max=*max_element(p_column.begin(), p_column.end());
+
+	map<size_t, size_t> mapP2c;
+	for (size_t i=0; i<p_column.size(); i++) {
+		mapP2c[p_column[i]]=i;
+		pheno_row.push_back(-9);
+		ind_pheno_row.push_back(0);
+	}
+	// read header line1
+	if(!safeGetline(infile, line).eof()) {
+		ch_ptr=strtok((char *)line.c_str(), " ");
+		if(strcmp(ch_ptr, "ID_1")!=0) {return false;}
+		ch_ptr=strtok(NULL, " ");
+		if(strcmp(ch_ptr, "ID_2")!=0) {return false;}
+		ch_ptr=strtok(NULL, " ");
+		if(strcmp(ch_ptr, "missing")!=0) {return false;}
+		while (ch_ptr!=NULL) {
+			num_cols++;
+			ch_ptr=strtok (NULL, " ");
+
+		}
+		num_cols--;
+	}
+
+	vector<map<uint32_t, size_t> > cvt_factor_levels;
+
+	char col_type[num_cols];
+	// read header line2
+	if(!safeGetline(infile, line).eof()) {
+		ch_ptr=strtok ((char *)line.c_str(), " ");
+		if(strcmp(ch_ptr, "0")!=0) {return false;}
+		ch_ptr=strtok(NULL, " ");
+		if(strcmp(ch_ptr, "0")!=0) {return false;}
+		ch_ptr=strtok(NULL, " ");
+		if(strcmp(ch_ptr, "0")!=0) {return false;}
+		size_t it=0;
+		ch_ptr=strtok (NULL, " ");
+		if(ch_ptr!=NULL)
+			while(ch_ptr!=NULL){
+				col_type[it++]=ch_ptr[0];
+				if(ch_ptr[0]=='D') {cvt_factor_levels.push_back(map<uint32_t, size_t>());num_cvt_in_file++;}
+				if(ch_ptr[0]=='C') {num_cvt_in_file++;}
+				if((ch_ptr[0]=='P')||(ch_ptr[0]=='B')) {num_p_in_file++;}
+				ch_ptr=strtok(NULL, " ");
+			}
+
+	}
+
+	while (!safeGetline(infile, line).eof()) {
+
+		ch_ptr=strtok ((char *)line.c_str(), " ");
+
+		for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " ");}
+
+
+		size_t i=0;
+		size_t p_i=0;
+		size_t fac_cvt_i=0;
+
+		while (i<num_cols) {
+
+			if((col_type[i]=='P')||(col_type[i]=='B'))
+			{
+				if (mapP2c.count(p_i+1)!=0) {
+					if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[mapP2c[p_i+1]]=0; pheno_row[mapP2c[p_i+1]]=-9;}
+					else {p=atof(ch_ptr); ind_pheno_row[mapP2c[p_i+1]]=1; pheno_row[mapP2c[p_i+1]]=p;}
+				}
+				p_i++;
+			}
+			if(col_type[i]=='D')
+			{
+				// NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
+				if (strcmp(ch_ptr, "NA")!=0) {uint32_t level=atoi(ch_ptr); if(cvt_factor_levels[fac_cvt_i].count(level) == 0) {cvt_factor_levels[fac_cvt_i][level]=cvt_factor_levels[fac_cvt_i].size();}}
+				fac_cvt_i++;
+			}
+
+			ch_ptr=strtok (NULL, " ");
+			i++;
+		}
+
+
+		indicator_pheno.push_back(ind_pheno_row);
+		pheno.push_back(pheno_row);
+
+	}
+	// close and reopen the file
+ 	infile.close();
+ 	infile.clear();
+
+	if(num_cvt_in_file>0)
+	{
+		igzstream infile2 (file_sample.c_str(), igzstream::in);
+
+		if (!infile2) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}
+		// skip header
+		safeGetline(infile2, line);
+		safeGetline(infile2, line);
+
+		// pull in the covariates now we now the number of factor levels
+		while (!safeGetline(infile2, line).eof()) {
+
+			vector<double> v_d; flag_na=0;
+			ch_ptr=strtok ((char *)line.c_str(), " ");
+
+			for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " ");}
+
+
+			size_t i=0;
+			size_t fac_cvt_i=0;
+			size_t num_fac_levels;
+			while (i<num_cols) {
+
+				if(col_type[i]=='C')
+				{
+					if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
+					else {d=atof(ch_ptr);}
+
+					v_d.push_back(d);
+				}
+
+
+				if(col_type[i]=='D')
+				{
+					// NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
+					num_fac_levels=cvt_factor_levels[fac_cvt_i].size();
+					if(num_fac_levels>1)
+					{
+						if (strcmp(ch_ptr, "NA")==0) {flag_na=1; for(size_t it=0;it<num_fac_levels-1; it++) {v_d.push_back(-9);}}
+						else {uint32_t level=atoi(ch_ptr); for(size_t it=0;it<num_fac_levels-1;it++) {cvt_factor_levels[fac_cvt_i][level]==it+1 ? v_d.push_back(1.0) : v_d.push_back(0.0); }}
+					}
+					fac_cvt_i++;
+				}
+
+				ch_ptr=strtok (NULL, " ");
+				i++;
+			}
+
+			if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);}
+			cvt.push_back(v_d);
+
+
+		}
+
+		if (indicator_cvt.empty()) {n_cvt=0;}
+		else {
+			flag_na=0;
+			for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
+				if (indicator_cvt[i]==0) {continue;}
+
+				if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
+					if (flag_na!=0 && n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;}
+			}
+		}
+
+		infile2.close();
+		infile2.clear();
+	}
+ 	return true;
+}
+
+
+
+// WJA Added
+//Read bgen file, the first time
+#include <cstdint>
+#include <assert.h>
+bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
+{
+
+	indicator_snp.clear();
+
+	ifstream infile (file_bgen.c_str(), ios::binary);
+	if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}
+
+	gsl_vector *genotype=gsl_vector_alloc (W->size1);
+	gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
+	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
+	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
+	gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
+	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+
+	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+	int sig;
+	LUDecomp (WtW, pmt, &sig);
+	LUInvert (WtW, pmt, WtWi);
+
+	// read in header
+	uint32_t bgen_snp_block_offset;
+	uint32_t bgen_header_length;
+	uint32_t bgen_nsamples;
+	uint32_t bgen_nsnps;
+	uint32_t bgen_flags;
+	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+	bgen_snp_block_offset-=4;
+	infile.ignore(4+bgen_header_length-20);
+	bgen_snp_block_offset-=4+bgen_header_length-20;
+	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+	bgen_snp_block_offset-=4;
+	bool CompressedSNPBlocks=bgen_flags&0x1;
+	bool LongIds=bgen_flags&0x4;
+
+	if(!LongIds) {return false;}
+
+	infile.ignore(bgen_snp_block_offset);
+
+	ns_test=0;
+
+	size_t ns_total=static_cast<size_t>(bgen_nsnps);
+
+	snpInfo.clear();
+	string rs;
+	long int b_pos;
+	string chr;
+//	double cM;
+	string major;
+	string minor;
+	string id;
+
+	double v_x, v_w;
+	int c_idv=0;
+
+
+	double maf, geno, geno_old;
+	size_t n_miss;
+	size_t n_0, n_1, n_2;
+	int flag_poly;
+
+	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+
+	size_t ni_total=indicator_idv.size();   // total number of samples in phenotype file
+	size_t ni_test=0;   // number of samples to use in test
+
+	uint32_t bgen_N;
+	uint16_t bgen_LS;
+	uint16_t bgen_LR;
+	uint16_t bgen_LC;
+	uint32_t bgen_SNP_pos;
+	uint32_t bgen_LA;
+	std::string bgen_A_allele;
+	uint32_t bgen_LB;
+	std::string bgen_B_allele;
+	uint32_t bgen_P;
+	size_t unzipped_data_size;
+
+	for (size_t i=0; i<ni_total; ++i) {
+
+		ni_test+=indicator_idv[i];
+	}
+
+
+
+//	ns_total=1;
+	for (size_t t=0; t<ns_total; ++t) {
+
+		id.clear();
+		rs.clear();
+		chr.clear();
+		bgen_A_allele.clear();
+		bgen_B_allele.clear();
+
+		infile.read(reinterpret_cast<char*>(&bgen_N),4);
+		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+
+		id.resize(bgen_LS);
+		infile.read(&id[0], bgen_LS);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+		rs.resize(bgen_LR);
+		infile.read(&rs[0], bgen_LR);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+		chr.resize(bgen_LC);
+		infile.read(&chr[0], bgen_LC);
+
+		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+		bgen_A_allele.resize(bgen_LA);
+		infile.read(&bgen_A_allele[0], bgen_LA);
+
+
+		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+		bgen_B_allele.resize(bgen_LB);
+		infile.read(&bgen_B_allele[0], bgen_LB);
+
+
+		// should we switch according to MAF?
+		minor=bgen_B_allele;
+		major=bgen_A_allele;
+		b_pos=static_cast<long int>(bgen_SNP_pos);
+
+		uint16_t unzipped_data[3*bgen_N];
+
+		if (setSnps.size()!=0 && setSnps.count(rs)==0) {
+			SNPINFO sInfo={"-9", rs, -9, -9, minor, major, -9, -9, -9};
+			snpInfo.push_back(sInfo);
+			indicator_snp.push_back(0);
+			if(CompressedSNPBlocks)
+				infile.read(reinterpret_cast<char*>(&bgen_P),4);
+			else
+				bgen_P=6*bgen_N;
+
+			infile.ignore(static_cast<size_t>(bgen_P));
+
+			continue;
+		}
+
+
+		if(CompressedSNPBlocks)
+		{
+			infile.read(reinterpret_cast<char*>(&bgen_P),4);
+			uint8_t zipped_data[bgen_P];
+
+			unzipped_data_size=6*bgen_N;
+
+			infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
+			int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+			assert(result == Z_OK);
+
+		}
+		else
+		{
+			bgen_P=6*bgen_N;
+			infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+
+		}
+
+
+		maf=0; n_miss=0; flag_poly=0; geno_old=-9;
+		n_0=0; n_1=0; n_2=0;
+		c_idv=0;
+		gsl_vector_set_zero (genotype_miss);
+		for (size_t i=0; i<bgen_N; ++i) {
+			// CHECK this set correctly!
+			if (indicator_idv[i]==0) {continue;}
+
+
+			bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+			bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+			bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+			bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+
+			//CHECK 0.1 OK
+			if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}
+
+
+			bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+			bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+			bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+			geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+			if (geno>=0 && geno<=0.5) {n_0++;}
+			if (geno>0.5 && geno<1.5) {n_1++;}
+			if (geno>=1.5 && geno<=2.0) {n_2++;}
+
+			gsl_vector_set (genotype, c_idv, geno);
+
+			// CHECK WHAT THIS DOES
+			if (flag_poly==0) {geno_old=geno; flag_poly=2;}
+			if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
+
+			maf+=geno;
+
+			c_idv++;
+		}
+
+		maf/=2.0*static_cast<double>(ni_test-n_miss);
+
+		SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf};
+		snpInfo.push_back(sInfo);
+
+		if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
+
+		if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
+
+		if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
+
+		if (hwe_level!=0 && maf_level!=-1) {
+			if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
+		}
+
+		//filter SNP if it is correlated with W
+		//unless W has only one column, of 1s
+		for (size_t i=0; i<genotype->size; ++i) {
+			if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
+		}
+
+		gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
+		gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+		gsl_blas_ddot (genotype, genotype, &v_x);
+		gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
+
+		if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}
+
+		indicator_snp.push_back(1);
+		ns_test++;
+
+	}
+
+
+
+
+	return true;
+
+}
+
+
+//read oxford genotype file and calculate kinship matrix
+bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
+{
+	string file_bgen=file_oxford;
+	ifstream infile (file_bgen.c_str(), ios::binary);
+	if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}
+
+
+	// read in header
+	uint32_t bgen_snp_block_offset;
+	uint32_t bgen_header_length;
+	uint32_t bgen_nsamples;
+	uint32_t bgen_nsnps;
+	uint32_t bgen_flags;
+	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+	bgen_snp_block_offset-=4;
+	infile.ignore(4+bgen_header_length-20);
+	bgen_snp_block_offset-=4+bgen_header_length-20;
+	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+	bgen_snp_block_offset-=4;
+	bool CompressedSNPBlocks=bgen_flags&0x1;
+//	bool LongIds=bgen_flags&0x4;
+
+	infile.ignore(bgen_snp_block_offset);
+
+	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+	uint32_t bgen_N;
+	uint16_t bgen_LS;
+	uint16_t bgen_LR;
+	uint16_t bgen_LC;
+	uint32_t bgen_SNP_pos;
+	uint32_t bgen_LA;
+	std::string bgen_A_allele;
+	uint32_t bgen_LB;
+	std::string bgen_B_allele;
+	uint32_t bgen_P;
+	size_t unzipped_data_size;
+	string id;
+	string rs;
+	string chr;
+	double genotype;
+
+
+	size_t n_miss;
+	double d, geno_mean, geno_var;
+
+	size_t ni_total=matrix_kin->size1;
+	gsl_vector *geno=gsl_vector_alloc (ni_total);
+	gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
+
+	size_t ns_test=0;
+	for (size_t t=0; t<indicator_snp.size(); ++t) {
+
+		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
+
+		id.clear();
+		rs.clear();
+		chr.clear();
+		bgen_A_allele.clear();
+		bgen_B_allele.clear();
+
+		infile.read(reinterpret_cast<char*>(&bgen_N),4);
+		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+
+		id.resize(bgen_LS);
+		infile.read(&id[0], bgen_LS);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+		rs.resize(bgen_LR);
+		infile.read(&rs[0], bgen_LR);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+		chr.resize(bgen_LC);
+		infile.read(&chr[0], bgen_LC);
+
+		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+		bgen_A_allele.resize(bgen_LA);
+		infile.read(&bgen_A_allele[0], bgen_LA);
+
+
+		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+		bgen_B_allele.resize(bgen_LB);
+		infile.read(&bgen_B_allele[0], bgen_LB);
+
+
+
+
+		uint16_t unzipped_data[3*bgen_N];
+
+		if (indicator_snp[t]==0) {
+			if(CompressedSNPBlocks)
+				infile.read(reinterpret_cast<char*>(&bgen_P),4);
+			else
+				bgen_P=6*bgen_N;
+
+			infile.ignore(static_cast<size_t>(bgen_P));
+
+			continue;
+		}
+
+
+
+		if(CompressedSNPBlocks)
+		{
+
+
+			infile.read(reinterpret_cast<char*>(&bgen_P),4);
+			uint8_t zipped_data[bgen_P];
+
+			unzipped_data_size=6*bgen_N;
+
+			infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
+
+			int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+			assert(result == Z_OK);
+
+		}
+		else
+		{
+
+			bgen_P=6*bgen_N;
+			infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+		}
+
+
+
+		geno_mean=0.0; n_miss=0; geno_var=0.0;
+		gsl_vector_set_all(geno_miss, 0);
+
+		for (size_t i=0; i<bgen_N; ++i) {
+
+
+				bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+				bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+				bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+				// WJA
+				bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+				if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(geno_miss, i, 0.0); n_miss++;}
+				else {
+
+					bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+					bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+					bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+					genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+
+					gsl_vector_set(geno, i, genotype);
+					gsl_vector_set(geno_miss, i, 1.0);
+					geno_mean+=genotype;
+					geno_var+=genotype*genotype;
+				}
+
+		}
+
+
+		geno_mean/=(double)(ni_total-n_miss);
+		geno_var+=geno_mean*geno_mean*(double)n_miss;
+		geno_var/=(double)ni_total;
+		geno_var-=geno_mean*geno_mean;
+//		geno_var=geno_mean*(1-geno_mean*0.5);
+
+		for (size_t i=0; i<ni_total; ++i) {
+			if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+		}
+
+		gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+		if (geno_var!=0) {
+			if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
+			else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
+			else {cout<<"Unknown kinship mode."<<endl;}
+		}
+
+		ns_test++;
+    }
+	cout<<endl;
+
+	gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
+
+	for (size_t i=0; i<ni_total; ++i) {
+		for (size_t j=0; j<i; ++j) {
+			d=gsl_matrix_get (matrix_kin, j, i);
+			gsl_matrix_set (matrix_kin, i, j, d);
+		}
+	}
+
+	gsl_vector_free (geno);
+	gsl_vector_free (geno_miss);
+
+	infile.close();
+	infile.clear();
+
+	return true;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//read header to determine which column contains which item
+bool ReadHeader (const string &line, HEADER &header)
+{
+  string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID"};
+  set<string> rs_set(rs_ptr, rs_ptr+10);
+  string chr_ptr[]={"chr","CHR"};
+  set<string> chr_set(chr_ptr, chr_ptr+2);
+  string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", "bp", "BP"};
+  set<string> pos_set(pos_ptr, pos_ptr+8);
+  string cm_ptr[]={"cm","CM"};
+  set<string> cm_set(cm_ptr, cm_ptr+2);
+  string a1_ptr[]={"a1","A1","allele1","ALLELE1"};
+  set<string> a1_set(a1_ptr, a1_ptr+4);
+  string a0_ptr[]={"a0","A0","allele0","ALLELE0"};
+  set<string> a0_set(a0_ptr, a0_ptr+4);
+
+  string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
+  set<string> z_set(z_ptr, z_ptr+6);
+  string beta_ptr[]={"beta","BETA","b","B"};
+  set<string> beta_set(beta_ptr, beta_ptr+4);
+  string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
+  set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
+  string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
+  set<string> chisq_set(chisq_ptr, chisq_ptr+4);
+  string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
+  set<string> p_set(p_ptr, p_ptr+6);
+
+  string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
+  set<string> n_set(n_ptr, n_ptr+6);
+  string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
+  set<string> nmis_set(nmis_ptr, nmis_ptr+6);
+  string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
+  set<string> nobs_set(nobs_ptr, nobs_ptr+4);
+
+  string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq","ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"};
+  set<string> af_set(af_ptr, af_ptr+10);
+  string var_ptr[]={"var","VAR"};
+  set<string> var_set(var_ptr, var_ptr+2);
+
+  string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
+  set<string> ws_set(ws_ptr, ws_ptr+4);
+  string cor_ptr[]={"cor","COR","r","R"};
+  set<string> cor_set(cor_ptr, cor_ptr+4);
+
+  header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; header.cor_col=0; header.coln=0;
+
+  char *ch_ptr;
+  string type;
+  size_t n_error=0;
+
+  ch_ptr=strtok ((char *)line.c_str(), " , \t");
+  while (ch_ptr!=NULL) {
+    type=ch_ptr;
+    if (rs_set.count(type)!=0) {
+      if (header.rs_col==0) {header.rs_col=header.coln+1;} else {cout<<"error! more than two rs columns in the file."<<endl; n_error++;}
+    } else if (chr_set.count(type)!=0) {
+      if (header.chr_col==0) {header.chr_col=header.coln+1;} else {cout<<"error! more than two chr columns in the file."<<endl; n_error++;}
+    } else if (pos_set.count(type)!=0) {
+      if (header.pos_col==0) {header.pos_col=header.coln+1;} else {cout<<"error! more than two pos columns in the file."<<endl; n_error++;}
+    } else if (cm_set.count(type)!=0) {
+      if (header.cm_col==0) {header.cm_col=header.coln+1;} else {cout<<"error! more than two cm columns in the file."<<endl; n_error++;}
+    } else if (a1_set.count(type)!=0) {
+      if (header.a1_col==0) {header.a1_col=header.coln+1;} else {cout<<"error! more than two allele1 columns in the file."<<endl; n_error++;}
+    } else if (a0_set.count(type)!=0) {
+      if (header.a0_col==0) {header.a0_col=header.coln+1;} else {cout<<"error! more than two allele0 columns in the file."<<endl; n_error++;}
+    } else if (z_set.count(type)!=0) {
+      if (header.z_col==0) {header.z_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
+    } else if (beta_set.count(type)!=0) {
+      if (header.beta_col==0) {header.beta_col=header.coln+1;} else {cout<<"error! more than two beta columns in the file."<<endl; n_error++;}
+    } else if (sebeta_set.count(type)!=0) {
+      if (header.sebeta_col==0) {header.sebeta_col=header.coln+1;} else {cout<<"error! more than two se_beta columns in the file."<<endl; n_error++;}
+    } else if (chisq_set.count(type)!=0) {
+      if (header.chisq_col==0) {header.chisq_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
+    } else if (p_set.count(type)!=0) {
+      if (header.p_col==0) {header.p_col=header.coln+1;} else {cout<<"error! more than two p columns in the file."<<endl; n_error++;}
+    } else if (n_set.count(type)!=0) {
+      if (header.n_col==0) {header.n_col=header.coln+1;} else {cout<<"error! more than two n_total columns in the file."<<endl; n_error++;}
+    } else if (nmis_set.count(type)!=0) {
+      if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else {cout<<"error! more than two n_mis columns in the file."<<endl; n_error++;}
+    } else if (nobs_set.count(type)!=0) {
+      if (header.nobs_col==0) {header.nobs_col=header.coln+1;} else {cout<<"error! more than two n_obs columns in the file."<<endl; n_error++;}
+    } else if (ws_set.count(type)!=0) {
+      if (header.ws_col==0) {header.ws_col=header.coln+1;} else {cout<<"error! more than two window_size columns in the file."<<endl; n_error++;}
+    } else if (af_set.count(type)!=0) {
+      if (header.af_col==0) {header.af_col=header.coln+1;} else {cout<<"error! more than two af columns in the file."<<endl; n_error++;}
+    } else if (cor_set.count(type)!=0) {
+      if (header.cor_col==0) {header.cor_col=header.coln+1;} else {cout<<"error! more than two cor columns in the file."<<endl; n_error++;}
+    } else {}
+
+    ch_ptr=strtok (NULL, " , \t");
+    header.coln++;
+  }
+
+  if (header.cor_col!=0 && header.cor_col!=header.coln) {cout<<"error! the cor column should be the last column."<<endl; n_error++;}
+
+  if (header.rs_col==0) {
+    if (header.chr_col!=0 && header.pos_col!=0) {
+      cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
+    } else {
+      cout<<"error! missing an rs column."<<endl; n_error++;
+    }
+  }
+
+  if (n_error==0) {return true;} else {return false;}
+}
+
+
+
+
+//read category file, record mapRS2in
+//the category file does not contain a null category
+//so if a snp has 0 for all categories, then it is not included in the analysis
+bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_t &n_vc)
+{
+  mapRS2cat.clear();
+
+  igzstream infile (file_cat.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}
+
+  string line;
+  char *ch_ptr;
+
+  string rs, chr, a1, a0, pos, cm;
+  size_t i_cat;// ns_vc=0;
+
+  //read header
+  HEADER header;
+  !safeGetline(infile, line).eof();
+  ReadHeader (line, header);
+
+  //use the header to count the number of categories
+  n_vc=header.coln;
+  if (header.rs_col!=0) {n_vc--;}
+  if (header.chr_col!=0) {n_vc--;}
+  if (header.pos_col!=0) {n_vc--;}
+  if (header.cm_col!=0) {n_vc--;}
+  if (header.a1_col!=0) {n_vc--;}
+  if (header.a0_col!=0) {n_vc--;}
+
+  //read the following lines to record mapRS2cat
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+    i_cat=0;
+    for (size_t i=0; i<header.coln; i++) {
+      if (header.rs_col!=0 && header.rs_col==i+1) {
+	rs=ch_ptr;
+      } else if (header.chr_col!=0 && header.chr_col==i+1) {
+	chr=ch_ptr;
+      } else if (header.pos_col!=0 && header.pos_col==i+1) {
+	pos=ch_ptr;
+      } else if (header.cm_col!=0 && header.cm_col==i+1) {
+	cm=ch_ptr;
+      } else if (header.a1_col!=0 && header.a1_col==i+1) {
+	a1=ch_ptr;
+      } else if (header.a0_col!=0 && header.a0_col==i+1) {
+	a0=ch_ptr;
+      } else if (atoi(ch_ptr)==1 || atoi(ch_ptr)==0) {
+	if (i_cat==0) {
+	  if (header.rs_col==0) {
+	    rs=chr+":"+pos;
+	  }
+	}
+
+	if (atoi(ch_ptr)==1 && mapRS2cat.count(rs)==0) {mapRS2cat[rs]=i_cat;}
+	i_cat++;
+      } else {}
+
+      ch_ptr=strtok (NULL, " , \t");
+    }
+
+    //if (mapRS2cat.count(rs)==0) {mapRS2cat[rs]=n_vc+1; ns_vc++;}
+  }
+
+  //if (ns_vc>0) {n_vc++;}
+
+  infile.clear();
+  infile.close();
+
+  return true;
+}
+
+
+
+
+//read bimbam mean genotype file and calculate kinship matrix; this time, the kinship matrix is not centered, and can contain multiple K matrix
+bool BimbamKin (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, const int k_mode, const int display_pace, const map<string, size_t> &mapRS2cat, map<string, double> &mapRS2var, vector<SNPINFO> &snpInfo, gsl_matrix *matrix_kin)
+{
+	igzstream infile (file_geno.c_str(), igzstream::in);
+	//ifstream infile (file_geno.c_str(), ifstream::in);
+	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+
+	string line;
+	char *ch_ptr;
+
+	size_t n_miss;
+	double d, geno_mean, geno_var;
+
+	size_t ni_test=matrix_kin->size1;
+	gsl_vector *geno=gsl_vector_alloc (ni_test);
+	gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
+
+	size_t n_vc=matrix_kin->size2/ni_test, i_vc;
+	string rs;
+	vector<size_t> ns_vec;
+	for (size_t i=0; i<n_vc; i++) {
+	  ns_vec.push_back(0);
+	}
+
+	size_t ns_test=0;
+	for (size_t t=0; t<indicator_snp.size(); ++t) {
+		!safeGetline(infile, line).eof();
+		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
+		if (indicator_snp[t]==0) {continue;}
+
+		ch_ptr=strtok ((char *)line.c_str(), " , \t");
+		ch_ptr=strtok (NULL, " , \t");
+		ch_ptr=strtok (NULL, " , \t");
+
+		rs=snpInfo[t].rs_number;//this line is new
+
+		geno_mean=0.0; n_miss=0; geno_var=0.0;
+		gsl_vector_set_all(geno_miss, 0);
+
+		size_t j=0;
+		for (size_t i=0; i<indicator_idv.size(); ++i) {
+		  if (indicator_idv[i]==0) {continue;}
+			ch_ptr=strtok (NULL, " , \t");
+			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(geno_miss, i, 0); n_miss++;}
+			else {
+				d=atof(ch_ptr);
+				gsl_vector_set (geno, j, d);
+				gsl_vector_set (geno_miss, j, 1);
+				geno_mean+=d;
+				geno_var+=d*d;
+			}
+			j++;
+		}
+
+		geno_mean/=(double)(ni_test-n_miss);
+		geno_var+=geno_mean*geno_mean*(double)n_miss;
+		geno_var/=(double)ni_test;
+		geno_var-=geno_mean*geno_mean;
+//		geno_var=geno_mean*(1-geno_mean*0.5);
+
+		for (size_t i=0; i<ni_test; ++i) {
+			if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+		}
+
+		//this line is new; removed
+		//gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+		if (geno_var!=0) {
+		  mapRS2var[rs]=geno_var;
+
+		  if (k_mode==1) {
+		    if (n_vc==1 || mapRS2cat.size()==0 ) {
+		      gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);
+                ns_vec[0]++;
+		    } else if (mapRS2cat.count(rs)!=0) {
+		      i_vc=mapRS2cat.at(rs);
+		      ns_vec[i_vc]++;
+		      gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
+		      gsl_blas_dsyr (CblasUpper, 1.0, geno, &kin_sub.matrix);
+		    }
+
+		    //eigenlib_dsyr (1.0, geno, matrix_kin);
+		  } else if (k_mode==2) {
+		    if (n_vc==1 || mapRS2cat.size()==0 ) {
+		      gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);
+                ns_vec[0]++;
+		    } else if (mapRS2cat.count(rs)!=0) {
+		      i_vc=mapRS2cat.at(rs);
+		      ns_vec[i_vc]++;
+		      gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
+		      gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, &kin_sub.matrix);
+		    }
+		  } else {
+		    cout<<"Unknown kinship mode."<<endl;
+		  }
+		}
+		ns_test++;
+    }
+	cout<<endl;
+
+	for (size_t t=0; t<n_vc; t++) {
+	  if (ns_vec[t]!=0) {gsl_matrix_scale (matrix_kin, 1.0/(double)ns_vec[t]);}
+
+	  for (size_t i=0; i<ni_test; ++i) {
+	    for (size_t j=0; j<i; ++j) {
+	      d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
+	      gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
+	    }
+	  }
+	}
+
+	gsl_vector_free (geno);
+	gsl_vector_free (geno_miss);
+
+	infile.close();
+	infile.clear();
+
+	return true;
+}
+
+
+
+
+
+
+
+bool PlinkKin (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, const int k_mode, const int display_pace, const map<string, size_t> &mapRS2cat, map<string, double> &mapRS2var, vector<SNPINFO> &snpInfo, gsl_matrix *matrix_kin)
+{
+	ifstream infile (file_bed.c_str(), ios::binary);
+	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+
+	char ch[1];
+	bitset<8> b;
+
+	size_t n_miss, ci_total, ci_test;
+	double d, geno_mean, geno_var;
+
+	size_t ni_test=matrix_kin->size1;
+	size_t ni_total=indicator_idv.size();
+	gsl_vector *geno=gsl_vector_alloc (ni_test);
+
+	size_t ns_test=0;
+	int n_bit;
+
+	size_t n_vc=matrix_kin->size2/ni_test, i_vc;
+	string rs;
+	vector<size_t> ns_vec;
+	for (size_t i=0; i<n_vc; i++) {
+	  ns_vec.push_back(0);
+	}
+
+	//calculate n_bit and c, the number of bit for each snp
+	if (ni_total%4==0) {n_bit=ni_total/4;}
+	else {n_bit=ni_total/4+1; }
+
+	//print the first three majic numbers
+	for (int i=0; i<3; ++i) {
+		infile.read(ch,1);
+		b=ch[0];
+	}
+
+	for (size_t t=0; t<indicator_snp.size(); ++t) {
+		if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
+		if (indicator_snp[t]==0) {continue;}
+
+		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
+
+		rs=snpInfo[t].rs_number;//this line is new
+
+		//read genotypes
+		geno_mean=0.0;	n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
+		for (int i=0; i<n_bit; ++i) {
+			infile.read(ch,1);
+			b=ch[0];
+			for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
+				if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
+				if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+				if (b[2*j]==0) {
+					if (b[2*j+1]==0) {gsl_vector_set(geno, ci_test, 2.0); geno_mean+=2.0; geno_var+=4.0; }
+					else {gsl_vector_set(geno, ci_test, 1.0); geno_mean+=1.0; geno_var+=1.0;}
+				}
+				else {
+					if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
+					else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
+				}
+
+				ci_test++;
+				ci_total++;
+			}
+		}
+
+
+		geno_mean/=(double)(ni_test-n_miss);
+		geno_var+=geno_mean*geno_mean*(double)n_miss;
+		geno_var/=(double)ni_test;
+		geno_var-=geno_mean*geno_mean;
+//		geno_var=geno_mean*(1-geno_mean*0.5);
+
+		for (size_t i=0; i<ni_test; ++i) {
+			d=gsl_vector_get(geno,i);
+			if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
+		}
+
+		//this line is new; removed
+		//gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+		if (geno_var!=0) {
+		  mapRS2var[rs]=geno_var;
+			if (k_mode==1) {
+			  if (n_vc==1 || mapRS2cat.size()==0 ) {
+			    gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);
+			    ns_vec[0]++;
+			  } else if (mapRS2cat.count(rs)!=0) {
+			    i_vc=mapRS2cat.at(rs);
+			    ns_vec[i_vc]++;
+			    gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
+			    gsl_blas_dsyr (CblasUpper, 1.0, geno, &kin_sub.matrix);
+			  }
+			} else if (k_mode==2) {
+			  if (n_vc==1 || mapRS2cat.size()==0 ) {
+			    gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);
+			    ns_vec[0]++;
+			  } else if (mapRS2cat.count(rs)!=0) {
+			    i_vc=mapRS2cat.at(rs);
+			    ns_vec[i_vc]++;
+			    gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
+			    gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, &kin_sub.matrix);
+			  }
+			} else {
+			  cout<<"Unknown kinship mode."<<endl;
+			}
+		}
+
+		ns_test++;
+    }
+	cout<<endl;
+
+	for (size_t t=0; t<n_vc; t++) {
+	  if (ns_vec[t]!=0) {gsl_matrix_scale (matrix_kin, 1.0/(double)ns_vec[t]);}
+
+	  for (size_t i=0; i<ni_test; ++i) {
+	    for (size_t j=0; j<i; ++j) {
+	      d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
+	      gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
+	      //cout<<d<<" ";
+	    }
+	    //cout<<endl;
+	  }
+	}
+
+	d=0;
+	for (size_t i=0; i<ni_test; ++i) {
+	  for (size_t j=0; j<ni_test; ++j) {
+	    d+=gsl_matrix_get (matrix_kin, i, j)*gsl_matrix_get (matrix_kin, i, j);
+	  }
+	}
+	d/=(double)ni_test*(double)ni_test;
+	//cout<<"trace = "<<scientific<<d-1/(double)ni_test<<endl;
+
+
+
+	gsl_vector_free (geno);
+
+	infile.close();
+	infile.clear();
+
+	return true;
+}
+
+
+
+//read var file, store mapRS2var
+bool ReadFile_var (const string &file_var, map<string, double> &mapRS2var)
+{
+  mapRS2var.clear();
+
+  igzstream infile (file_var.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open var file: "<<file_var<<endl; return false;}
+
+  char *ch_ptr;
+  string line, rs;
+  double var;
+
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    rs=ch_ptr;
+    ch_ptr=strtok (NULL, " , \t");
+    var=atof(ch_ptr);
+    mapRS2var[rs]=var;
+  }
+
+  return true;
+}
+
+
+//read beta file, use the mapRS2var to select snps (and to provide var if maf/var is not provided in the beta file), calculate q
+void ReadFile_beta (const string &file_beta, const int k_mode, const map<string, size_t> &mapRS2cat, const map<string, double> &mapRS2var, gsl_vector *q, gsl_vector *s, size_t &ni_total, size_t &ns_total, size_t &ns_test)
+{
+  gsl_vector_set_zero(q);
+  ni_total=0; ns_total=0; ns_test=0;
+
+  igzstream infile (file_beta.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open beta file: "<<file_beta<<endl; return;}
+
+  string line;
+  char *ch_ptr;
+  string type;
+
+  string rs, chr, a1, a0, pos, cm;
+  double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
+  size_t n_total=0, n_mis=0, n_obs=0;
+
+  vector<double> vec_q, vec_s;
+  for (size_t i=0; i<q->size; i++) {
+    vec_q.push_back(0.0);
+    vec_s.push_back(0.0);
+  }
+
+  //read header
+  HEADER header;
+  !safeGetline(infile, line).eof();
+  ReadHeader (line, header);
+
+  if (header.n_col==0 ) {
+    if (header.nobs_col==0 && header.nmis_col==0) {
+      cout<<"error! missing sample size in the beta file."<<endl;
+    } else {
+      cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+    }
+  }
+
+  if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) && header.chisq_col==0 && header.p_col==0) {
+    cout<<"error! missing z scores in the beta file."<<endl;
+  }
+
+  if (header.af_col==0 && header.var_col==0 && mapRS2var.size()==0) {
+    cout<<"error! missing allele frequency in the beta file."<<endl;
+  }
+
+  while (!safeGetline(infile, line).eof()) {
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+    z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
+    n_total=0; n_mis=0; n_obs=0; af=0; var_x=0;
+    for (size_t i=0; i<header.coln; i++) {
+      if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
+      if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
+      if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
+      if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
+      if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
+      if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+
+      if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
+      if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
+      if (header.sebeta_col!=0 && header.sebeta_col==i+1) {se_beta=atof(ch_ptr);}
+      if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
+      if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
+
+      if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
+      if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
+      if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+
+      if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
+      if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+
+      ch_ptr=strtok (NULL, " , \t");
+    }
+
+    if (header.rs_col==0) {
+      rs=chr+":"+pos;
+    }
+
+    if (header.n_col==0) {
+      n_total=n_mis+n_obs;
+    }
+
+    //both z values and beta/se_beta have directions, while chisq/pvalue do not
+    if (header.z_col!=0) {
+      zsquare=z*z;
+    } else if (header.beta_col!=0 && header.sebeta_col!=0) {
+      z=beta/se_beta;
+      zsquare=z*z;
+    } else if (header.chisq_col!=0) {
+      zsquare=chisq;
+    } else if (header.p_col!=0) {
+      zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
+    } else {zsquare=0;}
+
+    //if the snp is also present in cor file, then do calculations
+    if (mapRS2var.count(rs)!=0 && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
+      //obtain var_x
+      if (k_mode==1) {
+	if (header.var_col==0) {
+	  if (header.af_col!=0) {
+	    var_x=2.0*af*(1.0-af);
+	  } else {
+	    var_x=mapRS2var.at(rs);
+	  }
+	}
+      } else {
+	var_x=1.0;
+      }
+
+      //compute q
+      if (mapRS2cat.size()!=0) {
+	vec_q[mapRS2cat.at(rs) ]+=(zsquare-1.0)*var_x/(double)n_total;
+	vec_s[mapRS2cat.at(rs) ]+=var_x;
+      } else {
+	vec_q[0]+=(zsquare-1.0)*var_x/(double)n_total;
+	vec_s[0]+=var_x;
+      }
+
+      ni_total=max(ni_total, n_total);
+      ns_test++;
+    }
+
+    ns_total++;
+  }
+
+  //save q
+  for (size_t i=0; i<q->size; i++) {
+    if (vec_s[i]!=0) {
+      gsl_vector_set(q, i, vec_q[i]/vec_s[i]);
+    }
+    gsl_vector_set(s, i, vec_s[i]);
+  }
+
+  infile.clear();
+  infile.close();
+
+  return;
+}
+
+
+
+
+//read S file: S and Svar
+void ReadFile_s (const string &file_s, gsl_matrix *S, gsl_matrix *Svar)
+{
+  igzstream infile (file_s.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open s file: "<<file_s<<endl; return;}
+
+  string line;
+  char *ch_ptr;
+  double d;
+
+  for (size_t i=0; i<S->size1; i++) {
+    !safeGetline(infile, line).eof();
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    for (size_t j=0; j<S->size2; j++) {
+      d=gsl_matrix_get(S, i, j)+atof(ch_ptr);
+      gsl_matrix_set(S, i, j, d);
+      ch_ptr=strtok (NULL, " , \t");
+    }
+  }
+
+  for (size_t i=0; i<Svar->size1; i++) {
+    !safeGetline(infile, line).eof();
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    for (size_t j=0; j<Svar->size2; j++) {
+      d=gsl_matrix_get(Svar, i, j)+atof(ch_ptr);
+      gsl_matrix_set(Svar, i, j, d);
+      ch_ptr=strtok (NULL, " , \t");
+    }
+  }
+
+  infile.clear();
+  infile.close();
+
+  return;
+}
+
+
+
+
+void ReadFile_ms (const string &file_ms, gsl_matrix *S, gsl_matrix *Svar)
+{
+  gsl_matrix_set_zero(S);
+  gsl_matrix_set_zero(Svar);
+
+  string file_name;
+
+  igzstream infile (file_ms.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open ms file: "<<file_ms<<endl; return;}
+
+  while (!safeGetline(infile, file_name).eof()) {
+    ReadFile_s(file_name, S, Svar);
+  }
+
+  infile.clear();
+  infile.close();
+
+  return;
+}
+
+
+
+
+//read V file: V (i.e. Q)
+void ReadFile_v (const string &file_v, gsl_matrix *V)
+{
+  igzstream infile (file_v.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open v file: "<<file_v<<endl; return;}
+
+  string line;
+  char *ch_ptr;
+  double d;
+
+  for (size_t i=0; i<V->size1; i++) {
+    !safeGetline(infile, line).eof();
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    for (size_t j=0; j<V->size2; j++) {
+      d=gsl_matrix_get(V, i, j)+atof(ch_ptr);
+      gsl_matrix_set(V, i, j, d);
+      ch_ptr=strtok (NULL, " , \t");
+    }
+  }
+
+  infile.clear();
+  infile.close();
+
+  return;
+}
+
+
+void ReadFile_mv (const string &file_mv, gsl_matrix *V)
+{
+  gsl_matrix_set_zero(V);
+
+  string file_name;
+
+  igzstream infile (file_mv.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open ms file: "<<file_mv<<endl; return;}
+
+  while (!safeGetline(infile, file_name).eof()) {
+    ReadFile_v(file_name, V);
+  }
+
+  infile.clear();
+  infile.close();
+
+  return;
+}
+
+
+//read q file: q, s and ni_test
+void ReadFile_q (const string &file_s, gsl_vector *q_vec, gsl_vector *s_vec, double &df)
+{
+  igzstream infile (file_s.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open s file: "<<file_s<<endl; return;}
+
+  string line;
+  char *ch_ptr;
+  double d;
+
+  for (size_t i=0; i<q_vec->size; i++) {
+    !safeGetline(infile, line).eof();
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    d=gsl_vector_get(q_vec, i)+atof(ch_ptr);
+    gsl_vector_set(q_vec, i, d);
+  }
+
+  for (size_t i=0; i<s_vec->size; i++) {
+    !safeGetline(infile, line).eof();
+    ch_ptr=strtok ((char *)line.c_str(), " , \t");
+    d=gsl_vector_get(s_vec, i)+atof(ch_ptr);
+    gsl_vector_set(s_vec, i, d);
+  }
+
+  !safeGetline(infile, line).eof();
+  ch_ptr=strtok ((char *)line.c_str(), " , \t");
+  df=atof(ch_ptr);
+
+  infile.clear();
+  infile.close();
+
+  return;
+}
+
+
+
+void ReadFile_mq (const string &file_mq, gsl_vector *q_vec, gsl_vector *s_vec, double &df)
+{
+  gsl_vector_set_zero(q_vec);
+  gsl_vector_set_zero(s_vec);
+
+  string file_name;
+
+  igzstream infile (file_mq.c_str(), igzstream::in);
+  if (!infile) {cout<<"error! fail to open mq file: "<<file_mq<<endl; return;}
+
+  while (!safeGetline(infile, file_name).eof()) {
+    ReadFile_q(file_name, q_vec, s_vec, df);
+  }
+
+  infile.clear();
+  infile.close();
+
+  return;
+}