about summary refs log tree commit diff
path: root/src/lm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/lm.cpp')
-rw-r--r--src/lm.cpp516
1 files changed, 380 insertions, 136 deletions
diff --git a/src/lm.cpp b/src/lm.cpp
index 7577d0a..b4bc010 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -1,17 +1,17 @@
 /*
  Genome-wide Efficient Mixed Model Association (GEMMA)
  Copyright (C) 2011  Xiang Zhou
- 
+
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
- 
+
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
- 
+
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
@@ -26,7 +26,7 @@
 #include <cmath>
 #include <iostream>
 #include <stdio.h>
-#include <stdlib.h> 
+#include <stdlib.h>
 #include <bitset>
 #include <cstring>
 
@@ -57,48 +57,50 @@ using namespace std;
 
 
 
-void LM::CopyFromParam (PARAM &cPar) 
+void LM::CopyFromParam (PARAM &cPar)	// Copies the settings this LM object works with out of cPar.
 {
 	a_mode=cPar.a_mode;
 	d_pace=cPar.d_pace;
-	
+	// Input/output file names; path_out is the directory WriteFiles writes into.
 	file_bfile=cPar.file_bfile;
 	file_geno=cPar.file_geno;
 	file_out=cPar.file_out;
 	path_out=cPar.path_out;
 	file_gene=cPar.file_gene;
-	
+	// WJA added: file prefix to which Analyzebgen appends ".bgen"
+	file_oxford=cPar.file_oxford;
+	// time_opt restarts at zero; the Analyze* methods add their elapsed minutes to it.
 	time_opt=0.0;
-	
+	// Count fields, copied unchanged from cPar.
 	ni_total=cPar.ni_total;
 	ns_total=cPar.ns_total;
 	ni_test=cPar.ni_test;
 	ns_test=cPar.ns_test;
 	n_cvt=cPar.n_cvt;
-	
+	// ng_test restarts at zero and is handed back to cPar by CopyToParam.
 	ng_total=cPar.ng_total;
 	ng_test=0;
-	
-	indicator_idv=cPar.indicator_idv;	
-	indicator_snp=cPar.indicator_snp;	
+	// Inclusion flags (entries equal to 0 are skipped by the Analyze* loops) and SNP annotations.
+	indicator_idv=cPar.indicator_idv;
+	indicator_snp=cPar.indicator_snp;
 	snpInfo=cPar.snpInfo;
-	
+
 	return;
 }
 
 
-void LM::CopyToParam (PARAM &cPar) 
+void LM::CopyToParam (PARAM &cPar)	// Writes the results kept by this object (time_opt, ng_test) back into cPar.
 {
-	cPar.time_opt=time_opt;	
-	
+	cPar.time_opt=time_opt;	// minutes accumulated by the Analyze* methods
+
 	cPar.ng_test=ng_test;
-	
+
 	return;
 }
 
 
 
-void LM::WriteFiles () 
+void LM::WriteFiles ()
 {
 	string file_str;
 	file_str=path_out+"/"+file_out;
@@ -109,7 +111,7 @@ void LM::WriteFiles ()
 
 	if (!file_gene.empty()) {
 		outfile<<"geneID"<<"\t";
-		
+
 		if (a_mode==51) {
 			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
 		} else if (a_mode==52) {
@@ -119,10 +121,10 @@ void LM::WriteFiles ()
 		} else if (a_mode==54) {
 			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
 		} else {}
-				
-		for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {	
+
+		for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
 			outfile<<snpInfo[t].rs_number<<"\t";
-			
+
 			if (a_mode==51) {
 				outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<endl;
 			} else if (a_mode==52) {
@@ -132,10 +134,10 @@ void LM::WriteFiles ()
 			} else if (a_mode==54) {
 				outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
 			} else {}
-		}	
+		}
 	}  else {
-		outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-		
+		outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"<<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
+
 		if (a_mode==51) {
 			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
 		} else if (a_mode==52) {
@@ -145,13 +147,13 @@ void LM::WriteFiles ()
 		} else if (a_mode==54) {
 			outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
 		} else {}
-		
+
 		size_t t=0;
 		for (size_t i=0; i<snpInfo.size(); ++i) {
 			if (indicator_snp[i]==0) {continue;}
-			
-			outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-			
+
+			outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<ni_test-snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
+
 			if (a_mode==51) {
 				outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<endl;
 			} else if (a_mode==52) {
@@ -164,8 +166,8 @@ void LM::WriteFiles ()
 			t++;
 		}
 	}
-	
-		
+
+
 	outfile.close();
 	outfile.clear();
 	return;
@@ -179,21 +181,21 @@ void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *Wt
 {
 	size_t c_size=Wty->size;
 	double d;
-	
+
 	gsl_vector *WtWiWtx=gsl_vector_alloc (c_size);
-	
+
 	gsl_blas_ddot (x, x, &xPwx);
 	gsl_blas_ddot (x, y, &xPwy);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);	
-	
-	gsl_blas_ddot (WtWiWtx, Wtx, &d);	
+	gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+
+	gsl_blas_ddot (WtWiWtx, Wtx, &d);
 	xPwx-=d;
-	
-	gsl_blas_ddot (WtWiWtx, Wty, &d);	
+
+	gsl_blas_ddot (WtWiWtx, Wty, &d);
 	xPwy-=d;
-	
+
 	gsl_vector_free (WtWiWtx);
-	
+
 	return;
 }
 
@@ -202,17 +204,17 @@ void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y,
 {
 	size_t c_size=Wty->size;
 	double d;
-	
+
 	gsl_vector *WtWiWty=gsl_vector_alloc (c_size);
-	
+
 	gsl_blas_ddot (y, y, &yPwy);
-	gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);	
-	
-	gsl_blas_ddot (WtWiWty, Wty, &d);	
+	gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);
+
+	gsl_blas_ddot (WtWiWty, Wty, &d);
 	yPwy-=d;
-	
+
 	gsl_vector_free (WtWiWty);
-	
+
 	return;
 }
 
@@ -223,38 +225,38 @@ void LmCalcP (const size_t test_mode, const double yPwy, const double xPwy, cons
 {
 	double yPxy=yPwy-xPwy*xPwy/xPwx;
 	double se_wald, se_score;
-	
+
 	beta=xPwy/xPwx;
 	se_wald=sqrt(yPxy/(df*xPwx) );
 	se_score=sqrt(yPwy/((double)n_size*xPwx) );
-	
+
 	p_wald=gsl_cdf_fdist_Q (beta*beta/(se_wald*se_wald), 1.0, df);
 	p_score=gsl_cdf_fdist_Q (beta*beta/(se_score*se_score), 1.0, df);
 	p_lrt=gsl_cdf_chisq_Q ((double)n_size*(log(yPwy)-log(yPxy)), 1);
-	
+
 	if (test_mode==3) {se=se_score;} else {se=se_wald;}
-	
+
 	return;
 }
 
 
 
 
-void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x) 
+void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
 {
 	ifstream infile (file_gene.c_str(), ifstream::in);
 	if (!infile) {cout<<"error reading gene expression file:"<<file_gene<<endl; return;}
-	
+
 	clock_t time_start=clock();
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
 	int c_phen;
 	string rs; //gene id
 	double d;
-	
+
 	//calculate some basic quantities
 	double yPwy, xPwy, xPwx;
 	double df=(double)W->size1-(double)W->size2-1.0;
@@ -262,7 +264,7 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
 	gsl_vector *y=gsl_vector_alloc (W->size1);
 
 	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);	
+	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
 	gsl_vector *Wty=gsl_vector_alloc (W->size2);
 	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
 	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
@@ -274,42 +276,42 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
 
 	gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
 	CalcvPv(WtWi, Wtx, x, xPwx);
-		
+
 	//header
 	getline(infile, line);
-	
+
 	for (size_t t=0; t<ng_total; t++) {
 		getline(infile, line);
 		if (t%d_pace==0 || t==ng_total-1) {ProgressBar ("Performing Analysis ", t, ng_total-1);}
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		rs=ch_ptr;
-		
-		c_phen=0; 
+
+		c_phen=0;
 		for (size_t i=0; i<indicator_idv.size(); ++i) {
 			ch_ptr=strtok (NULL, " , \t");
 			if (indicator_idv[i]==0) {continue;}
-			
-			d=atof(ch_ptr); 			
+
+			d=atof(ch_ptr);
 			gsl_vector_set(y, c_phen, d);
-			
+
 			c_phen++;
 		}
-				
-		//calculate statistics		
-		time_start=clock();	
-	
+
+		//calculate statistics
+		time_start=clock();
+
 		gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
 		CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
-		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);	
-	
+		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+
 		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		
+
 		//store summary data
 		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
 		sumStat.push_back(SNPs);
 	}
 	cout<<endl;
-	
+
 	gsl_vector_free(y);
 
 	gsl_matrix_free(WtW);
@@ -317,31 +319,259 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
 	gsl_vector_free(Wty);
 	gsl_vector_free(Wtx);
 	gsl_permutation_free(pmt);
-	
+
 	infile.close();
 	infile.clear();
-	
+
 	return;
 }
 
 
 
 
+// WJA added
+#include <assert.h>
+// Linear-model association scan over the BGEN file <file_oxford>.bgen.
+// Every variant block is read in file order; blocks whose indicator_snp entry is 0 are
+// skipped, the others are decoded (zlib-inflated when bit 0 of the header flags is set),
+// converted to per-individual dosages and tested, appending one SUMSTAT to sumStat.
+// W and y are used exactly as in AnalyzeBimbam/AnalyzePlink. Decoding buffers live on
+// the heap, and truncated or undecodable input ends the run instead of being analysed.
+void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
+{
+	string file_bgen=file_oxford+".bgen";
+	ifstream infile (file_bgen.c_str(), ios::binary);
+	if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
+
+	clock_t time_start=clock();
+
+	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+	int n_miss, c_phen;
+	double geno, x_mean;
+
+	//calculate some basic quantities
+	double yPwy, xPwy, xPwx;
+	double df=(double)W->size1-(double)W->size2-1.0;
+
+	gsl_vector *x=gsl_vector_alloc (W->size1);
+	gsl_vector *x_miss=gsl_vector_alloc (W->size1);
+
+	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
+	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+	gsl_vector *Wty=gsl_vector_alloc (W->size2);
+	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
+	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+
+	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+	int sig;
+	LUDecomp (WtW, pmt, &sig);
+	LUInvert (WtW, pmt, WtWi);
+
+	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
+	CalcvPv(WtWi, Wty, y, yPwy);
+
+	// read in header; bgen_snp_block_offset is counted down by every byte consumed after
+	// the offset field itself, so what remains is the distance to the first SNP block
+	uint32_t bgen_snp_block_offset=0;
+	uint32_t bgen_header_length=0;
+	uint32_t bgen_nsamples=0;
+	uint32_t bgen_nsnps=0;
+	uint32_t bgen_flags=0;
+	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+	bgen_snp_block_offset-=4;
+	infile.ignore(4+bgen_header_length-20);
+	bgen_snp_block_offset-=4+bgen_header_length-20;
+	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+	bgen_snp_block_offset-=4;
+	bool CompressedSNPBlocks=bgen_flags&0x1;
+//	bool LongIds=bgen_flags&0x4;
+
+	infile.ignore(bgen_snp_block_offset);
+	if (!infile) {cout<<"error: truncated header in bgen file:"<<file_bgen<<endl; exit(EXIT_FAILURE);}
+
+	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+	// individuals whose three probabilities sum to less than this are treated as missing
+	const double min_call_prob=0.9;
+
+	uint32_t bgen_N=0;
+	uint16_t bgen_LS=0;
+	uint16_t bgen_LR=0;
+	uint16_t bgen_LC=0;
+	uint32_t bgen_SNP_pos=0;
+	uint32_t bgen_LA=0;
+	std::string bgen_A_allele;
+	uint32_t bgen_LB=0;
+	std::string bgen_B_allele;
+	uint32_t bgen_P=0;
+	uLongf unzipped_data_size;
+	string id;
+	string rs;
+	string chr;
+	// heap buffers reused for every SNP (stack arrays sized from the file could overflow)
+	vector<uint16_t> unzipped_data;
+	vector<Bytef> zipped_data;
+	std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl;
+
+	//start reading genotypes and analyze
+	for (size_t t=0; t<indicator_snp.size(); ++t)
+	{
+//		if (t>1) {break;}
+		if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs  ", t, ns_total-1);}
+		// read SNP header
+		id.clear();
+		rs.clear();
+		chr.clear();
+		bgen_A_allele.clear();
+		bgen_B_allele.clear();
+
+		infile.read(reinterpret_cast<char*>(&bgen_N),4);
+		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+		id.resize(bgen_LS);
+		infile.read(&id[0], bgen_LS);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+		rs.resize(bgen_LR);
+		infile.read(&rs[0], bgen_LR);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+		chr.resize(bgen_LC);
+		infile.read(&chr[0], bgen_LC);
+
+		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+		bgen_A_allele.resize(bgen_LA);
+		infile.read(&bgen_A_allele[0], bgen_LA);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+		bgen_B_allele.resize(bgen_LB);
+		infile.read(&bgen_B_allele[0], bgen_LB);
+
+		// a failed read leaves the fields above unusable, so stop rather than analyse them
+		if (!infile) {cout<<"error: unexpected end of bgen file:"<<file_bgen<<endl; exit(EXIT_FAILURE);}
+
+		if (indicator_snp[t]==0) {
+			if(CompressedSNPBlocks)
+				infile.read(reinterpret_cast<char*>(&bgen_P),4);
+			else
+				bgen_P=6*bgen_N;
+			infile.ignore(static_cast<size_t>(bgen_P));
+			continue;
+		}
+
+		// the genotype loop below indexes indicator_idv by sample position
+		if (bgen_N>indicator_idv.size()) {cout<<"error: bgen SNP block has more samples than indicator_idv"<<endl; exit(EXIT_FAILURE);}
+		unzipped_data.assign(3*static_cast<size_t>(bgen_N), 0);
+
+		if(CompressedSNPBlocks)
+		{
+			infile.read(reinterpret_cast<char*>(&bgen_P),4);
+			zipped_data.resize(bgen_P);
+			if (bgen_P>0) {infile.read(reinterpret_cast<char*>(&zipped_data[0]),bgen_P);}
+			// uncompress() takes the capacity in a uLongf and overwrites it with the inflated size
+			unzipped_data_size=6*static_cast<uLongf>(bgen_N);
+			int result=Z_DATA_ERROR;
+			if (infile && bgen_P>0 && bgen_N>0) {
+				result=uncompress(reinterpret_cast<Bytef*>(&unzipped_data[0]), &unzipped_data_size, &zipped_data[0], static_cast<uLong> (bgen_P));
+			}
+			// checked at run time: an assert would vanish in NDEBUG builds
+			if (result!=Z_OK) {cout<<"error: cannot decompress SNP block in bgen file:"<<file_bgen<<endl; exit(EXIT_FAILURE);}
+		}
+		else
+		{
+			bgen_P=6*bgen_N;
+			if (bgen_P>0) {infile.read(reinterpret_cast<char*>(&unzipped_data[0]),bgen_P);}
+			if (!infile) {cout<<"error: unexpected end of bgen file:"<<file_bgen<<endl; exit(EXIT_FAILURE);}
+		}
+
+		x_mean=0.0; c_phen=0; n_miss=0;
+		gsl_vector_set_zero(x_miss);
+		for (size_t i=0; i<bgen_N; ++i) {
+			if (indicator_idv[i]==0) {continue;}
+
+			bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+			bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+			bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+			// WJA
+			bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+			if (bgen_geno_prob_non_miss<min_call_prob) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+			else {
+				// renormalise, then dosage = 2*P(BB)+P(AB)
+				bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+				bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+				bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+				geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+				gsl_vector_set(x, c_phen, geno);
+				gsl_vector_set(x_miss, c_phen, 1.0);
+				x_mean+=geno;
+			}
+			c_phen++;
+		}
+
+		x_mean/=static_cast<double>(ni_test-n_miss);
+
+		// missing dosages take the mean; every value becomes 2-geno when the mean exceeds 1
+		for (size_t i=0; i<ni_test; ++i) {
+			if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+			geno=gsl_vector_get(x, i);
+			if (x_mean>1) {
+				gsl_vector_set(x, i, 2-geno);
+			}
+		}
+
+		//calculate statistics
+		time_start=clock();
+
+		gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+
+		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		//store summary data
+		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+		sumStat.push_back(SNPs);
+	}
+	cout<<endl;
+
+	gsl_vector_free(x);
+	gsl_vector_free(x_miss);
+
+	gsl_matrix_free(WtW);
+	gsl_matrix_free(WtWi);
+	gsl_vector_free(Wty);
+	gsl_vector_free(Wtx);
+	gsl_permutation_free(pmt);
+
+	infile.close();
+	infile.clear();
+
+	return;
+}
+
+
+
 void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
 {
 	igzstream infile (file_geno.c_str(), igzstream::in);
 	//	ifstream infile (file_geno.c_str(), ifstream::in);
 	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
-	
+
 	clock_t time_start=clock();
-	
+
 	string line;
 	char *ch_ptr;
-	
+
 	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
 	int n_miss, c_phen;
 	double geno, x_mean;
-	
+
 	//calculate some basic quantities
 	double yPwy, xPwy, xPwx;
 	double df=(double)W->size1-(double)W->size2-1.0;
@@ -350,7 +580,7 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
 	gsl_vector *x_miss=gsl_vector_alloc (W->size1);
 
 	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);		
+	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
 	gsl_vector *Wty=gsl_vector_alloc (W->size2);
 	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
 	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
@@ -362,58 +592,58 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
 
 	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
 	CalcvPv(WtWi, Wty, y, yPwy);
-	
-	//start reading genotypes and analyze	
+
+	//start reading genotypes and analyze
 	for (size_t t=0; t<indicator_snp.size(); ++t) {
 		//if (t>1) {break;}
 		getline(infile, line);
 		if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs  ", t, ns_total-1);}
 		if (indicator_snp[t]==0) {continue;}
-		
+
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		ch_ptr=strtok (NULL, " , \t");
 		ch_ptr=strtok (NULL, " , \t");
-		
+
 		x_mean=0.0; c_phen=0; n_miss=0;
 		gsl_vector_set_zero(x_miss);
 		for (size_t i=0; i<ni_total; ++i) {
 			ch_ptr=strtok (NULL, " , \t");
 			if (indicator_idv[i]==0) {continue;}
-			
+
 			if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
 			else {
-				geno=atof(ch_ptr); 				
-				
-				gsl_vector_set(x, c_phen, geno); 
-				gsl_vector_set(x_miss, c_phen, 1.0); 
+				geno=atof(ch_ptr);
+
+				gsl_vector_set(x, c_phen, geno);
+				gsl_vector_set(x_miss, c_phen, 1.0);
 				x_mean+=geno;
 			}
 			c_phen++;
-		}	
-		
+		}
+
 		x_mean/=(double)(ni_test-n_miss);
-		
+
 		for (size_t i=0; i<ni_test; ++i) {
 			if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
 			geno=gsl_vector_get(x, i);
 			if (x_mean>1) {
 				gsl_vector_set(x, i, 2-geno);
 			}
-		}		
-		
-		//calculate statistics		
-		time_start=clock();		
+		}
 
-		gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);		
+		//calculate statistics
+		time_start=clock();
+
+		gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
 		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
 		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
-		
+
 		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		
+
 		//store summary data
 		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
 		sumStat.push_back(SNPs);
-	}	
+	}
 	cout<<endl;
 
 	gsl_vector_free(x);
@@ -424,10 +654,10 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
 	gsl_vector_free(Wty);
 	gsl_vector_free(Wtx);
 	gsl_permutation_free(pmt);
-	
+
 	infile.close();
 	infile.clear();
-	
+
 	return;
 }
 
@@ -437,21 +667,21 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
 
 
 
-void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y) 
+void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
 {
 	string file_bed=file_bfile+".bed";
 	ifstream infile (file_bed.c_str(), ios::binary);
 	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-	
+
 	clock_t time_start=clock();
-	
+
 	char ch[1];
-	bitset<8> b;	
-	
+	bitset<8> b;
+
 	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
 	int n_bit, n_miss, ci_total, ci_test;
 	double geno, x_mean;
-		
+
 	//calculate some basic quantities
 	double yPwy, xPwy, xPwx;
 	double df=(double)W->size1-(double)W->size2-1.0;
@@ -459,7 +689,7 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
 	gsl_vector *x=gsl_vector_alloc (W->size1);
 
 	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
-	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);	
+	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
 	gsl_vector *Wty=gsl_vector_alloc (W->size2);
 	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
 	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
@@ -471,90 +701,104 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
 
 	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
 	CalcvPv(WtWi, Wty, y, yPwy);
-		
+
 	//calculate n_bit and c, the number of bit for each snp
 	if (ni_total%4==0) {n_bit=ni_total/4;}
 	else {n_bit=ni_total/4+1; }
-	
+
 	//print the first three majic numbers
 	for (int i=0; i<3; ++i) {
 		infile.read(ch,1);
 		b=ch[0];
 	}
-	
-	
+
+
 	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
 		if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);}
 		if (indicator_snp[t]==0) {continue;}
-		
+
 		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
-		
+
 		//read genotypes
-		x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0; 
+		x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0;
 		for (int i=0; i<n_bit; ++i) {
 			infile.read(ch,1);
 			b=ch[0];
 			for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
 				if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
 				if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-				
+
 				if (b[2*j]==0) {
 					if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
 					else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
 				}
 				else {
-					if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }                                  
+					if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
 					else {gsl_vector_set(x, ci_test, -9); n_miss++; }
 				}
-				
+
 				ci_total++;
 				ci_test++;
 			}
 		}
-		
+
 		x_mean/=(double)(ni_test-n_miss);
-		
-		for (size_t i=0; i<ni_test; ++i) {			
+
+		for (size_t i=0; i<ni_test; ++i) {
 			geno=gsl_vector_get(x,i);
 			if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
 			if (x_mean>1) {
 				gsl_vector_set(x, i, 2-geno);
 			}
 		}
-		
-		//calculate statistics		
-		time_start=clock();	
-		
+
+		//calculate statistics
+		time_start=clock();
+
 		gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
-		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);		
-		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);    
+		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
 
 		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-		
+
 		//store summary data
 		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
 		sumStat.push_back(SNPs);
-	}	
+	}
 	cout<<endl;
-	
+
 	gsl_vector_free(x);
 
 	gsl_matrix_free(WtW);
-	gsl_matrix_free(WtWi);	
+	gsl_matrix_free(WtWi);
 	gsl_vector_free(Wty);
 	gsl_vector_free(Wtx);
 	gsl_permutation_free(pmt);
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return;
 }
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 //make sure that both y and X are centered already
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_t, double> > &pos_loglr) 
+void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_t, double> > &pos_loglr)
 {
 	double yty, xty, xtx, log_lr;
 	gsl_blas_ddot(y, y, &yty);
@@ -567,6 +811,6 @@ void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_
 	  log_lr=0.5*(double)y->size*(log(yty)-log(yty-xty*xty/xtx));
 	  pos_loglr.push_back(make_pair(i,log_lr) );
 	}
-	
+
 	return;
 }