about summary refs log tree commit diff
path: root/src/prdt.cpp
diff options
context:
space:
mode:
author: Peter Carbonetto, 2017-07-07 11:20:56 -0500
committer: GitHub, 2017-07-07 11:20:56 -0500
commit: 86e96ede4ff0955bb2d03ac6c1bd7562a3984955 (patch)
tree: 33120540091e7d16b58f389a13949df397535912 /src/prdt.cpp
parent: b3747413e6c5c8cd447e979157880676da66a342 (diff)
parent: b9758364059d52e153a9f1b4fcae3bc3f3e68422 (diff)
download: pangemma-86e96ede4ff0955bb2d03ac6c1bd7562a3984955.tar.gz
Merge pull request #51 from genenetwork/spacing
Spacing fixes.
Diffstat (limited to 'src/prdt.cpp')
-rw-r--r-- src/prdt.cpp 188
1 files changed, 94 insertions, 94 deletions
diff --git a/src/prdt.cpp b/src/prdt.cpp
index db0fa14..b29d150 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -1,17 +1,17 @@
 /*
  Genome-wide Efficient Mixed Model Association (GEMMA)
  Copyright (C) 2011-2017, Xiang Zhou
- 
+
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
- 
+
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
- 
+
  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
@@ -24,7 +24,7 @@
 #include <bitset>
 #include <vector>
 #include <stdio.h>
-#include <stdlib.h> 
+#include <stdlib.h>
 #include <cmath>
 #include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
@@ -43,36 +43,36 @@ using namespace std;
 void PRDT::CopyFromParam (PARAM &cPar) {
 	a_mode=cPar.a_mode;
 	d_pace=cPar.d_pace;
-	
+
 	file_bfile=cPar.file_bfile;
 	file_geno=cPar.file_geno;
 	file_out=cPar.file_out;
 	path_out=cPar.path_out;
-	
-	indicator_pheno=cPar.indicator_pheno;	
+
+	indicator_pheno=cPar.indicator_pheno;
 	indicator_cvt=cPar.indicator_cvt;
 	indicator_idv=cPar.indicator_idv;
-	
+
 	snpInfo=cPar.snpInfo;
 	mapRS2est=cPar.mapRS2est;
-	
+
 	time_eigen=0;
-	
+
 	n_ph=cPar.n_ph;
 	np_obs=cPar.np_obs;
 	np_miss=cPar.np_miss;
 	ns_total=cPar.ns_total;
-	ns_test=0;	
-	
+	ns_test=0;
+
 	return;
 }
 
 void PRDT::CopyToParam (PARAM &cPar) {
 	cPar.ns_test=ns_test;
 	cPar.time_eigen=time_eigen;
-	
+
 	return;
-}               
+}
 
 void PRDT::WriteFiles (gsl_vector *y_prdt) {
 	string file_str;
@@ -80,13 +80,13 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) {
 	file_str+=".";
 	file_str+="prdt";
 	file_str+=".txt";
-	
+
 	ofstream outfile (file_str.c_str(), ofstream::out);
 	if (!outfile) {
 	  cout<<"error writing file: "<<file_str.c_str()<<endl;
 	  return;
 	}
-	
+
 	size_t ci_test=0;
 	for (size_t i=0; i<indicator_idv.size(); i++) {
 		if (indicator_idv[i]==1) {
@@ -96,7 +96,7 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) {
 			ci_test++;
 		}
 	}
-	
+
 	outfile.close();
 	outfile.clear();
 	return;
@@ -106,13 +106,13 @@ void PRDT::WriteFiles (gsl_matrix *Y_full)  {
 	string file_str;
 	file_str=path_out+"/"+file_out;
 	file_str+=".prdt.txt";
-	
+
 	ofstream outfile (file_str.c_str(), ofstream::out);
 	if (!outfile) {
 	  cout<<"error writing file: "<<file_str.c_str()<<endl;
 	  return;
 	}
-	
+
 	size_t ci_test=0;
 	for (size_t i=0; i<indicator_cvt.size(); i++) {
 		if (indicator_cvt[i]==0) {
@@ -126,7 +126,7 @@ void PRDT::WriteFiles (gsl_matrix *Y_full)  {
 			ci_test++;
 		}
 	}
-	
+
 	outfile.close();
 	outfile.clear();
 	return;
@@ -134,21 +134,21 @@ void PRDT::WriteFiles (gsl_matrix *Y_full)  {
 
 void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
 	size_t ni_test=u_hat->size, ni_total=G->size1;
-	
+
 	gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test);
 	gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test);
-	gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test); 
+	gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
 	gsl_vector *eval=gsl_vector_alloc (ni_test);
 	gsl_vector *Utu=gsl_vector_alloc (ni_test);
 	gsl_vector *w=gsl_vector_alloc (ni_total);
 	gsl_permutation *pmt=gsl_permutation_alloc (ni_test);
-	
+
 	//center matrix G based on indicator_idv
 	for (size_t i=0; i<ni_total; i++) {
 		gsl_vector_set(w, i, indicator_idv[i]);
 	}
 	CenterMatrix(G, w);
-		
+
 	//obtain Koo and Kfo
 	size_t o_i=0, o_j=0;
 	double d;
@@ -166,7 +166,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
 		}
 		if (indicator_idv[i]==1) {o_i++;}
 	}
-		
+
 	//matrix operations to get u_prdt
 	cout<<"Start Eigen-Decomposition..."<<endl;
 	clock_t time_start=clock();
@@ -177,8 +177,8 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
 		}
 	}
 
-	time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);	
-	
+	time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
 	gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu);
 	for (size_t i=0; i<eval->size; i++) {
 		d=gsl_vector_get(eval, i);
@@ -189,7 +189,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
 	}
 	gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval);
 	gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
-	
+
 	// Free matrices.
 	gsl_matrix_free(Goo);
 	gsl_matrix_free(Gfo);
@@ -199,7 +199,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
 	gsl_vector_free(w);
 	gsl_permutation_free(pmt);
 
-	return;	
+	return;
 }
 
 void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
@@ -208,17 +208,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
 	  cout<<"error reading genotype file:"<<file_geno<<endl;
 	  return;
 	}
-	
+
 	string line;
 	char *ch_ptr;
 	string rs;
-	
+
 	size_t n_miss, n_train_nomiss, c_phen;
 	double geno, x_mean, x_train_mean, effect_size;
-	
+
 	gsl_vector *x=gsl_vector_alloc (y_prdt->size);
 	gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size);
-	
+
 	ns_test=0;
 
 	// Start reading genotypes and analyze.
@@ -227,24 +227,24 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
 		if (t%d_pace==0 || t==(ns_total-1)) {
 		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
 		}
-		
+
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		rs=ch_ptr;
 		ch_ptr=strtok (NULL, " , \t");
-		ch_ptr=strtok (NULL, " , \t");		
-		
+		ch_ptr=strtok (NULL, " , \t");
+
 		if (mapRS2est.count(rs)==0) {
 		  continue;
 		} else {
 		  effect_size=mapRS2est[rs];
 		}
-		
+
 		x_mean=0.0;
 		c_phen=0;
 		n_miss=0;
 		x_train_mean=0;
 		n_train_nomiss=0;
-		
+
 		gsl_vector_set_zero(x_miss);
 
 		for (size_t i=0; i<indicator_idv.size(); ++i) {
@@ -260,10 +260,10 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
 					gsl_vector_set(x_miss, c_phen, 0.0);
 					n_miss++;
 				} else {
-					geno=atof(ch_ptr); 	
-					
-					gsl_vector_set(x, c_phen, geno); 
-					gsl_vector_set(x_miss, c_phen, 1.0); 
+					geno=atof(ch_ptr);
+
+					gsl_vector_set(x, c_phen, geno);
+					gsl_vector_set(x_miss, c_phen, 1.0);
 					x_mean+=geno;
 				}
 				c_phen++;
@@ -274,12 +274,12 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
 		  cout << "snp " << rs << " has missing genotype for all " <<
 		    "individuals and will be ignored." << endl;
 		  continue;}
-		
+
 
 		x_mean/=(double)(x->size-n_miss);
 		x_train_mean/=(double)(n_train_nomiss);
-		
-		
+
+
 		for (size_t i=0; i<x->size; ++i) {
 			geno=gsl_vector_get(x, i);
 			if (gsl_vector_get (x_miss, i)==0) {
@@ -291,17 +291,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
 
 		gsl_vector_scale (x, effect_size);
 		gsl_vector_add (y_prdt, x);
-		
+
 		ns_test++;
-	}	
+	}
 	cout<<endl;
-	
+
 	gsl_vector_free (x);
 	gsl_vector_free (x_miss);
-	
+
 	infile.close();
 	infile.clear();
-	
+
 	return;
 }
 
@@ -312,35 +312,35 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
 	  cout<<"error reading bed file:"<<file_bed<<endl;
 	  return;
 	}
-	
+
 	char ch[1];
-	bitset<8> b;	
+	bitset<8> b;
 	string rs;
-	
+
 	size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
 	double geno, x_mean, x_train_mean, effect_size;
-	
+
 	gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-	
+
 	// Calculate n_bit and c, the number of bit for each SNP.
 	if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;}
 	else {n_bit=indicator_idv.size()/4+1; }
-	
+
 	// Print the first 3 magic numbers.
 	for (size_t i=0; i<3; ++i) {
 		infile.read(ch,1);
 		b=ch[0];
-	}	
-	
+	}
+
 	ns_test=0;
-	
+
 	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
 		if (t%d_pace==0 || t==snpInfo.size()-1) {
 		  ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
 		}
-		
+
 		rs=snpInfo[t].rs_number;
-		
+
 		if (mapRS2est.count(rs)==0) {
 		  continue;
 		} else {
@@ -349,7 +349,7 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
 
 		// n_bit, and 3 is the number of magic numbers.
 		infile.seekg(t*n_bit+3);
-		
+
 		// Read genotypes.
 		x_mean=0.0;
 		n_miss=0;
@@ -359,7 +359,7 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
 			b=ch[0];
 
 			// Minor allele homozygous: 2.0; major: 0.0.
-			for (size_t j=0; j<4; ++j) {                
+			for (size_t j=0; j<4; ++j) {
 				if ((i==(n_bit-1)) &&
 				    ci_total==indicator_idv.size()) {
 				  break;
@@ -404,19 +404,19 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
 					ci_test++;
 				}
 				ci_total++;
-				
+
 			}
 		}
-		
+
 		if (x->size==n_miss) {
 		  cout << "snp " << rs << " has missing genotype for all " <<
 		    "individuals and will be ignored."<<endl;
 		  continue;
 		}
-		
+
 		x_mean/=(double)(x->size-n_miss);
 		x_train_mean/=(double)(n_train_nomiss);
-		
+
 		for (size_t i=0; i<x->size; ++i) {
 			geno=gsl_vector_get(x, i);
 			if (geno==-9) {
@@ -425,47 +425,47 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
 				gsl_vector_set(x, i, geno-x_train_mean);
 			}
 		}
-		
+
 		gsl_vector_scale (x, effect_size);
 		gsl_vector_add (y_prdt, x);
-		
+
 		ns_test++;
-	}	
+	}
 	cout<<endl;
-	
+
 	gsl_vector_free (x);
-	
+
 	infile.close();
-	infile.clear();	
-	
+	infile.clear();
+
 	return;
 }
 
 // Predict missing phenotypes using ridge regression.
 // Y_hat contains fixed effects
 void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
-		       gsl_matrix *Y_full) {	
+		       gsl_matrix *Y_full) {
 	gsl_vector *y_obs=gsl_vector_alloc (np_obs);
 	gsl_vector *y_miss=gsl_vector_alloc (np_miss);
 	gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs);
 	gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs);
 	gsl_vector *Hiy=gsl_vector_alloc (np_obs);
-	
+
 	size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0;
-	
+
 	// Obtain H_oo, H_mo.
-	c_obs1=0; c_miss1=0; 
+	c_obs1=0; c_miss1=0;
 	for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) {
 		if (indicator_cvt[i1]==0) {continue;}
 		for (vector<int>::size_type j1=0; j1<n_ph; ++j1) {
-			
+
 			c_obs2=0; c_miss2=0;
 			for (vector<int>::size_type i2=0;
 			     i2<indicator_pheno.size(); ++i2) {
 				if (indicator_cvt[i2]==0) {continue;}
 				for (vector<int>::size_type j2=0;
 				     j2<n_ph; j2++) {
-					
+
 					if (indicator_pheno[i2][j2]==1) {
 					      if (indicator_pheno[i1][j1]==1) {
 						gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
@@ -476,30 +476,30 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
 					} else {
 						c_miss2++;
 					}
-				}				
+				}
 			}
-			
+
 			if (indicator_pheno[i1][j1]==1) {
 				c_obs1++;
 			} else {
 				c_miss1++;
 			}
 		}
-		
-	}	
-	
+
+	}
+
 	// Do LU decomposition of H_oo.
 	int sig;
 	gsl_permutation * pmt=gsl_permutation_alloc (np_obs);
 	LUDecomp (H_oo, pmt, &sig);
-	
+
 		// Obtain y_obs=y_full-y_hat.
 		// Add the fixed effects part to y_miss: y_miss=y_hat.
 		c_obs1=0; c_miss1=0;
 		for (vector<int>::size_type i=0;
 		     i<indicator_pheno.size(); ++i) {
 			if (indicator_cvt[i]==0) {continue;}
-			
+
 			for (vector<int>::size_type j=0; j<n_ph; ++j) {
 				if (indicator_pheno[i][j]==1) {
 					gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) );
@@ -509,18 +509,18 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
 					c_miss1++;
 				}
 			}
-		}	
-		
+		}
+
 		LUSolve (H_oo, pmt, y_obs, Hiy);
-		
+
 		gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
-		
+
 		// Put back predicted y_miss to Y_full.
 		c_miss1=0;
 		for (vector<int>::size_type i=0;
 		     i<indicator_pheno.size(); ++i) {
 			if (indicator_cvt[i]==0) {continue;}
-			
+
 			for (vector<int>::size_type j=0; j<n_ph; ++j) {
 				if (indicator_pheno[i][j]==0) {
 					gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) );
@@ -528,14 +528,14 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
 				}
 			}
 		}
-		
+
 	// Free matrices.
 	gsl_vector_free(y_obs);
 	gsl_vector_free(y_miss);
 	gsl_matrix_free(H_oo);
 	gsl_matrix_free(H_mo);
 	gsl_vector_free(Hiy);
-	
+
 	return;
 }