aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lapack.cpp32
-rw-r--r--src/lm.cpp416
-rw-r--r--src/lm.h54
3 files changed, 257 insertions, 245 deletions
diff --git a/src/lapack.cpp b/src/lapack.cpp
index 2bbdf62..01d2039 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -62,14 +62,14 @@ void lapack_float_cholesky_decomp (gsl_matrix_float *A) {
char UPLO='L';
if (N!=(int)A->size2) {
- cout << "Matrix needs to be symmetric and same dimension in" <<
+ cout << "Matrix needs to be symmetric and same dimension in " <<
"lapack_cholesky_decomp." << endl;
return;
}
spotrf_(&UPLO, &N, A->data, &LDA, &INFO);
if (INFO!=0) {
- cout << "Cholesky decomposition unsuccessful in" <<
+ cout << "Cholesky decomposition unsuccessful in " <<
"lapack_cholesky_decomp." << endl;
return;
}
@@ -83,14 +83,14 @@ void lapack_cholesky_decomp (gsl_matrix *A) {
char UPLO='L';
if (N!=(int)A->size2) {
- cout << "Matrix needs to be symmetric and same dimension in" <<
+ cout << "Matrix needs to be symmetric and same dimension in " <<
"lapack_cholesky_decomp." << endl;
return;
}
dpotrf_(&UPLO, &N, A->data, &LDA, &INFO);
if (INFO!=0) {
- cout << "Cholesky decomposition unsuccessful in" <<
+ cout << "Cholesky decomposition unsuccessful in " <<
"lapack_cholesky_decomp."<<endl;
return;
}
@@ -106,7 +106,7 @@ void lapack_float_cholesky_solve (gsl_matrix_float *A,
char UPLO='L';
if (N!=(int)A->size2 || N!=LDB) {
- cout << "Matrix needs to be symmetric and same dimension in" <<
+ cout << "Matrix needs to be symmetric and same dimension in " <<cout
"lapack_cholesky_solve." << endl;
return;
}
@@ -129,7 +129,7 @@ void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b,
char UPLO='L';
if (N!=(int)A->size2 || N!=LDB) {
- cout << "Matrix needs to be symmetric and same dimension in" <<
+ cout << "Matrix needs to be symmetric and same dimension in " <<
"lapack_cholesky_solve." << endl;
return;
}
@@ -236,7 +236,7 @@ void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
char JOBZ='V', UPLO='L';
if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same" <<
+ cout << "Matrix needs to be symmetric and same " <<
"dimension in lapack_eigen_symmv."<<endl;
return;
}
@@ -246,7 +246,7 @@ void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK,
&LWORK, &INFO);
if (INFO!=0) {
- cout << "Eigen decomposition unsuccessful in" <<
+ cout << "Eigen decomposition unsuccessful in " <<
"lapack_eigen_symmv."<<endl;
return;
}
@@ -268,7 +268,7 @@ void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
int IL=0, IU=0, M;
if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same" <<
+ cout << "Matrix needs to be symmetric and same " <<
"dimension in lapack_float_eigen_symmv." << endl;
return;
}
@@ -282,7 +282,7 @@ void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
evec->data, &LDZ, ISUPPZ, WORK_temp, &LWORK,
IWORK_temp, &LIWORK, &INFO);
if (INFO!=0) {
- cout << "Work space estimate unsuccessful in" <<
+ cout << "Work space estimate unsuccessful in " <<
"lapack_float_eigen_symmv." << endl;
return;
}
@@ -295,7 +295,7 @@ void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
&VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data,
&LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO);
if (INFO!=0) {
- cout << "Eigen decomposition unsuccessful in" <<
+ cout << "Eigen decomposition unsuccessful in " <<
"lapack_float_eigen_symmv." << endl;
return;
}
@@ -321,7 +321,7 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
char JOBZ='V', UPLO='L';
if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same" <<
+ cout << "Matrix needs to be symmetric and same " <<
"dimension in lapack_eigen_symmv." << endl;
return;
}
@@ -331,7 +331,7 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK,
&LWORK, &INFO);
if (INFO!=0) {
- cout<<"Eigen decomposition unsuccessful in" <<
+ cout<<"Eigen decomposition unsuccessful in " <<
"lapack_eigen_symmv." << endl;
return;
}
@@ -352,7 +352,7 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
int IL=0, IU=0, M;
if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same" <<
+ cout << "Matrix needs to be symmetric and same " <<
"dimension in lapack_eigen_symmv." << endl;
return;
}
@@ -367,7 +367,7 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
&LDZ, ISUPPZ, WORK_temp, &LWORK, IWORK_temp,
&LIWORK, &INFO);
if (INFO!=0) {
- cout << "Work space estimate unsuccessful in" <<
+ cout << "Work space estimate unsuccessful in " <<
"lapack_eigen_symmv." << endl;
return;
}
@@ -380,7 +380,7 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
&IL, &IU, &ABSTOL, &M, eval->data, evec->data,
&LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO);
if (INFO!=0) {
- cout << "Eigen decomposition unsuccessful in" <<
+ cout << "Eigen decomposition unsuccessful in " <<
"lapack_eigen_symmv." << endl;
return;
}
diff --git a/src/lm.cpp b/src/lm.cpp
index f8cb974..d3ad7f3 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -1,6 +1,6 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011 Xiang Zhou
+ Copyright (C) 2011-2017 Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -14,9 +14,7 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
+*/
#include <iostream>
#include <fstream>
@@ -27,6 +25,7 @@
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
+#include <assert.h>
#include <bitset>
#include <cstring>
@@ -35,7 +34,6 @@
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
-
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_roots.h"
#include "gsl/gsl_min.h"
@@ -44,22 +42,11 @@
#include "eigenlib.h"
#include "gzstream.h"
#include "lapack.h"
-
-#ifdef FORCE_FLOAT
-#include "lm_float.h"
-#else
#include "lm.h"
-#endif
-
using namespace std;
-
-
-
-
-void LM::CopyFromParam (PARAM &cPar)
-{
+void LM::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
@@ -89,26 +76,22 @@ void LM::CopyFromParam (PARAM &cPar)
return;
}
-
-void LM::CopyToParam (PARAM &cPar)
-{
+void LM::CopyToParam (PARAM &cPar) {
cPar.time_opt=time_opt;
-
cPar.ng_test=ng_test;
-
return;
}
-
-
-void LM::WriteFiles ()
-{
+void LM::WriteFiles () {
string file_str;
file_str=path_out+"/"+file_out;
file_str+=".assoc.txt";
ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
if (!file_gene.empty()) {
outfile<<"geneID"<<"\t";
@@ -120,24 +103,36 @@ void LM::WriteFiles ()
} else if (a_mode==53) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
} else if (a_mode==54) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
+ outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<
+ "\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
outfile<<snpInfo[t].rs_number<<"\t";
if (a_mode==51) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==52) {
- outfile<<scientific<<setprecision(6)<<"\t"<<sumStat[t].p_lrt<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ "\t"<<sumStat[t].p_lrt<<endl;
} else if (a_mode==53) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_score<<endl;
} else if (a_mode==54) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_wald <<"\t"<<
+ sumStat[t].p_lrt<<"\t"<<
+ sumStat[t].p_score<<endl;
} else {}
}
} else {
- outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"<<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
+ outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"<<
+ "\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<
+ "af"<<"\t";
if (a_mode==51) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
@@ -146,40 +141,50 @@ void LM::WriteFiles ()
} else if (a_mode==53) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
} else if (a_mode==54) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
+ outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"
+ <<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
size_t t=0;
for (size_t i=0; i<snpInfo.size(); ++i) {
if (indicator_snp[i]==0) {continue;}
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<ni_test-snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
+ outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<
+ "\t"<<snpInfo[i].base_position<<"\t"<<
+ snpInfo[i].n_miss<<"\t"<<ni_test-snpInfo[i].n_miss<<
+ "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<
+ "\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
if (a_mode==51) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==52) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].p_lrt<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].p_lrt<<endl;
} else if (a_mode==53) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_score<<endl;
} else if (a_mode==54) {
- outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
+ outfile<<scientific<<setprecision(6)<<
+ sumStat[t].beta<<"\t"<<sumStat[t].se<<
+ "\t"<<sumStat[t].p_wald <<"\t"<<
+ sumStat[t].p_lrt<<"\t"<<
+ sumStat[t].p_score<<endl;
} else {}
t++;
}
}
-
outfile.close();
outfile.clear();
return;
}
-
-
-
-
-void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *Wtx, const gsl_vector *y, const gsl_vector *x, double &xPwy, double &xPwx)
-{
+void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty,
+ const gsl_vector *Wtx, const gsl_vector *y,
+ const gsl_vector *x, double &xPwy, double &xPwx) {
size_t c_size=Wty->size;
double d;
@@ -200,9 +205,8 @@ void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *Wt
return;
}
-
-void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y, double &yPwy)
-{
+void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty,
+ const gsl_vector *y, double &yPwy) {
size_t c_size=Wty->size;
double d;
@@ -219,11 +223,11 @@ void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y,
return;
}
-
-
-//calculate p values and beta/se in a linear model
-void LmCalcP (const size_t test_mode, const double yPwy, const double xPwy, const double xPwx, const double df, const size_t n_size, double &beta, double &se, double &p_wald, double &p_lrt, double &p_score)
-{
+// Calculate p-values and beta/se in a linear model.
+void LmCalcP (const size_t test_mode, const double yPwy,
+ const double xPwy, const double xPwx, const double df,
+ const size_t n_size, double &beta, double &se,
+ double &p_wald, double &p_lrt, double &p_score) {
double yPxy=yPwy-xPwy*xPwy/xPwx;
double se_wald, se_score;
@@ -240,13 +244,12 @@ void LmCalcP (const size_t test_mode, const double yPwy, const double xPwy, cons
return;
}
-
-
-
-void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
-{
+void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x) {
ifstream infile (file_gene.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading gene expression file:"<<file_gene<<endl; return;}
+ if (!infile) {
+ cout<<"error reading gene expression file:"<<file_gene<<endl;
+ return;
+ }
clock_t time_start=clock();
@@ -255,10 +258,10 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
int c_phen;
- string rs; //gene id
+ string rs; // Gene id.
double d;
- //calculate some basic quantities
+ // Calculate some basic quantities.
double yPwy, xPwy, xPwx;
double df=(double)W->size1-(double)W->size2-1.0;
@@ -278,12 +281,14 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
CalcvPv(WtWi, Wtx, x, xPwx);
- //header
+ // Header.
getline(infile, line);
for (size_t t=0; t<ng_total; t++) {
getline(infile, line);
- if (t%d_pace==0 || t==ng_total-1) {ProgressBar ("Performing Analysis ", t, ng_total-1);}
+ if (t%d_pace==0 || t==ng_total-1) {
+ ProgressBar ("Performing Analysis ", t, ng_total-1);
+ }
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
@@ -298,16 +303,17 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
c_phen++;
}
- //calculate statistics
+ // Calculate statistics.
time_start=clock();
gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
+ beta, se, p_wald, p_lrt, p_score);
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //store summary data
+ // Store summary data.
SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
@@ -327,17 +333,14 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
return;
}
-
-
-
// WJA added
-#include <assert.h>
-void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
-{
+void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) {
string file_bgen=file_oxford+".bgen";
ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
-
+ if (!infile) {
+ cout<<"error reading bgen file:"<<file_bgen<<endl;
+ return;
+ }
clock_t time_start=clock();
@@ -348,7 +351,7 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
int n_miss, c_phen;
double geno, x_mean;
- //calculate some basic quantities
+ // Calculate some basic quantities.
double yPwy, xPwy, xPwx;
double df=(double)W->size1-(double)W->size2-1.0;
@@ -369,7 +372,7 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
CalcvPv(WtWi, Wty, y, yPwy);
- // read in header
+ // Read in header.
uint32_t bgen_snp_block_offset;
uint32_t bgen_header_length;
uint32_t bgen_nsamples;
@@ -387,11 +390,11 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
infile.read(reinterpret_cast<char*>(&bgen_flags),4);
bgen_snp_block_offset-=4;
bool CompressedSNPBlocks=bgen_flags&0x1;
-// bool LongIds=bgen_flags&0x4;
infile.ignore(bgen_snp_block_offset);
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+ double bgen_geno_prob_AA, bgen_geno_prob_AB;
+ double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
uint32_t bgen_N;
uint16_t bgen_LS;
@@ -407,17 +410,16 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
string id;
string rs;
string chr;
- std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl;
-
-
-
- //start reading genotypes and analyze
- for (size_t t=0; t<indicator_snp.size(); ++t)
- {
-
-// if (t>1) {break;}
- if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
- // read SNP header
+ std::cout << "Warning: WJA hard coded SNP missingness " <<
+ "threshold of 10%" << std::endl;
+
+ // Start reading genotypes and analyze.
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+ if (t%d_pace==0 || t==(ns_total-1)) {
+ ProgressBar ("Reading SNPs ", t, ns_total-1);
+ }
+
+ // Read SNP header.
id.clear();
rs.clear();
chr.clear();
@@ -444,40 +446,37 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
bgen_A_allele.resize(bgen_LA);
infile.read(&bgen_A_allele[0], bgen_LA);
-
infile.read(reinterpret_cast<char*>(&bgen_LB),4);
bgen_B_allele.resize(bgen_LB);
infile.read(&bgen_B_allele[0], bgen_LB);
-
-
-
uint16_t unzipped_data[3*bgen_N];
if (indicator_snp[t]==0) {
if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
else
- bgen_P=6*bgen_N;
+ bgen_P=6*bgen_N;
infile.ignore(static_cast<size_t>(bgen_P));
continue;
}
-
- if(CompressedSNPBlocks)
- {
-
-
+ if(CompressedSNPBlocks) {
infile.read(reinterpret_cast<char*>(&bgen_P),4);
uint8_t zipped_data[bgen_P];
unzipped_data_size=6*bgen_N;
- infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
+ infile.read(reinterpret_cast<char*>(zipped_data),
+ bgen_P);
- int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+ int result=
+ uncompress(reinterpret_cast<Bytef*>(unzipped_data),
+ reinterpret_cast<uLongf*>(&unzipped_data_size),
+ reinterpret_cast<Bytef*>(zipped_data),
+ static_cast<uLong> (bgen_P));
assert(result == Z_OK);
}
@@ -485,7 +484,8 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
{
bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+ infile.read(reinterpret_cast<char*>(unzipped_data),
+ bgen_P);
}
x_mean=0.0; c_phen=0; n_miss=0;
@@ -494,23 +494,32 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
if (indicator_idv[i]==0) {continue;}
- bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+ bgen_geno_prob_AA=
+ static_cast<double>(unzipped_data[i*3])/32768.0;
+ bgen_geno_prob_AB=
+ static_cast<double>(unzipped_data[i*3+1])/32768.0;
+ bgen_geno_prob_BB=
+ static_cast<double>(unzipped_data[i*3+2])/32768.0;
+
// WJA
- bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
- else {
-
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
- geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+ bgen_geno_prob_non_miss=
+ bgen_geno_prob_AA +
+ bgen_geno_prob_AB +
+ bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss<0.9) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ }
+ else {
+ bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+ geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
}
c_phen++;
}
@@ -518,24 +527,23 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
x_mean/=static_cast<double>(ni_test-n_miss);
for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ if (gsl_vector_get (x_miss, i)==0) {
+ gsl_vector_set(x, i, x_mean);
+ }
geno=gsl_vector_get(x, i);
- //if (x_mean>1) {
- //gsl_vector_set(x, i, 2-geno);
- //}
}
-
- //calculate statistics
+ // Calculate statistics.
time_start=clock();
gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
+ beta, se, p_wald, p_lrt, p_score);
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //store summary data
+ // Store summary data.
SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
@@ -556,13 +564,12 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
return;
}
-
-
-void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
-{
+void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y) {
igzstream infile (file_geno.c_str(), igzstream::in);
- // ifstream infile (file_geno.c_str(), ifstream::in);
- if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return;
+ }
clock_t time_start=clock();
@@ -573,7 +580,7 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
int n_miss, c_phen;
double geno, x_mean;
- //calculate some basic quantities
+ // Calculate some basic quantities.
double yPwy, xPwy, xPwx;
double df=(double)W->size1-(double)W->size2-1.0;
@@ -594,11 +601,12 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
CalcvPv(WtWi, Wty, y, yPwy);
- //start reading genotypes and analyze
+ // Start reading genotypes and analyze.
for (size_t t=0; t<indicator_snp.size(); ++t) {
- //if (t>1) {break;}
getline(infile, line);
- if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (t%d_pace==0 || t==(ns_total-1)) {
+ ProgressBar ("Reading SNPs ", t, ns_total-1);
+ }
if (indicator_snp[t]==0) {continue;}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -611,7 +619,10 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
- if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ if (strcmp(ch_ptr, "NA")==0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ }
else {
geno=atof(ch_ptr);
@@ -625,23 +636,23 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
x_mean/=(double)(ni_test-n_miss);
for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ if (gsl_vector_get (x_miss, i)==0) {
+ gsl_vector_set(x, i, x_mean);
+ }
geno=gsl_vector_get(x, i);
- //if (x_mean>1) {
- //gsl_vector_set(x, i, 2-geno);
- //}
}
- //calculate statistics
+ // Calculate statistics.
time_start=clock();
gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
+ beta, se, p_wald, p_lrt, p_score);
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- //store summary data
+ // Store summary data.
SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
@@ -662,17 +673,13 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
return;
}
-
-
-
-
-
-
-void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
-{
+void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y) {
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+ if (!infile) {
+ cout<<"error reading bed file:"<<file_bed<<endl;
+ return;
+ }
clock_t time_start=clock();
@@ -683,7 +690,7 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
- //calculate some basic quantities
+ // Calculate some basic quantities.
double yPwy, xPwy, xPwx;
double df=(double)W->size1-(double)W->size2-1.0;
@@ -703,42 +710,62 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
CalcvPv(WtWi, Wty, y, yPwy);
- //calculate n_bit and c, the number of bit for each snp
+ // Calculate n_bit and c, the number of bit for each SNP.
if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
+ else {n_bit=ni_total/4+1;}
- //print the first three majic numbers
+ // Print the first three magic numbers.
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
}
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
+ if (t%d_pace==0 || t==snpInfo.size()-1) {
+ ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
+ }
if (indicator_snp[t]==0) {continue;}
- infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t*n_bit+3);
- //read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ // Read genotypes.
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
- for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
+
+ // Minor allele homozygous: 2.0; major: 0.0;
+ for (size_t j=0; j<4; ++j) {
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total]==0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean+=2.0;
+ }
+ else {
+ gsl_vector_set(x, ci_test, 1);
+ x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {
+ gsl_vector_set(x, ci_test, 0);
+ }
+ else {
+ gsl_vector_set(x, ci_test, -9);
+ n_miss++;
+ }
+ }
+
+ ci_total++;
+ ci_test++;
}
}
@@ -746,18 +773,19 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
for (size_t i=0; i<ni_test; ++i) {
geno=gsl_vector_get(x,i);
- if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
- //if (x_mean>1) {
- //gsl_vector_set(x, i, 2-geno);
- //}
+ if (geno==-9) {
+ gsl_vector_set(x, i, x_mean);
+ geno=x_mean;
+ }
}
- //calculate statistics
+ // Calculate statistics.
time_start=clock();
gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
+ beta, se, p_wald, p_lrt, p_score);
//store summary data
SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
@@ -781,25 +809,9 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
return;
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-//make sure that both y and X are centered already
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_t, double> > &pos_loglr)
-{
+// Make sure that both y and X are centered already.
+void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y,
+ vector<pair<size_t, double> > &pos_loglr) {
double yty, xty, xtx, log_lr;
gsl_blas_ddot(y, y, &yty);
diff --git a/src/lm.h b/src/lm.h
index 656dd52..fac84e1 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -1,6 +1,6 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011 Xiang Zhou
+ Copyright (C) 2011-2017 Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,33 +13,25 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
#ifndef __LM_H__
#define __LM_H__
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-
-
-#ifdef FORCE_FLOAT
-#include "param_float.h"
-#include "io_float.h"
-#else
#include "param.h"
#include "io.h"
-#endif
using namespace std;
-
class LM {
public:
- // IO related parameters
- int a_mode; //analysis mode, 50+1/2/3/4 for Frequentist tests
- size_t d_pace; //display pace
+ // IO-related parameters.
+ int a_mode; // Analysis mode: 50+1/2/3/4 for Frequentist tests.
+ size_t d_pace; // Display pace.
string file_bfile;
string file_geno;
@@ -49,31 +41,39 @@ public:
string file_gene;
- // Summary statistics
- size_t ni_total, ni_test; //number of individuals
- size_t ns_total, ns_test; //number of snps
- size_t ng_total, ng_test; //number of genes
+ // Summary statistics.
+ size_t ni_total, ni_test; // Number of individuals.
+ size_t ns_total, ns_test; // Number of SNPs.
+ size_t ng_total, ng_test; // Number of genes.
size_t n_cvt;
- double time_opt; //time spent
+ double time_opt; // Time spent.
- vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis
- vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_idv;
- vector<SNPINFO> snpInfo; //record SNP information
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
- // Not included in PARAM
- vector<SUMSTAT> sumStat; //Output SNPSummary Data
+ vector<SNPINFO> snpInfo; // Record SNP information.
- // Main functions
+ // Not included in PARAM.
+ vector<SUMSTAT> sumStat; // Output SNPSummary Data.
+
+ // Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
void AnalyzeGene (const gsl_matrix *W, const gsl_vector *x);
void AnalyzePlink (const gsl_matrix *W, const gsl_vector *y);
void AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y);
- // WJA added
+ // WJA added.
void Analyzebgen (const gsl_matrix *W, const gsl_vector *y);
void WriteFiles ();
};
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_t, double> > &pos_loglr);
+
+void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y,
+ vector<pair<size_t, double> > &pos_loglr);
+
#endif