aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/bslmm.cpp3331
-rw-r--r--src/bslmm.h282
-rw-r--r--src/bslmmdap.cpp1258
-rw-r--r--src/bslmmdap.h160
-rw-r--r--src/eigenlib.cpp103
-rw-r--r--src/eigenlib.h16
-rw-r--r--src/gemma.cpp6549
-rw-r--r--src/gemma.h31
-rw-r--r--src/gzstream.cpp176
-rw-r--r--src/gzstream.h90
-rw-r--r--src/io.cpp7531
-rw-r--r--src/io.h335
-rw-r--r--src/lapack.cpp1030
-rw-r--r--src/lapack.h66
-rw-r--r--src/ldr.cpp83
-rw-r--r--src/ldr.h62
-rw-r--r--src/lm.cpp1500
-rw-r--r--src/lm.h92
-rw-r--r--src/lmm.cpp4813
-rw-r--r--src/lmm.h197
-rw-r--r--src/logistic.cpp747
-rw-r--r--src/logistic.h96
-rw-r--r--src/main.cpp104
-rw-r--r--src/mathfunc.cpp585
-rw-r--r--src/mathfunc.h34
-rw-r--r--src/mvlmm.cpp10159
-rw-r--r--src/mvlmm.h145
-rw-r--r--src/param.cpp4138
-rw-r--r--src/param.h600
-rw-r--r--src/prdt.cpp988
-rw-r--r--src/prdt.h80
-rw-r--r--src/varcov.cpp386
-rw-r--r--src/varcov.h60
-rw-r--r--src/vc.cpp3655
-rw-r--r--src/vc.h180
35 files changed, 25588 insertions, 24074 deletions
diff --git a/src/bslmm.cpp b/src/bslmm.cpp
index d579802..3305639 100644
--- a/src/bslmm.cpp
+++ b/src/bslmm.cpp
@@ -16,1360 +16,1428 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
-#include <iomanip>
+#include <algorithm>
#include <cmath>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
-#include <ctime>
-#include <cstring>
-#include <algorithm>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
#include "gsl/gsl_randist.h"
-#include "gsl/gsl_cdf.h"
#include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
-#include "lapack.h"
-#include "param.h"
#include "bslmm.h"
-#include "lmm.h"
+#include "lapack.h"
#include "lm.h"
+#include "lmm.h"
#include "mathfunc.h"
+#include "param.h"
using namespace std;
-void BSLMM::CopyFromParam (PARAM &cPar) {
- a_mode=cPar.a_mode;
- d_pace=cPar.d_pace;
-
- file_bfile=cPar.file_bfile;
- file_geno=cPar.file_geno;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
-
- l_min=cPar.h_min;
- l_max=cPar.h_max;
- n_region=cPar.n_region;
- pve_null=cPar.pve_null;
- pheno_mean=cPar.pheno_mean;
-
- time_UtZ=0.0;
- time_Omega=0.0;
- n_accept=0;
-
- h_min=cPar.h_min;
- h_max=cPar.h_max;
- h_scale=cPar.h_scale;
- rho_min=cPar.rho_min;
- rho_max=cPar.rho_max;
- rho_scale=cPar.rho_scale;
- logp_min=cPar.logp_min;
- logp_max=cPar.logp_max;
- logp_scale=cPar.logp_scale;
-
- s_min=cPar.s_min;
- s_max=cPar.s_max;
- w_step=cPar.w_step;
- s_step=cPar.s_step;
- r_pace=cPar.r_pace;
- w_pace=cPar.w_pace;
- n_mh=cPar.n_mh;
- geo_mean=cPar.geo_mean;
- randseed=cPar.randseed;
- trace_G=cPar.trace_G;
-
- ni_total=cPar.ni_total;
- ns_total=cPar.ns_total;
- ni_test=cPar.ni_test;
- ns_test=cPar.ns_test;
- n_cvt=cPar.n_cvt;
-
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
- snpInfo=cPar.snpInfo;
-
- return;
+void BSLMM::CopyFromParam(PARAM &cPar) {
+ a_mode = cPar.a_mode;
+ d_pace = cPar.d_pace;
+
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
+
+ l_min = cPar.h_min;
+ l_max = cPar.h_max;
+ n_region = cPar.n_region;
+ pve_null = cPar.pve_null;
+ pheno_mean = cPar.pheno_mean;
+
+ time_UtZ = 0.0;
+ time_Omega = 0.0;
+ n_accept = 0;
+
+ h_min = cPar.h_min;
+ h_max = cPar.h_max;
+ h_scale = cPar.h_scale;
+ rho_min = cPar.rho_min;
+ rho_max = cPar.rho_max;
+ rho_scale = cPar.rho_scale;
+ logp_min = cPar.logp_min;
+ logp_max = cPar.logp_max;
+ logp_scale = cPar.logp_scale;
+
+ s_min = cPar.s_min;
+ s_max = cPar.s_max;
+ w_step = cPar.w_step;
+ s_step = cPar.s_step;
+ r_pace = cPar.r_pace;
+ w_pace = cPar.w_pace;
+ n_mh = cPar.n_mh;
+ geo_mean = cPar.geo_mean;
+ randseed = cPar.randseed;
+ trace_G = cPar.trace_G;
+
+ ni_total = cPar.ni_total;
+ ns_total = cPar.ns_total;
+ ni_test = cPar.ni_test;
+ ns_test = cPar.ns_test;
+ n_cvt = cPar.n_cvt;
+
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
+
+ return;
}
-void BSLMM::CopyToParam (PARAM &cPar) {
- cPar.time_UtZ=time_UtZ;
- cPar.time_Omega=time_Omega;
- cPar.time_Proposal=time_Proposal;
- cPar.cHyp_initial=cHyp_initial;
- cPar.n_accept=n_accept;
- cPar.pheno_mean=pheno_mean;
- cPar.randseed=randseed;
+void BSLMM::CopyToParam(PARAM &cPar) {
+ cPar.time_UtZ = time_UtZ;
+ cPar.time_Omega = time_Omega;
+ cPar.time_Proposal = time_Proposal;
+ cPar.cHyp_initial = cHyp_initial;
+ cPar.n_accept = n_accept;
+ cPar.pheno_mean = pheno_mean;
+ cPar.randseed = randseed;
- return;
+ return;
}
-void BSLMM::WriteBV (const gsl_vector *bv) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".bv.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- size_t t=0;
- for (size_t i=0; i<ni_total; ++i) {
- if (indicator_idv[i]==0) {
- outfile<<"NA"<<endl;
- }
- else {
- outfile<<scientific<<setprecision(6)<<
- gsl_vector_get(bv, t)<<endl;
- t++;
- }
- }
-
- outfile.clear();
- outfile.close();
- return;
+void BSLMM::WriteBV(const gsl_vector *bv) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".bv.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ size_t t = 0;
+ for (size_t i = 0; i < ni_total; ++i) {
+ if (indicator_idv[i] == 0) {
+ outfile << "NA" << endl;
+ } else {
+ outfile << scientific << setprecision(6) << gsl_vector_get(bv, t) << endl;
+ t++;
+ }
+ }
+
+ outfile.clear();
+ outfile.close();
+ return;
}
-void BSLMM::WriteParam (vector<pair<double, double> > &beta_g,
- const gsl_vector *alpha, const size_t w) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".param.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;}
-
- outfile<<"chr"<<"\t"<<"rs"<<"\t"
- <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
- <<"beta"<<"\t"<<"gamma"<<endl;
-
- size_t t=0;
- for (size_t i=0; i<ns_total; ++i) {
- if (indicator_snp[i]==0) {continue;}
-
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
- <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";
-
- outfile<<scientific<<setprecision(6)<<
- gsl_vector_get(alpha, t)<<"\t";
- if (beta_g[t].second!=0) {
- outfile<<beta_g[t].first/beta_g[t].second<<
- "\t"<<beta_g[t].second/(double)w<<endl;
- }
- else {
- outfile<<0.0<<"\t"<<0.0<<endl;
- }
- t++;
- }
-
- outfile.clear();
- outfile.close();
- return;
+void BSLMM::WriteParam(vector<pair<double, double>> &beta_g,
+ const gsl_vector *alpha, const size_t w) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".param.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ outfile << "chr"
+ << "\t"
+ << "rs"
+ << "\t"
+ << "ps"
+ << "\t"
+ << "n_miss"
+ << "\t"
+ << "alpha"
+ << "\t"
+ << "beta"
+ << "\t"
+ << "gamma" << endl;
+
+ size_t t = 0;
+ for (size_t i = 0; i < ns_total; ++i) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+ << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
+
+ outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
+ << "\t";
+ if (beta_g[t].second != 0) {
+ outfile << beta_g[t].first / beta_g[t].second << "\t"
+ << beta_g[t].second / (double)w << endl;
+ } else {
+ outfile << 0.0 << "\t" << 0.0 << endl;
+ }
+ t++;
+ }
+
+ outfile.clear();
+ outfile.close();
+ return;
}
-void BSLMM::WriteParam (const gsl_vector *alpha) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".param.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- outfile<<"chr"<<"\t"<<"rs"<<"\t"
- <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
- <<"beta"<<"\t"<<"gamma"<<endl;
-
- size_t t=0;
- for (size_t i=0; i<ns_total; ++i) {
- if (indicator_snp[i]==0) {continue;}
-
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<
- snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";
- outfile<<scientific<<setprecision(6)<<
- gsl_vector_get(alpha, t)<<"\t";
- outfile<<0.0<<"\t"<<0.0<<endl;
- t++;
- }
-
- outfile.clear();
- outfile.close();
- return;
+void BSLMM::WriteParam(const gsl_vector *alpha) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".param.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ outfile << "chr"
+ << "\t"
+ << "rs"
+ << "\t"
+ << "ps"
+ << "\t"
+ << "n_miss"
+ << "\t"
+ << "alpha"
+ << "\t"
+ << "beta"
+ << "\t"
+ << "gamma" << endl;
+
+ size_t t = 0;
+ for (size_t i = 0; i < ns_total; ++i) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+ << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
+ outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
+ << "\t";
+ outfile << 0.0 << "\t" << 0.0 << endl;
+ t++;
+ }
+
+ outfile.clear();
+ outfile.close();
+ return;
}
-void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
- const gsl_matrix *Result_gamma, const size_t w_col) {
- string file_gamma, file_hyp;
- file_gamma=path_out+"/"+file_out;
- file_gamma+=".gamma.txt";
- file_hyp=path_out+"/"+file_out;
- file_hyp+=".hyp.txt";
-
- ofstream outfile_gamma, outfile_hyp;
-
- if (flag==0) {
- outfile_gamma.open (file_gamma.c_str(), ofstream::out);
- outfile_hyp.open (file_hyp.c_str(), ofstream::out);
- if (!outfile_gamma) {
- cout<<"error writing file: "<<file_gamma<<endl;
- return;
- }
- if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
- return;
- }
-
- outfile_hyp<<"h \t pve \t rho \t pge \t pi \t n_gamma"<<endl;
-
- for (size_t i=0; i<s_max; ++i) {
- outfile_gamma<<"s"<<i<<"\t";
- }
- outfile_gamma<<endl;
- }
- else {
- outfile_gamma.open (file_gamma.c_str(), ofstream::app);
- outfile_hyp.open (file_hyp.c_str(), ofstream::app);
- if (!outfile_gamma) {
- cout<<"error writing file: "<<file_gamma<<endl;
- return;
- }
- if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
- return;
- }
-
- size_t w;
- if (w_col==0) {w=w_pace;}
- else {w=w_col;}
-
- for (size_t i=0; i<w; ++i) {
- outfile_hyp<<scientific;
- for (size_t j=0; j<4; ++j) {
- outfile_hyp<<setprecision(6)<<
- gsl_matrix_get (Result_hyp, i, j)<<"\t";
- }
- outfile_hyp<<setprecision(6)<<
- exp(gsl_matrix_get (Result_hyp, i, 4))<<"\t";
- outfile_hyp<<(int)gsl_matrix_get(Result_hyp,i,5)<<"\t";
- outfile_hyp<<endl;
- }
-
- for (size_t i=0; i<w; ++i) {
- for (size_t j=0; j<s_max; ++j) {
- outfile_gamma<<
- (int)gsl_matrix_get(Result_gamma,i,j)<<"\t";
- }
- outfile_gamma<<endl;
- }
-
- }
-
- outfile_hyp.close();
- outfile_hyp.clear();
- outfile_gamma.close();
- outfile_gamma.clear();
- return;
+void BSLMM::WriteResult(const int flag, const gsl_matrix *Result_hyp,
+ const gsl_matrix *Result_gamma, const size_t w_col) {
+ string file_gamma, file_hyp;
+ file_gamma = path_out + "/" + file_out;
+ file_gamma += ".gamma.txt";
+ file_hyp = path_out + "/" + file_out;
+ file_hyp += ".hyp.txt";
+
+ ofstream outfile_gamma, outfile_hyp;
+
+ if (flag == 0) {
+ outfile_gamma.open(file_gamma.c_str(), ofstream::out);
+ outfile_hyp.open(file_hyp.c_str(), ofstream::out);
+ if (!outfile_gamma) {
+ cout << "error writing file: " << file_gamma << endl;
+ return;
+ }
+ if (!outfile_hyp) {
+ cout << "error writing file: " << file_hyp << endl;
+ return;
+ }
+
+ outfile_hyp << "h \t pve \t rho \t pge \t pi \t n_gamma" << endl;
+
+ for (size_t i = 0; i < s_max; ++i) {
+ outfile_gamma << "s" << i << "\t";
+ }
+ outfile_gamma << endl;
+ } else {
+ outfile_gamma.open(file_gamma.c_str(), ofstream::app);
+ outfile_hyp.open(file_hyp.c_str(), ofstream::app);
+ if (!outfile_gamma) {
+ cout << "error writing file: " << file_gamma << endl;
+ return;
+ }
+ if (!outfile_hyp) {
+ cout << "error writing file: " << file_hyp << endl;
+ return;
+ }
+
+ size_t w;
+ if (w_col == 0) {
+ w = w_pace;
+ } else {
+ w = w_col;
+ }
+
+ for (size_t i = 0; i < w; ++i) {
+ outfile_hyp << scientific;
+ for (size_t j = 0; j < 4; ++j) {
+ outfile_hyp << setprecision(6) << gsl_matrix_get(Result_hyp, i, j)
+ << "\t";
+ }
+ outfile_hyp << setprecision(6) << exp(gsl_matrix_get(Result_hyp, i, 4))
+ << "\t";
+ outfile_hyp << (int)gsl_matrix_get(Result_hyp, i, 5) << "\t";
+ outfile_hyp << endl;
+ }
+
+ for (size_t i = 0; i < w; ++i) {
+ for (size_t j = 0; j < s_max; ++j) {
+ outfile_gamma << (int)gsl_matrix_get(Result_gamma, i, j) << "\t";
+ }
+ outfile_gamma << endl;
+ }
+ }
+
+ outfile_hyp.close();
+ outfile_hyp.clear();
+ outfile_gamma.close();
+ outfile_gamma.clear();
+ return;
}
-void BSLMM::CalcPgamma (double *p_gamma) {
- double p, s=0.0;
- for (size_t i=0; i<ns_test; ++i) {
- p=0.7*gsl_ran_geometric_pdf (i+1, 1.0/geo_mean)+0.3/
- (double)ns_test;
- p_gamma[i]=p;
- s+=p;
- }
- for (size_t i=0; i<ns_test; ++i) {
- p=p_gamma[i];
- p_gamma[i]=p/s;
- }
- return;
+void BSLMM::CalcPgamma(double *p_gamma) {
+ double p, s = 0.0;
+ for (size_t i = 0; i < ns_test; ++i) {
+ p = 0.7 * gsl_ran_geometric_pdf(i + 1, 1.0 / geo_mean) +
+ 0.3 / (double)ns_test;
+ p_gamma[i] = p;
+ s += p;
+ }
+ for (size_t i = 0; i < ns_test; ++i) {
+ p = p_gamma[i];
+ p_gamma[i] = p / s;
+ }
+ return;
}
-void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X,
- vector<size_t> &rank) {
- size_t pos;
- for (size_t i=0; i<rank.size(); ++i) {
- pos=mapRank2pos[rank[i]];
- gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, i);
- gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos);
- gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
- }
-
- return;
+void BSLMM::SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X,
+ vector<size_t> &rank) {
+ size_t pos;
+ for (size_t i = 0; i < rank.size(); ++i) {
+ pos = mapRank2pos[rank[i]];
+ gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, i);
+ gsl_vector_const_view X_col = gsl_matrix_const_column(X, pos);
+ gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector);
+ }
+
+ return;
}
-double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
- const double sigma_a2) {
- double pve, var_y;
+double BSLMM::CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+ const double sigma_a2) {
+ double pve, var_y;
- gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
- gsl_vector *Xty=gsl_vector_alloc (UtXgamma->size2);
- gsl_vector *OiXty=gsl_vector_alloc (UtXgamma->size2);
+ gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
+ gsl_vector *Xty = gsl_vector_alloc(UtXgamma->size2);
+ gsl_vector *OiXty = gsl_vector_alloc(UtXgamma->size2);
- gsl_matrix_set_identity (Omega);
- gsl_matrix_scale (Omega, 1.0/sigma_a2);
+ gsl_matrix_set_identity(Omega);
+ gsl_matrix_scale(Omega, 1.0 / sigma_a2);
- lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma,
- 1.0, Omega);
- gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
+ lapack_dgemm((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega);
+ gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
- CholeskySolve(Omega, Xty, OiXty);
+ CholeskySolve(Omega, Xty, OiXty);
- gsl_blas_ddot (Xty, OiXty, &pve);
- gsl_blas_ddot (Uty, Uty, &var_y);
+ gsl_blas_ddot(Xty, OiXty, &pve);
+ gsl_blas_ddot(Uty, Uty, &var_y);
- pve/=var_y;
+ pve /= var_y;
- gsl_matrix_free (Omega);
- gsl_vector_free (Xty);
- gsl_vector_free (OiXty);
+ gsl_matrix_free(Omega);
+ gsl_vector_free(Xty);
+ gsl_vector_free(OiXty);
- return pve;
+ return pve;
}
-void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
- vector<size_t> &rank, class HYPBSLMM &cHyp,
- vector<pair<size_t, double> > &pos_loglr) {
- double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1);
-
- cHyp.n_gamma=0;
- for (size_t i=0; i<pos_loglr.size(); ++i) {
- if (2.0*pos_loglr[i].second>q_genome) {cHyp.n_gamma++;}
- }
- if (cHyp.n_gamma<10) {cHyp.n_gamma=10;}
-
- if (cHyp.n_gamma>s_max) {cHyp.n_gamma=s_max;}
- if (cHyp.n_gamma<s_min) {cHyp.n_gamma=s_min;}
-
- rank.clear();
- for (size_t i=0; i<cHyp.n_gamma; ++i) {
- rank.push_back(i);
- }
-
- cHyp.logp=log((double)cHyp.n_gamma/(double)ns_test);
- cHyp.h=pve_null;
-
- if (cHyp.logp==0) {cHyp.logp=-0.000001;}
- if (cHyp.h==0) {cHyp.h=0.1;}
-
- gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp.n_gamma);
- SetXgamma (UtXgamma, UtX, rank);
- double sigma_a2;
- if (trace_G!=0) {
- sigma_a2=cHyp.h*1.0/
- (trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
- } else {
- sigma_a2=cHyp.h*1.0/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
- }
- if (sigma_a2==0) {sigma_a2=0.025;}
- cHyp.rho=CalcPveLM (UtXgamma, Uty, sigma_a2)/cHyp.h;
- gsl_matrix_free (UtXgamma);
-
- if (cHyp.rho>1.0) {cHyp.rho=1.0;}
-
- if (cHyp.h<h_min) {cHyp.h=h_min;}
- if (cHyp.h>h_max) {cHyp.h=h_max;}
- if (cHyp.rho<rho_min) {cHyp.rho=rho_min;}
- if (cHyp.rho>rho_max) {cHyp.rho=rho_max;}
- if (cHyp.logp<logp_min) {cHyp.logp=logp_min;}
- if (cHyp.logp>logp_max) {cHyp.logp=logp_max;}
-
- cout<<"initial value of h = "<<cHyp.h<<endl;
- cout<<"initial value of rho = "<<cHyp.rho<<endl;
- cout<<"initial value of pi = "<<exp(cHyp.logp)<<endl;
- cout<<"initial value of |gamma| = "<<cHyp.n_gamma<<endl;
-
- return;
+void BSLMM::InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty,
+ vector<size_t> &rank, class HYPBSLMM &cHyp,
+ vector<pair<size_t, double>> &pos_loglr) {
+ double q_genome = gsl_cdf_chisq_Qinv(0.05 / (double)ns_test, 1);
+
+ cHyp.n_gamma = 0;
+ for (size_t i = 0; i < pos_loglr.size(); ++i) {
+ if (2.0 * pos_loglr[i].second > q_genome) {
+ cHyp.n_gamma++;
+ }
+ }
+ if (cHyp.n_gamma < 10) {
+ cHyp.n_gamma = 10;
+ }
+
+ if (cHyp.n_gamma > s_max) {
+ cHyp.n_gamma = s_max;
+ }
+ if (cHyp.n_gamma < s_min) {
+ cHyp.n_gamma = s_min;
+ }
+
+ rank.clear();
+ for (size_t i = 0; i < cHyp.n_gamma; ++i) {
+ rank.push_back(i);
+ }
+
+ cHyp.logp = log((double)cHyp.n_gamma / (double)ns_test);
+ cHyp.h = pve_null;
+
+ if (cHyp.logp == 0) {
+ cHyp.logp = -0.000001;
+ }
+ if (cHyp.h == 0) {
+ cHyp.h = 0.1;
+ }
+
+ gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp.n_gamma);
+ SetXgamma(UtXgamma, UtX, rank);
+ double sigma_a2;
+ if (trace_G != 0) {
+ sigma_a2 = cHyp.h * 1.0 /
+ (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+ } else {
+ sigma_a2 = cHyp.h * 1.0 / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+ }
+ if (sigma_a2 == 0) {
+ sigma_a2 = 0.025;
+ }
+ cHyp.rho = CalcPveLM(UtXgamma, Uty, sigma_a2) / cHyp.h;
+ gsl_matrix_free(UtXgamma);
+
+ if (cHyp.rho > 1.0) {
+ cHyp.rho = 1.0;
+ }
+
+ if (cHyp.h < h_min) {
+ cHyp.h = h_min;
+ }
+ if (cHyp.h > h_max) {
+ cHyp.h = h_max;
+ }
+ if (cHyp.rho < rho_min) {
+ cHyp.rho = rho_min;
+ }
+ if (cHyp.rho > rho_max) {
+ cHyp.rho = rho_max;
+ }
+ if (cHyp.logp < logp_min) {
+ cHyp.logp = logp_min;
+ }
+ if (cHyp.logp > logp_max) {
+ cHyp.logp = logp_max;
+ }
+
+ cout << "initial value of h = " << cHyp.h << endl;
+ cout << "initial value of rho = " << cHyp.rho << endl;
+ cout << "initial value of pi = " << exp(cHyp.logp) << endl;
+ cout << "initial value of |gamma| = " << cHyp.n_gamma << endl;
+
+ return;
}
-double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval,
- gsl_vector *Utu, gsl_vector *alpha_prime,
- class HYPBSLMM &cHyp) {
- double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h));
-
- gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size);
- gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
-
- double logpost=0.0;
- double d, ds, uy, Hi_yy=0, logdet_H=0.0;
- for (size_t i=0; i<ni_test; ++i) {
- d=gsl_vector_get (K_eval, i)*sigma_b2;
- ds=d/(d+1.0);
- d=1.0/(d+1.0);
- gsl_vector_set (weight_Hi, i, d);
-
- logdet_H-=log(d);
- uy=gsl_vector_get (Uty, i);
- Hi_yy+=d*uy*uy;
-
- gsl_vector_set (Utu_rand, i,
- gsl_ran_gaussian(gsl_r, 1)*sqrt(ds));
- }
-
- // Sample tau.
- double tau=1.0;
- if (a_mode==11) {
- tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy);
- }
-
- // Sample alpha.
- gsl_vector_memcpy (alpha_prime, Uty);
- gsl_vector_mul (alpha_prime, weight_Hi);
- gsl_vector_scale (alpha_prime, sigma_b2);
-
- // Sample u.
- gsl_vector_memcpy (Utu, alpha_prime);
- gsl_vector_mul (Utu, K_eval);
- if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));}
- gsl_vector_add (Utu, Utu_rand);
-
- // For quantitative traits, calculate pve and ppe.
- if (a_mode==11) {
- gsl_blas_ddot (Utu, Utu, &d);
- cHyp.pve=d/(double)ni_test;
- cHyp.pve/=cHyp.pve+1.0/tau;
- cHyp.pge=0.0;
- }
-
- // Calculate likelihood.
- logpost=-0.5*logdet_H;
- if (a_mode==11) {logpost-=0.5*(double)ni_test*log(Hi_yy);}
- else {logpost-=0.5*Hi_yy;}
-
- logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
- ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
-
- gsl_vector_free (Utu_rand);
- gsl_vector_free (weight_Hi);
-
- return logpost;
+double BSLMM::CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval,
+ gsl_vector *Utu, gsl_vector *alpha_prime,
+ class HYPBSLMM &cHyp) {
+ double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
+
+ gsl_vector *Utu_rand = gsl_vector_alloc(Uty->size);
+ gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size);
+
+ double logpost = 0.0;
+ double d, ds, uy, Hi_yy = 0, logdet_H = 0.0;
+ for (size_t i = 0; i < ni_test; ++i) {
+ d = gsl_vector_get(K_eval, i) * sigma_b2;
+ ds = d / (d + 1.0);
+ d = 1.0 / (d + 1.0);
+ gsl_vector_set(weight_Hi, i, d);
+
+ logdet_H -= log(d);
+ uy = gsl_vector_get(Uty, i);
+ Hi_yy += d * uy * uy;
+
+ gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
+ }
+
+ // Sample tau.
+ double tau = 1.0;
+ if (a_mode == 11) {
+ tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / Hi_yy);
+ }
+
+ // Sample alpha.
+ gsl_vector_memcpy(alpha_prime, Uty);
+ gsl_vector_mul(alpha_prime, weight_Hi);
+ gsl_vector_scale(alpha_prime, sigma_b2);
+
+ // Sample u.
+ gsl_vector_memcpy(Utu, alpha_prime);
+ gsl_vector_mul(Utu, K_eval);
+ if (a_mode == 11) {
+ gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
+ }
+ gsl_vector_add(Utu, Utu_rand);
+
+ // For quantitative traits, calculate pve and ppe.
+ if (a_mode == 11) {
+ gsl_blas_ddot(Utu, Utu, &d);
+ cHyp.pve = d / (double)ni_test;
+ cHyp.pve /= cHyp.pve + 1.0 / tau;
+ cHyp.pge = 0.0;
+ }
+
+ // Calculate likelihood.
+ logpost = -0.5 * logdet_H;
+ if (a_mode == 11) {
+ logpost -= 0.5 * (double)ni_test * log(Hi_yy);
+ } else {
+ logpost -= 0.5 * Hi_yy;
+ }
+
+ logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+ ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
+
+ gsl_vector_free(Utu_rand);
+ gsl_vector_free(weight_Hi);
+
+ return logpost;
}
-double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- gsl_vector *UtXb, gsl_vector *Utu,
- gsl_vector *alpha_prime, gsl_vector *beta,
- class HYPBSLMM &cHyp) {
- clock_t time_start;
-
- double sigma_a2=cHyp.h*cHyp.rho/
- (trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
- double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h));
-
- double logpost=0.0;
- double d, ds, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
-
- gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
- UtXgamma->size2);
- gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
- gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
- gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
- gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1);
- gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
-
- gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
-
- logdet_H=0.0; P_yy=0.0;
- for (size_t i=0; i<ni_test; ++i) {
- gsl_vector_view UtXgamma_row=
- gsl_matrix_row (UtXgamma_eval, i);
- d=gsl_vector_get (K_eval, i)*sigma_b2;
- ds=d/(d+1.0);
- d=1.0/(d+1.0);
- gsl_vector_set (weight_Hi, i, d);
-
- logdet_H-=log(d);
- uy=gsl_vector_get (Uty, i);
- P_yy+=d*uy*uy;
- gsl_vector_scale (&UtXgamma_row.vector, d);
-
- gsl_vector_set(Utu_rand,i,gsl_ran_gaussian(gsl_r,1)*sqrt(ds));
- }
-
- // Calculate Omega.
- gsl_matrix_set_identity (Omega);
-
- time_start=clock();
- lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval,
- UtXgamma, 1.0, Omega);
- time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-
- // Calculate beta_hat.
- gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
-
- logdet_O=CholeskySolve(Omega, XtHiy, beta_hat);
-
- gsl_vector_scale (beta_hat, sigma_a2);
-
- gsl_blas_ddot (XtHiy, beta_hat, &d);
- P_yy-=d;
-
- // Sample tau.
- double tau=1.0;
- if (a_mode==11) {
- tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy);
- }
-
- // Sample beta.
- for (size_t i=0; i<beta->size; i++)
- {
- d=gsl_ran_gaussian(gsl_r, 1);
- gsl_vector_set(beta, i, d);
- }
- gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
-
- // This computes inv(L^T(Omega)) %*% beta.
- gsl_vector_scale(beta, sqrt(sigma_a2/tau));
- gsl_vector_add(beta, beta_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
-
- // Sample alpha.
- gsl_vector_memcpy (alpha_prime, Uty);
- gsl_vector_sub (alpha_prime, UtXb);
- gsl_vector_mul (alpha_prime, weight_Hi);
- gsl_vector_scale (alpha_prime, sigma_b2);
-
- // Sample u.
- gsl_vector_memcpy (Utu, alpha_prime);
- gsl_vector_mul (Utu, K_eval);
-
- if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));}
- gsl_vector_add (Utu, Utu_rand);
-
- // For quantitative traits, calculate pve and pge.
- if (a_mode==11) {
- gsl_blas_ddot (UtXb, UtXb, &d);
- cHyp.pge=d/(double)ni_test;
-
- gsl_blas_ddot (Utu, Utu, &d);
- cHyp.pve=cHyp.pge+d/(double)ni_test;
-
- if (cHyp.pve==0) {cHyp.pge=0.0;}
- else {cHyp.pge/=cHyp.pve;}
- cHyp.pve/=cHyp.pve+1.0/tau;
- }
-
- gsl_matrix_free (UtXgamma_eval);
- gsl_matrix_free (Omega);
- gsl_vector_free (XtHiy);
- gsl_vector_free (beta_hat);
- gsl_vector_free (Utu_rand);
- gsl_vector_free (weight_Hi);
-
- logpost=-0.5*logdet_H-0.5*logdet_O;
- if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
- else {logpost-=0.5*P_yy;}
- logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
- ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
-
- return logpost;
+double BSLMM::CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+ const gsl_vector *K_eval, gsl_vector *UtXb,
+ gsl_vector *Utu, gsl_vector *alpha_prime,
+ gsl_vector *beta, class HYPBSLMM &cHyp) {
+ clock_t time_start;
+
+ double sigma_a2 = cHyp.h * cHyp.rho /
+ (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+ double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
+
+ double logpost = 0.0;
+ double d, ds, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0;
+
+ gsl_matrix *UtXgamma_eval =
+ gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2);
+ gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
+ gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2);
+ gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2);
+ gsl_vector *Utu_rand = gsl_vector_alloc(UtXgamma->size1);
+ gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1);
+
+ gsl_matrix_memcpy(UtXgamma_eval, UtXgamma);
+
+ logdet_H = 0.0;
+ P_yy = 0.0;
+ for (size_t i = 0; i < ni_test; ++i) {
+ gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i);
+ d = gsl_vector_get(K_eval, i) * sigma_b2;
+ ds = d / (d + 1.0);
+ d = 1.0 / (d + 1.0);
+ gsl_vector_set(weight_Hi, i, d);
+
+ logdet_H -= log(d);
+ uy = gsl_vector_get(Uty, i);
+ P_yy += d * uy * uy;
+ gsl_vector_scale(&UtXgamma_row.vector, d);
+
+ gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
+ }
+
+ // Calculate Omega.
+ gsl_matrix_set_identity(Omega);
+
+ time_start = clock();
+ lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0,
+ Omega);
+ time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Calculate beta_hat.
+ gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
+
+ logdet_O = CholeskySolve(Omega, XtHiy, beta_hat);
+
+ gsl_vector_scale(beta_hat, sigma_a2);
+
+ gsl_blas_ddot(XtHiy, beta_hat, &d);
+ P_yy -= d;
+
+ // Sample tau.
+ double tau = 1.0;
+ if (a_mode == 11) {
+ tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
+ }
+
+ // Sample beta.
+ for (size_t i = 0; i < beta->size; i++) {
+ d = gsl_ran_gaussian(gsl_r, 1);
+ gsl_vector_set(beta, i, d);
+ }
+ gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
+
+ // This computes inv(L^T(Omega)) %*% beta.
+ gsl_vector_scale(beta, sqrt(sigma_a2 / tau));
+ gsl_vector_add(beta, beta_hat);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
+
+ // Sample alpha.
+ gsl_vector_memcpy(alpha_prime, Uty);
+ gsl_vector_sub(alpha_prime, UtXb);
+ gsl_vector_mul(alpha_prime, weight_Hi);
+ gsl_vector_scale(alpha_prime, sigma_b2);
+
+ // Sample u.
+ gsl_vector_memcpy(Utu, alpha_prime);
+ gsl_vector_mul(Utu, K_eval);
+
+ if (a_mode == 11) {
+ gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
+ }
+ gsl_vector_add(Utu, Utu_rand);
+
+ // For quantitative traits, calculate pve and pge.
+ if (a_mode == 11) {
+ gsl_blas_ddot(UtXb, UtXb, &d);
+ cHyp.pge = d / (double)ni_test;
+
+ gsl_blas_ddot(Utu, Utu, &d);
+ cHyp.pve = cHyp.pge + d / (double)ni_test;
+
+ if (cHyp.pve == 0) {
+ cHyp.pge = 0.0;
+ } else {
+ cHyp.pge /= cHyp.pve;
+ }
+ cHyp.pve /= cHyp.pve + 1.0 / tau;
+ }
+
+ gsl_matrix_free(UtXgamma_eval);
+ gsl_matrix_free(Omega);
+ gsl_vector_free(XtHiy);
+ gsl_vector_free(beta_hat);
+ gsl_vector_free(Utu_rand);
+ gsl_vector_free(weight_Hi);
+
+ logpost = -0.5 * logdet_H - 0.5 * logdet_O;
+ if (a_mode == 11) {
+ logpost -= 0.5 * (double)ni_test * log(P_yy);
+ } else {
+ logpost -= 0.5 * P_yy;
+ }
+ logpost +=
+ ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+ ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
+
+ return logpost;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
- gsl_vector *z_hat, class HYPBSLMM &cHyp) {
- double d;
+void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu,
+ gsl_vector *z_hat, class HYPBSLMM &cHyp) {
+ double d;
- gsl_blas_ddot (Utu, Utu, &d);
- cHyp.pve=d/(double)ni_test;
+ gsl_blas_ddot(Utu, Utu, &d);
+ cHyp.pve = d / (double)ni_test;
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
- cHyp.pve/=cHyp.pve+1.0;
- cHyp.pge=0.0;
+ cHyp.pve /= cHyp.pve + 1.0;
+ cHyp.pge = 0.0;
- return;
+ return;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
- const gsl_vector *Utu, gsl_vector *z_hat,
- class HYPBSLMM &cHyp) {
- double d;
- gsl_vector *UtXbU=gsl_vector_alloc (Utu->size);
+void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb,
+ const gsl_vector *Utu, gsl_vector *z_hat,
+ class HYPBSLMM &cHyp) {
+ double d;
+ gsl_vector *UtXbU = gsl_vector_alloc(Utu->size);
- gsl_blas_ddot (UtXb, UtXb, &d);
- cHyp.pge=d/(double)ni_test;
+ gsl_blas_ddot(UtXb, UtXb, &d);
+ cHyp.pge = d / (double)ni_test;
- gsl_blas_ddot (Utu, Utu, &d);
- cHyp.pve=cHyp.pge+d/(double)ni_test;
+ gsl_blas_ddot(Utu, Utu, &d);
+ cHyp.pve = cHyp.pge + d / (double)ni_test;
- gsl_vector_memcpy (UtXbU, Utu);
- gsl_vector_add (UtXbU, UtXb);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
+ gsl_vector_memcpy(UtXbU, Utu);
+ gsl_vector_add(UtXbU, UtXb);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
- if (cHyp.pve==0) {cHyp.pge=0.0;}
- else {cHyp.pge/=cHyp.pve;}
+ if (cHyp.pve == 0) {
+ cHyp.pge = 0.0;
+ } else {
+ cHyp.pge /= cHyp.pve;
+ }
- cHyp.pve/=cHyp.pve+1.0;
+ cHyp.pve /= cHyp.pve + 1.0;
- gsl_vector_free(UtXbU);
- return;
+ gsl_vector_free(UtXbU);
+ return;
}
-void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
- gsl_vector *z) {
- double d1, d2, z_rand=0.0;
- for (size_t i=0; i<z->size; ++i) {
- d1=gsl_vector_get (y, i);
- d2=gsl_vector_get (z_hat, i);
-
- // y is centered for case control studies.
- if (d1<=0.0) {
-
- // Control, right truncated.
- do {
- z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
- } while (z_rand>0.0);
- }
- else {
- do {
- z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
- } while (z_rand<0.0);
- }
-
- gsl_vector_set (z, i, z_rand);
- }
-
- return;
+void BSLMM::SampleZ(const gsl_vector *y, const gsl_vector *z_hat,
+ gsl_vector *z) {
+ double d1, d2, z_rand = 0.0;
+ for (size_t i = 0; i < z->size; ++i) {
+ d1 = gsl_vector_get(y, i);
+ d2 = gsl_vector_get(z_hat, i);
+
+ // y is centered for case control studies.
+ if (d1 <= 0.0) {
+
+ // Control, right truncated.
+ do {
+ z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
+ } while (z_rand > 0.0);
+ } else {
+ do {
+ z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
+ } while (z_rand < 0.0);
+ }
+
+ gsl_vector_set(z, i, z_rand);
+ }
+
+ return;
}
-double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new, const size_t &repeat) {
+double BSLMM::ProposeHnRho(const class HYPBSLMM &cHyp_old,
+ class HYPBSLMM &cHyp_new, const size_t &repeat) {
- double h=cHyp_old.h, rho=cHyp_old.rho;
+ double h = cHyp_old.h, rho = cHyp_old.rho;
- double d_h=(h_max-h_min)*h_scale, d_rho=(rho_max-rho_min)*rho_scale;
+ double d_h = (h_max - h_min) * h_scale,
+ d_rho = (rho_max - rho_min) * rho_scale;
- for (size_t i=0; i<repeat; ++i) {
- h=h+(gsl_rng_uniform(gsl_r)-0.5)*d_h;
- if (h<h_min) {h=2*h_min-h;}
- if (h>h_max) {h=2*h_max-h;}
+ for (size_t i = 0; i < repeat; ++i) {
+ h = h + (gsl_rng_uniform(gsl_r) - 0.5) * d_h;
+ if (h < h_min) {
+ h = 2 * h_min - h;
+ }
+ if (h > h_max) {
+ h = 2 * h_max - h;
+ }
- rho=rho+(gsl_rng_uniform(gsl_r)-0.5)*d_rho;
- if (rho<rho_min) {rho=2*rho_min-rho;}
- if (rho>rho_max) {rho=2*rho_max-rho;}
- }
- cHyp_new.h=h;
- cHyp_new.rho=rho;
- return 0.0;
+ rho = rho + (gsl_rng_uniform(gsl_r) - 0.5) * d_rho;
+ if (rho < rho_min) {
+ rho = 2 * rho_min - rho;
+ }
+ if (rho > rho_max) {
+ rho = 2 * rho_max - rho;
+ }
+ }
+ cHyp_new.h = h;
+ cHyp_new.rho = rho;
+ return 0.0;
}
-double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new, const size_t &repeat) {
- double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp;
- double log_ratio=0.0;
+double BSLMM::ProposePi(const class HYPBSLMM &cHyp_old,
+ class HYPBSLMM &cHyp_new, const size_t &repeat) {
+ double logp_old = cHyp_old.logp, logp_new = cHyp_old.logp;
+ double log_ratio = 0.0;
- double d_logp=min(0.1, (logp_max-logp_min)*logp_scale);
+ double d_logp = min(0.1, (logp_max - logp_min) * logp_scale);
- for (size_t i=0; i<repeat; ++i) {
- logp_new=logp_old+(gsl_rng_uniform(gsl_r)-0.5)*d_logp;
- if (logp_new<logp_min) {logp_new=2*logp_min-logp_new;}
- if (logp_new>logp_max) {logp_new=2*logp_max-logp_new;}
- log_ratio+=logp_new-logp_old;
- logp_old=logp_new;
- }
- cHyp_new.logp=logp_new;
+ for (size_t i = 0; i < repeat; ++i) {
+ logp_new = logp_old + (gsl_rng_uniform(gsl_r) - 0.5) * d_logp;
+ if (logp_new < logp_min) {
+ logp_new = 2 * logp_min - logp_new;
+ }
+ if (logp_new > logp_max) {
+ logp_new = 2 * logp_max - logp_new;
+ }
+ log_ratio += logp_new - logp_old;
+ logp_old = logp_new;
+ }
+ cHyp_new.logp = logp_new;
- return log_ratio;
+ return log_ratio;
}
-bool comp_vec (size_t a, size_t b) {
- return (a < b);
-}
+bool comp_vec(size_t a, size_t b) { return (a < b); }
+
+double BSLMM::ProposeGamma(const vector<size_t> &rank_old,
+ vector<size_t> &rank_new, const double *p_gamma,
+ const class HYPBSLMM &cHyp_old,
+ class HYPBSLMM &cHyp_new, const size_t &repeat) {
+ map<size_t, int> mapRank2in;
+ size_t r;
+ double unif, logp = 0.0;
+ int flag_gamma;
+ size_t r_add, r_remove, col_id;
+
+ rank_new.clear();
+ if (cHyp_old.n_gamma != rank_old.size()) {
+ cout << "size wrong" << endl;
+ }
+
+ if (cHyp_old.n_gamma != 0) {
+ for (size_t i = 0; i < rank_old.size(); ++i) {
+ r = rank_old[i];
+ rank_new.push_back(r);
+ mapRank2in[r] = 1;
+ }
+ }
+ cHyp_new.n_gamma = cHyp_old.n_gamma;
+
+ for (size_t i = 0; i < repeat; ++i) {
+ unif = gsl_rng_uniform(gsl_r);
+
+ if (unif < 0.40 && cHyp_new.n_gamma < s_max) {
+ flag_gamma = 1;
+ } else if (unif >= 0.40 && unif < 0.80 && cHyp_new.n_gamma > s_min) {
+ flag_gamma = 2;
+ } else if (unif >= 0.80 && cHyp_new.n_gamma > 0 &&
+ cHyp_new.n_gamma < ns_test) {
+ flag_gamma = 3;
+ } else {
+ flag_gamma = 4;
+ }
+
+ if (flag_gamma == 1) {
+
+ // Add a SNP.
+ do {
+ r_add = gsl_ran_discrete(gsl_r, gsl_t);
+ } while (mapRank2in.count(r_add) != 0);
+
+ double prob_total = 1.0;
+ for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
+ r = rank_new[i];
+ prob_total -= p_gamma[r];
+ }
+
+ mapRank2in[r_add] = 1;
+ rank_new.push_back(r_add);
+ cHyp_new.n_gamma++;
+ logp += -log(p_gamma[r_add] / prob_total) - log((double)cHyp_new.n_gamma);
+ } else if (flag_gamma == 2) {
+
+ // Delete a SNP.
+ col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
+ r_remove = rank_new[col_id];
+
+ double prob_total = 1.0;
+ for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
+ r = rank_new[i];
+ prob_total -= p_gamma[r];
+ }
+ prob_total += p_gamma[r_remove];
+
+ mapRank2in.erase(r_remove);
+ rank_new.erase(rank_new.begin() + col_id);
+ logp +=
+ log(p_gamma[r_remove] / prob_total) + log((double)cHyp_new.n_gamma);
+ cHyp_new.n_gamma--;
+ } else if (flag_gamma == 3) {
+
+ // Switch a SNP.
+ col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
+ r_remove = rank_new[col_id];
+
+ // Be careful with the proposal.
+ do {
+ r_add = gsl_ran_discrete(gsl_r, gsl_t);
+ } while (mapRank2in.count(r_add) != 0);
+
+ double prob_total = 1.0;
+ for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
+ r = rank_new[i];
+ prob_total -= p_gamma[r];
+ }
+
+ logp += log(p_gamma[r_remove] /
+ (prob_total + p_gamma[r_remove] - p_gamma[r_add]));
+ logp -= log(p_gamma[r_add] / prob_total);
+
+ mapRank2in.erase(r_remove);
+ mapRank2in[r_add] = 1;
+ rank_new.erase(rank_new.begin() + col_id);
+ rank_new.push_back(r_add);
+ } else {
+ logp += 0;
+ } // Do not change.
+ }
+
+ stable_sort(rank_new.begin(), rank_new.end(), comp_vec);
-double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
- vector<size_t> &rank_new,
- const double *p_gamma,
- const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new,
- const size_t &repeat) {
- map<size_t, int> mapRank2in;
- size_t r;
- double unif, logp=0.0;
- int flag_gamma;
- size_t r_add, r_remove, col_id;
-
- rank_new.clear();
- if (cHyp_old.n_gamma!=rank_old.size()) {cout<<"size wrong"<<endl;}
-
- if (cHyp_old.n_gamma!=0) {
- for (size_t i=0; i<rank_old.size(); ++i) {
- r=rank_old[i];
- rank_new.push_back(r);
- mapRank2in[r]=1;
- }
- }
- cHyp_new.n_gamma=cHyp_old.n_gamma;
-
- for (size_t i=0; i<repeat; ++i) {
- unif=gsl_rng_uniform(gsl_r);
-
- if (unif < 0.40 && cHyp_new.n_gamma<s_max) {flag_gamma=1;}
- else if (unif>=0.40 && unif < 0.80 &&
- cHyp_new.n_gamma>s_min) {
- flag_gamma=2;
- }
- else if (unif>=0.80 && cHyp_new.n_gamma>0 &&
- cHyp_new.n_gamma<ns_test) {
- flag_gamma=3;
- }
- else {flag_gamma=4;}
-
- if(flag_gamma==1) {
-
- // Add a SNP.
- do {
- r_add=gsl_ran_discrete (gsl_r, gsl_t);
- } while (mapRank2in.count(r_add)!=0);
-
- double prob_total=1.0;
- for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
- r=rank_new[i];
- prob_total-=p_gamma[r];
- }
-
- mapRank2in[r_add]=1;
- rank_new.push_back(r_add);
- cHyp_new.n_gamma++;
- logp+=-log(p_gamma[r_add]/prob_total)-
- log((double)cHyp_new.n_gamma);
- }
- else if (flag_gamma==2) {
-
- // Delete a SNP.
- col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
- r_remove=rank_new[col_id];
-
- double prob_total=1.0;
- for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
- r=rank_new[i];
- prob_total-=p_gamma[r];
- }
- prob_total+=p_gamma[r_remove];
-
- mapRank2in.erase(r_remove);
- rank_new.erase(rank_new.begin()+col_id);
- logp+=log(p_gamma[r_remove]/prob_total)+
- log((double)cHyp_new.n_gamma);
- cHyp_new.n_gamma--;
- }
- else if (flag_gamma==3) {
-
- // Switch a SNP.
- col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
- r_remove=rank_new[col_id];
-
- // Be careful with the proposal.
- do {
- r_add=gsl_ran_discrete (gsl_r, gsl_t);
- } while (mapRank2in.count(r_add)!=0);
-
- double prob_total=1.0;
- for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
- r=rank_new[i];
- prob_total-=p_gamma[r];
- }
-
- logp+=log(p_gamma[r_remove]/
- (prob_total+p_gamma[r_remove]-p_gamma[r_add]));
- logp-=log(p_gamma[r_add]/prob_total);
-
- mapRank2in.erase(r_remove);
- mapRank2in[r_add]=1;
- rank_new.erase(rank_new.begin()+col_id);
- rank_new.push_back(r_add);
- }
- else {logp+=0;} // Do not change.
- }
-
- stable_sort (rank_new.begin(), rank_new.end(), comp_vec);
-
- mapRank2in.clear();
- return logp;
+ mapRank2in.clear();
+ return logp;
}
-bool comp_lr (pair<size_t, double> a, pair<size_t, double> b) {
- return (a.second > b.second);
+bool comp_lr(pair<size_t, double> a, pair<size_t, double> b) {
+ return (a.second > b.second);
}
// If a_mode==13 then Uty==y.
-void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const gsl_vector *y) {
- clock_t time_start;
-
- class HYPBSLMM cHyp_old, cHyp_new;
-
- gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6);
- gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
-
- gsl_vector *alpha_prime=gsl_vector_alloc (ni_test);
- gsl_vector *alpha_new=gsl_vector_alloc (ni_test);
- gsl_vector *alpha_old=gsl_vector_alloc (ni_test);
- gsl_vector *Utu=gsl_vector_alloc (ni_test);
- gsl_vector *Utu_new=gsl_vector_alloc (ni_test);
- gsl_vector *Utu_old=gsl_vector_alloc (ni_test);
-
- gsl_vector *UtXb_new=gsl_vector_alloc (ni_test);
- gsl_vector *UtXb_old=gsl_vector_alloc (ni_test);
-
- gsl_vector *z_hat=gsl_vector_alloc (ni_test);
- gsl_vector *z=gsl_vector_alloc (ni_test);
- gsl_vector *Utz=gsl_vector_alloc (ni_test);
-
- gsl_vector_memcpy (Utz, Uty);
-
- double logPost_new, logPost_old;
- double logMHratio;
- double mean_z=0.0;
-
- gsl_matrix_set_zero (Result_gamma);
- gsl_vector_set_zero (Utu);
- gsl_vector_set_zero (alpha_prime);
- if (a_mode==13) {
- pheno_mean=0.0;
- }
-
- vector<pair<double, double> > beta_g;
- for (size_t i=0; i<ns_test; i++) {
- beta_g.push_back(make_pair(0.0, 0.0));
- }
-
- vector<size_t> rank_new, rank_old;
- vector<double> beta_new, beta_old;
-
- vector<pair<size_t, double> > pos_loglr;
-
- time_start=clock();
- MatrixCalcLR (U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr);
- time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr);
- for (size_t i=0; i<ns_test; ++i) {
- mapRank2pos[i]=pos_loglr[i].first;
- }
-
- // Calculate proposal distribution for gamma (unnormalized),
- // and set up gsl_r and gsl_t.
- gsl_rng_env_setup();
- const gsl_rng_type * gslType;
- gslType = gsl_rng_default;
- if (randseed<0)
- {
- time_t rawtime;
- time (&rawtime);
- tm * ptm = gmtime (&rawtime);
-
- randseed = (unsigned) (ptm->tm_hour%24*3600+
- ptm->tm_min*60+ptm->tm_sec);
- }
- gsl_r = gsl_rng_alloc(gslType);
- gsl_rng_set(gsl_r, randseed);
-
- double *p_gamma = new double[ns_test];
- CalcPgamma (p_gamma);
-
- gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
- // Initial parameters.
- InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr);
-
- cHyp_initial=cHyp_old;
-
- if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
- logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old,
- cHyp_old);
-
- beta_old.clear();
- for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
- beta_old.push_back(0);
- }
- }
- else {
- gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test,
- cHyp_old.n_gamma);
- gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma);
- SetXgamma (UtXgamma, UtX, rank_old);
- logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old,
- Utu_old, alpha_old, beta, cHyp_old);
-
- beta_old.clear();
- for (size_t i=0; i<beta->size; ++i) {
- beta_old.push_back(gsl_vector_get(beta, i));
- }
- gsl_matrix_free (UtXgamma);
- gsl_vector_free (beta);
- }
-
- // Calculate centered z_hat, and pve.
- if (a_mode==13) {
- time_start=clock();
- if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
- CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old);
- }
- else {
- CalcCC_PVEnZ (U, UtXb_old, Utu_old, z_hat, cHyp_old);
- }
- time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- }
-
- // Start MCMC.
- int accept;
- size_t total_step=w_step+s_step;
- size_t w=0, w_col, pos;
- size_t repeat=0;
-
- for (size_t t=0; t<total_step; ++t) {
- if (t%d_pace==0 || t==total_step-1) {
- ProgressBar ("Running MCMC ", t, total_step-1,
- (double)n_accept/(double)(t*n_mh+1));
- }
-
- if (a_mode==13) {
- SampleZ (y, z_hat, z);
- mean_z=CenterVector (z);
-
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz);
- time_UtZ+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
-
- // First proposal.
- if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
- logPost_old=
- CalcPosterior(Utz, K_eval, Utu_old,
- alpha_old, cHyp_old);
- beta_old.clear();
- for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
- beta_old.push_back(0);
- }
- }
- else {
- gsl_matrix *UtXgamma=
- gsl_matrix_alloc (ni_test, cHyp_old.n_gamma);
- gsl_vector *beta=
- gsl_vector_alloc (cHyp_old.n_gamma);
- SetXgamma (UtXgamma, UtX, rank_old);
- logPost_old=
- CalcPosterior(UtXgamma, Utz, K_eval,
- UtXb_old, Utu_old, alpha_old,
- beta, cHyp_old);
-
- beta_old.clear();
- for (size_t i=0; i<beta->size; ++i) {
- beta_old.push_back(gsl_vector_get(beta, i));
- }
- gsl_matrix_free (UtXgamma);
- gsl_vector_free (beta);
- }
- }
-
- // M-H steps.
- for (size_t i=0; i<n_mh; ++i) {
- if (gsl_rng_uniform(gsl_r)<0.33) {
- repeat = 1+gsl_rng_uniform_int(gsl_r, 20);
- }
- else {
- repeat=1;
- }
-
- logMHratio=0.0;
- logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat);
- logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma,
- cHyp_old, cHyp_new, repeat);
- logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat);
-
- if (cHyp_new.n_gamma==0 || cHyp_new.rho==0) {
- logPost_new=CalcPosterior(Utz, K_eval, Utu_new,
- alpha_new, cHyp_new);
- beta_new.clear();
- for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
- beta_new.push_back(0);
- }
- }
- else {
- gsl_matrix *UtXgamma=
- gsl_matrix_alloc (ni_test, cHyp_new.n_gamma);
- gsl_vector *beta=
- gsl_vector_alloc (cHyp_new.n_gamma);
- SetXgamma (UtXgamma, UtX, rank_new);
- logPost_new=
- CalcPosterior(UtXgamma, Utz, K_eval,
- UtXb_new, Utu_new, alpha_new,
- beta, cHyp_new);
- beta_new.clear();
- for (size_t i=0; i<beta->size; ++i) {
- beta_new.push_back(gsl_vector_get(beta, i));
- }
- gsl_matrix_free (UtXgamma);
- gsl_vector_free (beta);
- }
-
- logMHratio+=logPost_new-logPost_old;
-
- if (logMHratio>0 ||
- log(gsl_rng_uniform(gsl_r))<logMHratio) {
- accept=1; n_accept++;
- }
- else {accept=0;}
-
- if (accept==1) {
- logPost_old=logPost_new;
- rank_old.clear(); beta_old.clear();
- if (rank_new.size()!=0) {
- for (size_t i=0; i<rank_new.size(); ++i) {
- rank_old.push_back(rank_new[i]);
- beta_old.push_back(beta_new[i]);
- }
- }
- cHyp_old=cHyp_new;
- gsl_vector_memcpy (alpha_old, alpha_new);
- gsl_vector_memcpy (UtXb_old, UtXb_new);
- gsl_vector_memcpy (Utu_old, Utu_new);
- }
- else {cHyp_new=cHyp_old;}
- }
-
- // Calculate z_hat, and pve.
- if (a_mode==13) {
- time_start=clock();
- if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
- CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old);
- }
- else {
- CalcCC_PVEnZ (U, UtXb_old, Utu_old,
- z_hat, cHyp_old);
- }
-
- // Sample mu and update z_hat.
- gsl_vector_sub (z, z_hat);
- mean_z+=CenterVector(z);
- mean_z+=
- gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test));
- gsl_vector_add_constant (z_hat, mean_z);
-
- time_UtZ+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
- }
-
- // Save data.
- if (t<w_step) {continue;}
- else {
- if (t%r_pace==0) {
- w_col=w%w_pace;
- if (w_col==0) {
- if (w==0) {
- WriteResult (0, Result_hyp,
- Result_gamma, w_col);
- }
- else {
- WriteResult (1, Result_hyp,
- Result_gamma, w_col);
- gsl_matrix_set_zero (Result_hyp);
- gsl_matrix_set_zero (Result_gamma);
- }
- }
-
- gsl_matrix_set(Result_hyp,w_col,0,cHyp_old.h);
- gsl_matrix_set(Result_hyp,w_col,1,cHyp_old.pve);
- gsl_matrix_set(Result_hyp,w_col,2,cHyp_old.rho);
- gsl_matrix_set(Result_hyp,w_col,3,cHyp_old.pge);
- gsl_matrix_set(Result_hyp,w_col,4,cHyp_old.logp);
- gsl_matrix_set(Result_hyp,w_col,5,cHyp_old.n_gamma);
-
- for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
- pos=mapRank2pos[rank_old[i]]+1;
-
- gsl_matrix_set(Result_gamma,w_col,i,
- pos);
-
- beta_g[pos-1].first+=beta_old[i];
- beta_g[pos-1].second+=1.0;
- }
-
- gsl_vector_add (alpha_prime, alpha_old);
- gsl_vector_add (Utu, Utu_old);
-
- if (a_mode==13) {
- pheno_mean+=mean_z;
- }
-
- w++;
-
- }
-
- }
- }
- cout<<endl;
-
- w_col=w%w_pace;
- WriteResult (1, Result_hyp, Result_gamma, w_col);
-
- gsl_matrix_free(Result_hyp);
- gsl_matrix_free(Result_gamma);
-
- gsl_vector_free(z_hat);
- gsl_vector_free(z);
- gsl_vector_free(Utz);
- gsl_vector_free(UtXb_new);
- gsl_vector_free(UtXb_old);
- gsl_vector_free(alpha_new);
- gsl_vector_free(alpha_old);
- gsl_vector_free(Utu_new);
- gsl_vector_free(Utu_old);
-
- gsl_vector_scale (alpha_prime, 1.0/(double)w);
- gsl_vector_scale (Utu, 1.0/(double)w);
- if (a_mode==13) {
- pheno_mean/=(double)w;
- }
-
- gsl_vector *alpha=gsl_vector_alloc (ns_test);
- gsl_blas_dgemv (CblasTrans, 1.0/(double)ns_test, UtX,
- alpha_prime, 0.0, alpha);
- WriteParam (beta_g, alpha, w);
- gsl_vector_free(alpha);
-
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime);
- WriteBV(alpha_prime);
-
- gsl_vector_free(alpha_prime);
- gsl_vector_free(Utu);
-
- delete [] p_gamma;
- beta_g.clear();
-
- return;
+void BSLMM::MCMC(const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ const gsl_vector *y) {
+ clock_t time_start;
+
+ class HYPBSLMM cHyp_old, cHyp_new;
+
+ gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6);
+ gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
+
+ gsl_vector *alpha_prime = gsl_vector_alloc(ni_test);
+ gsl_vector *alpha_new = gsl_vector_alloc(ni_test);
+ gsl_vector *alpha_old = gsl_vector_alloc(ni_test);
+ gsl_vector *Utu = gsl_vector_alloc(ni_test);
+ gsl_vector *Utu_new = gsl_vector_alloc(ni_test);
+ gsl_vector *Utu_old = gsl_vector_alloc(ni_test);
+
+ gsl_vector *UtXb_new = gsl_vector_alloc(ni_test);
+ gsl_vector *UtXb_old = gsl_vector_alloc(ni_test);
+
+ gsl_vector *z_hat = gsl_vector_alloc(ni_test);
+ gsl_vector *z = gsl_vector_alloc(ni_test);
+ gsl_vector *Utz = gsl_vector_alloc(ni_test);
+
+ gsl_vector_memcpy(Utz, Uty);
+
+ double logPost_new, logPost_old;
+ double logMHratio;
+ double mean_z = 0.0;
+
+ gsl_matrix_set_zero(Result_gamma);
+ gsl_vector_set_zero(Utu);
+ gsl_vector_set_zero(alpha_prime);
+ if (a_mode == 13) {
+ pheno_mean = 0.0;
+ }
+
+ vector<pair<double, double>> beta_g;
+ for (size_t i = 0; i < ns_test; i++) {
+ beta_g.push_back(make_pair(0.0, 0.0));
+ }
+
+ vector<size_t> rank_new, rank_old;
+ vector<double> beta_new, beta_old;
+
+ vector<pair<size_t, double>> pos_loglr;
+
+ time_start = clock();
+ MatrixCalcLR(U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr);
+ time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr);
+ for (size_t i = 0; i < ns_test; ++i) {
+ mapRank2pos[i] = pos_loglr[i].first;
+ }
+
+ // Calculate proposal distribution for gamma (unnormalized),
+ // and set up gsl_r and gsl_t.
+ gsl_rng_env_setup();
+ const gsl_rng_type *gslType;
+ gslType = gsl_rng_default;
+ if (randseed < 0) {
+ time_t rawtime;
+ time(&rawtime);
+ tm *ptm = gmtime(&rawtime);
+
+ randseed =
+ (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec);
+ }
+ gsl_r = gsl_rng_alloc(gslType);
+ gsl_rng_set(gsl_r, randseed);
+
+ double *p_gamma = new double[ns_test];
+ CalcPgamma(p_gamma);
+
+ gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
+
+ // Initial parameters.
+ InitialMCMC(UtX, Utz, rank_old, cHyp_old, pos_loglr);
+
+ cHyp_initial = cHyp_old;
+
+ if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+ logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
+
+ beta_old.clear();
+ for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+ beta_old.push_back(0);
+ }
+ } else {
+ gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
+ gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
+ SetXgamma(UtXgamma, UtX, rank_old);
+ logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
+ alpha_old, beta, cHyp_old);
+
+ beta_old.clear();
+ for (size_t i = 0; i < beta->size; ++i) {
+ beta_old.push_back(gsl_vector_get(beta, i));
+ }
+ gsl_matrix_free(UtXgamma);
+ gsl_vector_free(beta);
+ }
+
+ // Calculate centered z_hat, and pve.
+ if (a_mode == 13) {
+ time_start = clock();
+ if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+ CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
+ } else {
+ CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
+ }
+ time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ }
+
+ // Start MCMC.
+ int accept;
+ size_t total_step = w_step + s_step;
+ size_t w = 0, w_col, pos;
+ size_t repeat = 0;
+
+ for (size_t t = 0; t < total_step; ++t) {
+ if (t % d_pace == 0 || t == total_step - 1) {
+ ProgressBar("Running MCMC ", t, total_step - 1,
+ (double)n_accept / (double)(t * n_mh + 1));
+ }
+
+ if (a_mode == 13) {
+ SampleZ(y, z_hat, z);
+ mean_z = CenterVector(z);
+
+ time_start = clock();
+ gsl_blas_dgemv(CblasTrans, 1.0, U, z, 0.0, Utz);
+ time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // First proposal.
+ if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+ logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
+ beta_old.clear();
+ for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+ beta_old.push_back(0);
+ }
+ } else {
+ gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
+ gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
+ SetXgamma(UtXgamma, UtX, rank_old);
+ logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
+ alpha_old, beta, cHyp_old);
+
+ beta_old.clear();
+ for (size_t i = 0; i < beta->size; ++i) {
+ beta_old.push_back(gsl_vector_get(beta, i));
+ }
+ gsl_matrix_free(UtXgamma);
+ gsl_vector_free(beta);
+ }
+ }
+
+ // M-H steps.
+ for (size_t i = 0; i < n_mh; ++i) {
+ if (gsl_rng_uniform(gsl_r) < 0.33) {
+ repeat = 1 + gsl_rng_uniform_int(gsl_r, 20);
+ } else {
+ repeat = 1;
+ }
+
+ logMHratio = 0.0;
+ logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
+ logMHratio +=
+ ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
+ logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
+
+ if (cHyp_new.n_gamma == 0 || cHyp_new.rho == 0) {
+ logPost_new = CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new);
+ beta_new.clear();
+ for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
+ beta_new.push_back(0);
+ }
+ } else {
+ gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_new.n_gamma);
+ gsl_vector *beta = gsl_vector_alloc(cHyp_new.n_gamma);
+ SetXgamma(UtXgamma, UtX, rank_new);
+ logPost_new = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new,
+ alpha_new, beta, cHyp_new);
+ beta_new.clear();
+ for (size_t i = 0; i < beta->size; ++i) {
+ beta_new.push_back(gsl_vector_get(beta, i));
+ }
+ gsl_matrix_free(UtXgamma);
+ gsl_vector_free(beta);
+ }
+
+ logMHratio += logPost_new - logPost_old;
+
+ if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
+ accept = 1;
+ n_accept++;
+ } else {
+ accept = 0;
+ }
+
+ if (accept == 1) {
+ logPost_old = logPost_new;
+ rank_old.clear();
+ beta_old.clear();
+ if (rank_new.size() != 0) {
+ for (size_t i = 0; i < rank_new.size(); ++i) {
+ rank_old.push_back(rank_new[i]);
+ beta_old.push_back(beta_new[i]);
+ }
+ }
+ cHyp_old = cHyp_new;
+ gsl_vector_memcpy(alpha_old, alpha_new);
+ gsl_vector_memcpy(UtXb_old, UtXb_new);
+ gsl_vector_memcpy(Utu_old, Utu_new);
+ } else {
+ cHyp_new = cHyp_old;
+ }
+ }
+
+ // Calculate z_hat, and pve.
+ if (a_mode == 13) {
+ time_start = clock();
+ if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
+ CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
+ } else {
+ CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
+ }
+
+ // Sample mu and update z_hat.
+ gsl_vector_sub(z, z_hat);
+ mean_z += CenterVector(z);
+ mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
+ gsl_vector_add_constant(z_hat, mean_z);
+
+ time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ }
+
+ // Save data.
+ if (t < w_step) {
+ continue;
+ } else {
+ if (t % r_pace == 0) {
+ w_col = w % w_pace;
+ if (w_col == 0) {
+ if (w == 0) {
+ WriteResult(0, Result_hyp, Result_gamma, w_col);
+ } else {
+ WriteResult(1, Result_hyp, Result_gamma, w_col);
+ gsl_matrix_set_zero(Result_hyp);
+ gsl_matrix_set_zero(Result_gamma);
+ }
+ }
+
+ gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
+ gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
+ gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
+ gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
+ gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
+ gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
+
+ for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+ pos = mapRank2pos[rank_old[i]] + 1;
+
+ gsl_matrix_set(Result_gamma, w_col, i, pos);
+
+ beta_g[pos - 1].first += beta_old[i];
+ beta_g[pos - 1].second += 1.0;
+ }
+
+ gsl_vector_add(alpha_prime, alpha_old);
+ gsl_vector_add(Utu, Utu_old);
+
+ if (a_mode == 13) {
+ pheno_mean += mean_z;
+ }
+
+ w++;
+ }
+ }
+ }
+ cout << endl;
+
+ w_col = w % w_pace;
+ WriteResult(1, Result_hyp, Result_gamma, w_col);
+
+ gsl_matrix_free(Result_hyp);
+ gsl_matrix_free(Result_gamma);
+
+ gsl_vector_free(z_hat);
+ gsl_vector_free(z);
+ gsl_vector_free(Utz);
+ gsl_vector_free(UtXb_new);
+ gsl_vector_free(UtXb_old);
+ gsl_vector_free(alpha_new);
+ gsl_vector_free(alpha_old);
+ gsl_vector_free(Utu_new);
+ gsl_vector_free(Utu_old);
+
+ gsl_vector_scale(alpha_prime, 1.0 / (double)w);
+ gsl_vector_scale(Utu, 1.0 / (double)w);
+ if (a_mode == 13) {
+ pheno_mean /= (double)w;
+ }
+
+ gsl_vector *alpha = gsl_vector_alloc(ns_test);
+ gsl_blas_dgemv(CblasTrans, 1.0 / (double)ns_test, UtX, alpha_prime, 0.0,
+ alpha);
+ WriteParam(beta_g, alpha, w);
+ gsl_vector_free(alpha);
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime);
+ WriteBV(alpha_prime);
+
+ gsl_vector_free(alpha_prime);
+ gsl_vector_free(Utu);
+
+ delete[] p_gamma;
+ beta_g.clear();
+
+ return;
}
void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *eval,
- const double lambda) {
- gsl_vector *beta=gsl_vector_alloc (UtX->size2);
- gsl_vector *H_eval=gsl_vector_alloc (Uty->size);
- gsl_vector *bv=gsl_vector_alloc (Uty->size);
+ const gsl_vector *Uty, const gsl_vector *eval,
+ const double lambda) {
+ gsl_vector *beta = gsl_vector_alloc(UtX->size2);
+ gsl_vector *H_eval = gsl_vector_alloc(Uty->size);
+ gsl_vector *bv = gsl_vector_alloc(Uty->size);
- gsl_vector_memcpy (H_eval, eval);
- gsl_vector_scale (H_eval, lambda);
- gsl_vector_add_constant (H_eval, 1.0);
+ gsl_vector_memcpy(H_eval, eval);
+ gsl_vector_scale(H_eval, lambda);
+ gsl_vector_add_constant(H_eval, 1.0);
- gsl_vector_memcpy (bv, Uty);
- gsl_vector_div (bv, H_eval);
+ gsl_vector_memcpy(bv, Uty);
+ gsl_vector_div(bv, H_eval);
- gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2,
- UtX, bv, 0.0, beta);
- gsl_vector_add_constant (H_eval, -1.0);
- gsl_vector_mul (H_eval, bv);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, H_eval, 0.0, bv);
+ gsl_blas_dgemv(CblasTrans, lambda / (double)UtX->size2, UtX, bv, 0.0, beta);
+ gsl_vector_add_constant(H_eval, -1.0);
+ gsl_vector_mul(H_eval, bv);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, H_eval, 0.0, bv);
- WriteParam (beta);
- WriteBV(bv);
+ WriteParam(beta);
+ WriteBV(bv);
- gsl_vector_free (H_eval);
- gsl_vector_free (beta);
- gsl_vector_free (bv);
+ gsl_vector_free(H_eval);
+ gsl_vector_free(beta);
+ gsl_vector_free(bv);
- return;
+ return;
}
// Below fits MCMC for rho=1.
-void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y,
- const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
- time_t time_start=clock();
- gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1,
- s_size);
- gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
- gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size);
-
- lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix,
- &X_sub.matrix, 0.0, &XtX_sub.matrix);
+void BSLMM::CalcXtX(const gsl_matrix *X, const gsl_vector *y,
+ const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
+ time_t time_start = clock();
+ gsl_matrix_const_view X_sub =
+ gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size);
+ gsl_matrix_view XtX_sub = gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
+ gsl_vector_view Xty_sub = gsl_vector_subvector(Xty, 0, s_size);
+
+ lapack_dgemm((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0,
+ &XtX_sub.matrix);
gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector);
- time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
return;
}
-void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
- const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
- const gsl_vector *y, const vector<size_t> &rank_old,
- const vector<size_t> &rank_new, gsl_matrix *X_new,
- gsl_matrix *XtX_new, gsl_vector *Xty_new) {
+void BSLMM::SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old,
+ const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
+ const gsl_vector *y, const vector<size_t> &rank_old,
+ const vector<size_t> &rank_new, gsl_matrix *X_new,
+ gsl_matrix *XtX_new, gsl_vector *Xty_new) {
double d;
// rank_old and rank_new are sorted already inside ProposeGamma
// calculate vectors rank_remove and rank_add.
// make sure that v_size is larger than repeat.
- size_t v_size=20;
+ size_t v_size = 20;
vector<size_t> rank_remove(v_size), rank_add(v_size),
- rank_union(s_max+v_size);
+ rank_union(s_max + v_size);
vector<size_t>::iterator it;
- it=set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(),
- rank_new.end(), rank_remove.begin());
- rank_remove.resize(it-rank_remove.begin());
+ it = set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(),
+ rank_new.end(), rank_remove.begin());
+ rank_remove.resize(it - rank_remove.begin());
- it=set_difference (rank_new.begin(), rank_new.end(), rank_old.begin(),
- rank_old.end(), rank_add.begin());
- rank_add.resize(it-rank_add.begin());
+ it = set_difference(rank_new.begin(), rank_new.end(), rank_old.begin(),
+ rank_old.end(), rank_add.begin());
+ rank_add.resize(it - rank_add.begin());
- it=set_union (rank_new.begin(), rank_new.end(), rank_old.begin(),
- rank_old.end(), rank_union.begin());
- rank_union.resize(it-rank_union.begin());
+ it = set_union(rank_new.begin(), rank_new.end(), rank_old.begin(),
+ rank_old.end(), rank_union.begin());
+ rank_union.resize(it - rank_union.begin());
// Map rank_remove and rank_add.
map<size_t, int> mapRank2in_remove, mapRank2in_add;
- for (size_t i=0; i<rank_remove.size(); i++) {
- mapRank2in_remove[rank_remove[i]]=1;
+ for (size_t i = 0; i < rank_remove.size(); i++) {
+ mapRank2in_remove[rank_remove[i]] = 1;
}
- for (size_t i=0; i<rank_add.size(); i++) {
- mapRank2in_add[rank_add[i]]=1;
+ for (size_t i = 0; i < rank_add.size(); i++) {
+ mapRank2in_add[rank_add[i]] = 1;
}
// Obtain the subset of matrix/vector.
- gsl_matrix_const_view Xold_sub=
- gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size());
- gsl_matrix_const_view XtXold_sub=
- gsl_matrix_const_submatrix(XtX_old, 0, 0, rank_old.size(),
- rank_old.size());
- gsl_vector_const_view Xtyold_sub=
- gsl_vector_const_subvector(Xty_old, 0, rank_old.size());
-
- gsl_matrix_view Xnew_sub=
- gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size());
- gsl_matrix_view XtXnew_sub=
- gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
- gsl_vector_view Xtynew_sub=
- gsl_vector_subvector(Xty_new, 0, rank_new.size());
+ gsl_matrix_const_view Xold_sub =
+ gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size());
+ gsl_matrix_const_view XtXold_sub = gsl_matrix_const_submatrix(
+ XtX_old, 0, 0, rank_old.size(), rank_old.size());
+ gsl_vector_const_view Xtyold_sub =
+ gsl_vector_const_subvector(Xty_old, 0, rank_old.size());
+
+ gsl_matrix_view Xnew_sub =
+ gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size());
+ gsl_matrix_view XtXnew_sub =
+ gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
+ gsl_vector_view Xtynew_sub =
+ gsl_vector_subvector(Xty_new, 0, rank_new.size());
// Get X_new and calculate XtX_new.
- if (rank_remove.size()==0 && rank_add.size()==0) {
+ if (rank_remove.size() == 0 && rank_add.size() == 0) {
gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix);
gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix);
gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector);
} else {
size_t i_old, j_old, i_new, j_new, i_add, j_add, i_flag, j_flag;
- if (rank_add.size()==0) {
- i_old=0; i_new=0;
- for (size_t i=0; i<rank_union.size(); i++) {
- if (mapRank2in_remove.count(rank_old[i_old])!=0) {i_old++; continue;}
+ if (rank_add.size() == 0) {
+ i_old = 0;
+ i_new = 0;
+ for (size_t i = 0; i < rank_union.size(); i++) {
+ if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
+ i_old++;
+ continue;
+ }
- gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
- gsl_vector_const_view Xcopy_col=gsl_matrix_const_column(X_old, i_old);
- gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
+ gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
+ gsl_vector_const_view Xcopy_col = gsl_matrix_const_column(X_old, i_old);
+ gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
- d=gsl_vector_get (Xty_old, i_old);
- gsl_vector_set (Xty_new, i_new, d);
+ d = gsl_vector_get(Xty_old, i_old);
+ gsl_vector_set(Xty_new, i_new, d);
- j_old=i_old; j_new=i_new;
- for (size_t j=i; j<rank_union.size(); j++) {
- if (mapRank2in_remove.count(rank_old[j_old])!=0) {j_old++; continue;}
+ j_old = i_old;
+ j_new = i_new;
+ for (size_t j = i; j < rank_union.size(); j++) {
+ if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
+ j_old++;
+ continue;
+ }
- d=gsl_matrix_get(XtX_old, i_old, j_old);
+ d = gsl_matrix_get(XtX_old, i_old, j_old);
- gsl_matrix_set (XtX_new, i_new, j_new, d);
- if (i_new!=j_new) {gsl_matrix_set (XtX_new, j_new, i_new, d);}
+ gsl_matrix_set(XtX_new, i_new, j_new, d);
+ if (i_new != j_new) {
+ gsl_matrix_set(XtX_new, j_new, i_new, d);
+ }
- j_old++; j_new++;
+ j_old++;
+ j_new++;
}
- i_old++; i_new++;
+ i_old++;
+ i_new++;
}
} else {
- gsl_matrix *X_add=gsl_matrix_alloc(X_old->size1, rank_add.size() );
- gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2);
- gsl_matrix *XtX_ao=gsl_matrix_alloc(X_add->size2, X_old->size2);
- gsl_vector *Xty_add=gsl_vector_alloc(X_add->size2);
+ gsl_matrix *X_add = gsl_matrix_alloc(X_old->size1, rank_add.size());
+ gsl_matrix *XtX_aa = gsl_matrix_alloc(X_add->size2, X_add->size2);
+ gsl_matrix *XtX_ao = gsl_matrix_alloc(X_add->size2, X_old->size2);
+ gsl_vector *Xty_add = gsl_vector_alloc(X_add->size2);
// Get X_add.
- SetXgamma (X_add, X, rank_add);
+ SetXgamma(X_add, X, rank_add);
// Get t(X_add)X_add and t(X_add)X_temp.
- clock_t time_start=clock();
+ clock_t time_start = clock();
// Somehow the lapack_dgemm does not work here.
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add,
- 0.0, XtX_aa);
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_old,
- 0.0, XtX_ao);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao);
gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add);
- time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Save to X_new, XtX_new and Xty_new.
- i_old=0; i_new=0; i_add=0;
- for (size_t i=0; i<rank_union.size(); i++) {
- if (mapRank2in_remove.count(rank_old[i_old])!=0) {
- i_old++;
- continue;
- }
- if (mapRank2in_add.count(rank_new[i_new])!=0) {
- i_flag=1;
- } else {
- i_flag=0;
- }
-
- gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
- if (i_flag==1) {
- gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i_add);
- gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
- } else {
- gsl_vector_const_view Xcopy_col=
- gsl_matrix_const_column(X_old, i_old);
- gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
- }
-
- if (i_flag==1) {
- d=gsl_vector_get (Xty_add, i_add);
+ i_old = 0;
+ i_new = 0;
+ i_add = 0;
+ for (size_t i = 0; i < rank_union.size(); i++) {
+ if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
+ i_old++;
+ continue;
+ }
+ if (mapRank2in_add.count(rank_new[i_new]) != 0) {
+ i_flag = 1;
+ } else {
+ i_flag = 0;
+ }
+
+ gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
+ if (i_flag == 1) {
+ gsl_vector_view Xcopy_col = gsl_matrix_column(X_add, i_add);
+ gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
+ } else {
+ gsl_vector_const_view Xcopy_col =
+ gsl_matrix_const_column(X_old, i_old);
+ gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
+ }
+
+ if (i_flag == 1) {
+ d = gsl_vector_get(Xty_add, i_add);
} else {
- d=gsl_vector_get (Xty_old, i_old);
+ d = gsl_vector_get(Xty_old, i_old);
+ }
+ gsl_vector_set(Xty_new, i_new, d);
+
+ j_old = i_old;
+ j_new = i_new;
+ j_add = i_add;
+ for (size_t j = i; j < rank_union.size(); j++) {
+ if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
+ j_old++;
+ continue;
+ }
+ if (mapRank2in_add.count(rank_new[j_new]) != 0) {
+ j_flag = 1;
+ } else {
+ j_flag = 0;
+ }
+
+ if (i_flag == 1 && j_flag == 1) {
+ d = gsl_matrix_get(XtX_aa, i_add, j_add);
+ } else if (i_flag == 1) {
+ d = gsl_matrix_get(XtX_ao, i_add, j_old);
+ } else if (j_flag == 1) {
+ d = gsl_matrix_get(XtX_ao, j_add, i_old);
+ } else {
+ d = gsl_matrix_get(XtX_old, i_old, j_old);
+ }
+
+ gsl_matrix_set(XtX_new, i_new, j_new, d);
+ if (i_new != j_new) {
+ gsl_matrix_set(XtX_new, j_new, i_new, d);
+ }
+
+ j_new++;
+ if (j_flag == 1) {
+ j_add++;
+ } else {
+ j_old++;
+ }
}
- gsl_vector_set (Xty_new, i_new, d);
-
- j_old=i_old; j_new=i_new; j_add=i_add;
- for (size_t j=i; j<rank_union.size(); j++) {
- if (mapRank2in_remove.count(rank_old[j_old])!=0) {
- j_old++;
- continue;
- }
- if (mapRank2in_add.count(rank_new[j_new])!=0) {
- j_flag=1;
- } else {
- j_flag=0;
- }
-
- if (i_flag==1 && j_flag==1) {
- d=gsl_matrix_get(XtX_aa, i_add, j_add);
- } else if (i_flag==1) {
- d=gsl_matrix_get(XtX_ao, i_add, j_old);
- } else if (j_flag==1) {
- d=gsl_matrix_get(XtX_ao, j_add, i_old);
- } else {
- d=gsl_matrix_get(XtX_old, i_old, j_old);
- }
-
- gsl_matrix_set (XtX_new, i_new, j_new, d);
- if (i_new!=j_new) {gsl_matrix_set (XtX_new, j_new, i_new, d);}
-
- j_new++; if (j_flag==1) {j_add++;} else {j_old++;}
+ i_new++;
+ if (i_flag == 1) {
+ i_add++;
+ } else {
+ i_old++;
}
- i_new++; if (i_flag==1) {i_add++;} else {i_old++;}
}
gsl_matrix_free(X_add);
@@ -1377,7 +1445,6 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
gsl_matrix_free(XtX_ao);
gsl_vector_free(Xty_add);
}
-
}
rank_remove.clear();
@@ -1389,462 +1456,442 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
return;
}
-double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) {
- double logpost=0.0;
+double BSLMM::CalcPosterior(const double yty, class HYPBSLMM &cHyp) {
+ double logpost = 0.0;
- // For quantitative traits, calculate pve and pge.
- // Pve and pge for case/control data are calculted in CalcCC_PVEnZ.
- if (a_mode==11) {
- cHyp.pve=0.0;
- cHyp.pge=1.0;
- }
+ // For quantitative traits, calculate pve and pge.
+ // Pve and pge for case/control data are calculted in CalcCC_PVEnZ.
+ if (a_mode == 11) {
+ cHyp.pve = 0.0;
+ cHyp.pge = 1.0;
+ }
- // Calculate likelihood.
- if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);}
- else {logpost-=0.5*yty;}
+ // Calculate likelihood.
+ if (a_mode == 11) {
+ logpost -= 0.5 * (double)ni_test * log(yty);
+ } else {
+ logpost -= 0.5 * yty;
+ }
- logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
- ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
+ logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+ ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
- return logpost;
+ return logpost;
}
-double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
- const gsl_vector *Xty, const double yty,
- const size_t s_size, gsl_vector *Xb,
- gsl_vector *beta, class HYPBSLMM &cHyp) {
- double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
- double logpost=0.0;
- double d, P_yy=yty, logdet_O=0.0;
-
- gsl_matrix_const_view Xgamma_sub=
- gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size);
- gsl_matrix_const_view XtX_sub=
- gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size);
- gsl_vector_const_view Xty_sub=
- gsl_vector_const_subvector (Xty, 0, s_size);
-
- gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size);
- gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size);
- gsl_vector *beta_hat=gsl_vector_alloc (s_size);
- gsl_vector *Xty_temp=gsl_vector_alloc (s_size);
-
- gsl_vector_memcpy (Xty_temp, &Xty_sub.vector);
-
- // Calculate Omega.
- gsl_matrix_memcpy (Omega, &XtX_sub.matrix);
- gsl_matrix_scale (Omega, sigma_a2);
- gsl_matrix_set_identity (M_temp);
- gsl_matrix_add (Omega, M_temp);
-
- // Calculate beta_hat.
- logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
- gsl_vector_scale (beta_hat, sigma_a2);
-
- gsl_blas_ddot (Xty_temp, beta_hat, &d);
- P_yy-=d;
-
- // Sample tau.
- double tau=1.0;
- if (a_mode==11) {
- tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy);
- }
-
- // Sample beta.
- for (size_t i=0; i<s_size; i++)
- {
- d=gsl_ran_gaussian(gsl_r, 1);
- gsl_vector_set(beta, i, d);
- }
- gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size);
- gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega,
- &beta_sub.vector);
-
- // This computes inv(L^T(Omega)) %*% beta.
- gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau));
- gsl_vector_add(&beta_sub.vector, beta_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix,
- &beta_sub.vector, 0.0, Xb);
-
- // For quantitative traits, calculate pve and pge.
- if (a_mode==11) {
- gsl_blas_ddot (Xb, Xb, &d);
- cHyp.pve=d/(double)ni_test;
- cHyp.pve/=cHyp.pve+1.0/tau;
- cHyp.pge=1.0;
- }
-
- logpost=-0.5*logdet_O;
- if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
- else {logpost-=0.5*P_yy;}
-
- logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
- ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
-
- gsl_matrix_free (Omega);
- gsl_matrix_free (M_temp);
- gsl_vector_free (beta_hat);
- gsl_vector_free (Xty_temp);
-
- return logpost;
+double BSLMM::CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX,
+ const gsl_vector *Xty, const double yty,
+ const size_t s_size, gsl_vector *Xb,
+ gsl_vector *beta, class HYPBSLMM &cHyp) {
+ double sigma_a2 = cHyp.h / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
+ double logpost = 0.0;
+ double d, P_yy = yty, logdet_O = 0.0;
+
+ gsl_matrix_const_view Xgamma_sub =
+ gsl_matrix_const_submatrix(Xgamma, 0, 0, Xgamma->size1, s_size);
+ gsl_matrix_const_view XtX_sub =
+ gsl_matrix_const_submatrix(XtX, 0, 0, s_size, s_size);
+ gsl_vector_const_view Xty_sub = gsl_vector_const_subvector(Xty, 0, s_size);
+
+ gsl_matrix *Omega = gsl_matrix_alloc(s_size, s_size);
+ gsl_matrix *M_temp = gsl_matrix_alloc(s_size, s_size);
+ gsl_vector *beta_hat = gsl_vector_alloc(s_size);
+ gsl_vector *Xty_temp = gsl_vector_alloc(s_size);
+
+ gsl_vector_memcpy(Xty_temp, &Xty_sub.vector);
+
+ // Calculate Omega.
+ gsl_matrix_memcpy(Omega, &XtX_sub.matrix);
+ gsl_matrix_scale(Omega, sigma_a2);
+ gsl_matrix_set_identity(M_temp);
+ gsl_matrix_add(Omega, M_temp);
+
+ // Calculate beta_hat.
+ logdet_O = CholeskySolve(Omega, Xty_temp, beta_hat);
+ gsl_vector_scale(beta_hat, sigma_a2);
+
+ gsl_blas_ddot(Xty_temp, beta_hat, &d);
+ P_yy -= d;
+
+ // Sample tau.
+ double tau = 1.0;
+ if (a_mode == 11) {
+ tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
+ }
+
+ // Sample beta.
+ for (size_t i = 0; i < s_size; i++) {
+ d = gsl_ran_gaussian(gsl_r, 1);
+ gsl_vector_set(beta, i, d);
+ }
+ gsl_vector_view beta_sub = gsl_vector_subvector(beta, 0, s_size);
+ gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega,
+ &beta_sub.vector);
+
+ // This computes inv(L^T(Omega)) %*% beta.
+ gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2 / tau));
+ gsl_vector_add(&beta_sub.vector, beta_hat);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0,
+ Xb);
+
+ // For quantitative traits, calculate pve and pge.
+ if (a_mode == 11) {
+ gsl_blas_ddot(Xb, Xb, &d);
+ cHyp.pve = d / (double)ni_test;
+ cHyp.pve /= cHyp.pve + 1.0 / tau;
+ cHyp.pge = 1.0;
+ }
+
+ logpost = -0.5 * logdet_O;
+ if (a_mode == 11) {
+ logpost -= 0.5 * (double)ni_test * log(P_yy);
+ } else {
+ logpost -= 0.5 * P_yy;
+ }
+
+ logpost +=
+ ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+ ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
+
+ gsl_matrix_free(Omega);
+ gsl_matrix_free(M_temp);
+ gsl_vector_free(beta_hat);
+ gsl_vector_free(Xty_temp);
+
+ return logpost;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp)
-{
+void BSLMM::CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp) {
gsl_vector_set_zero(z_hat);
- cHyp.pve=0.0;
- cHyp.pge=1.0;
+ cHyp.pve = 0.0;
+ cHyp.pge = 1.0;
return;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat,
- class HYPBSLMM &cHyp) {
- double d;
+void BSLMM::CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat,
+ class HYPBSLMM &cHyp) {
+ double d;
- gsl_blas_ddot (Xb, Xb, &d);
- cHyp.pve=d/(double)ni_test;
- cHyp.pve/=cHyp.pve+1.0;
- cHyp.pge=1.0;
+ gsl_blas_ddot(Xb, Xb, &d);
+ cHyp.pve = d / (double)ni_test;
+ cHyp.pve /= cHyp.pve + 1.0;
+ cHyp.pge = 1.0;
- gsl_vector_memcpy (z_hat, Xb);
+ gsl_vector_memcpy(z_hat, Xb);
- return;
+ return;
}
// If a_mode==13, then run probit model.
-void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
- clock_t time_start;
- double time_set=0, time_post=0;
-
- class HYPBSLMM cHyp_old, cHyp_new;
-
- gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6);
- gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
-
- gsl_vector *Xb_new=gsl_vector_alloc (ni_test);
- gsl_vector *Xb_old=gsl_vector_alloc (ni_test);
- gsl_vector *z_hat=gsl_vector_alloc (ni_test);
- gsl_vector *z=gsl_vector_alloc (ni_test);
-
- gsl_matrix *Xgamma_old=gsl_matrix_alloc (ni_test, s_max);
- gsl_matrix *XtX_old=gsl_matrix_alloc (s_max, s_max);
- gsl_vector *Xtz_old=gsl_vector_alloc (s_max);
- gsl_vector *beta_old=gsl_vector_alloc (s_max);
-
- gsl_matrix *Xgamma_new=gsl_matrix_alloc (ni_test, s_max);
- gsl_matrix *XtX_new=gsl_matrix_alloc (s_max, s_max);
- gsl_vector *Xtz_new=gsl_vector_alloc (s_max);
- gsl_vector *beta_new=gsl_vector_alloc (s_max);
-
- double ztz=0.0;
- gsl_vector_memcpy (z, y);
-
- // For quantitative traits, y is centered already in
- // gemma.cpp, but just in case.
- double mean_z=CenterVector (z);
- gsl_blas_ddot(z, z, &ztz);
-
- double logPost_new, logPost_old;
- double logMHratio;
-
- gsl_matrix_set_zero (Result_gamma);
- if (a_mode==13) {
- pheno_mean=0.0;
- }
-
- vector<pair<double, double> > beta_g;
- for (size_t i=0; i<ns_test; i++) {
- beta_g.push_back(make_pair(0.0, 0.0));
- }
-
- vector<size_t> rank_new, rank_old;
- vector<pair<size_t, double> > pos_loglr;
-
- time_start=clock();
- MatrixCalcLmLR (X, z, pos_loglr);
- time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr);
- for (size_t i=0; i<ns_test; ++i) {
- mapRank2pos[i]=pos_loglr[i].first;
- }
-
- // Calculate proposal distribution for gamma (unnormalized),
- // and set up gsl_r and gsl_t.
- gsl_rng_env_setup();
- const gsl_rng_type * gslType;
- gslType = gsl_rng_default;
- if (randseed<0)
- {
- time_t rawtime;
- time (&rawtime);
- tm * ptm = gmtime (&rawtime);
-
- randseed = (unsigned) (ptm->tm_hour%24*3600+
- ptm->tm_min*60+ptm->tm_sec);
- }
- gsl_r = gsl_rng_alloc(gslType);
- gsl_rng_set(gsl_r, randseed);
-
- double *p_gamma = new double[ns_test];
- CalcPgamma (p_gamma);
-
- gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
- // Initial parameters.
- InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr);
-
- cHyp_initial=cHyp_old;
-
- if (cHyp_old.n_gamma==0) {
- logPost_old=CalcPosterior (ztz, cHyp_old);
- }
- else {
- SetXgamma (Xgamma_old, X, rank_old);
- CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
- logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz,
- rank_old.size(), Xb_old, beta_old,
- cHyp_old);
- }
-
- // Calculate centered z_hat, and pve.
- if (a_mode==13) {
- if (cHyp_old.n_gamma==0) {
- CalcCC_PVEnZ (z_hat, cHyp_old);
- }
- else {
- CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old);
- }
- }
-
- // Start MCMC.
- int accept;
- size_t total_step=w_step+s_step;
- size_t w=0, w_col, pos;
- size_t repeat=0;
-
- for (size_t t=0; t<total_step; ++t) {
- if (t%d_pace==0 || t==total_step-1) {
- ProgressBar ("Running MCMC ", t, total_step-1,
- (double)n_accept/(double)(t*n_mh+1));
- }
-
- if (a_mode==13) {
- SampleZ (y, z_hat, z);
- mean_z=CenterVector (z);
- gsl_blas_ddot(z,z,&ztz);
-
- // First proposal.
- if (cHyp_old.n_gamma==0) {
- logPost_old=CalcPosterior (ztz, cHyp_old);
- } else {
- gsl_matrix_view Xold_sub=
- gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test,
- rank_old.size());
- gsl_vector_view Xtz_sub=
- gsl_vector_subvector(Xtz_old, 0, rank_old.size());
- gsl_blas_dgemv (CblasTrans, 1.0, &Xold_sub.matrix,
- z, 0.0, &Xtz_sub.vector);
- logPost_old=
- CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz,
- rank_old.size(), Xb_old, beta_old,
- cHyp_old);
- }
- }
-
- // M-H steps.
- for (size_t i=0; i<n_mh; ++i) {
- if (gsl_rng_uniform(gsl_r)<0.33) {
- repeat = 1+gsl_rng_uniform_int(gsl_r, 20);
- }
- else {repeat=1;}
-
- logMHratio=0.0;
- logMHratio+=
- ProposeHnRho(cHyp_old, cHyp_new, repeat);
- logMHratio+=
- ProposeGamma (rank_old, rank_new, p_gamma,
- cHyp_old, cHyp_new, repeat);
- logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat);
-
- if (cHyp_new.n_gamma==0) {
- logPost_new=CalcPosterior (ztz, cHyp_new);
- } else {
-
- // This makes sure that rank_old.size() ==
- // rank_remove.size() does not happen.
- if (cHyp_new.n_gamma<=20 || cHyp_old.n_gamma<=20) {
- time_start=clock();
- SetXgamma (Xgamma_new, X, rank_new);
- CalcXtX (Xgamma_new, z, rank_new.size(),
- XtX_new, Xtz_new);
- time_set+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
- } else {
- time_start=clock();
- SetXgamma (X, Xgamma_old, XtX_old, Xtz_old, z,
- rank_old, rank_new, Xgamma_new,
- XtX_new, Xtz_new);
- time_set+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
- }
- time_start=clock();
- logPost_new=
- CalcPosterior (Xgamma_new, XtX_new, Xtz_new, ztz,
- rank_new.size(), Xb_new, beta_new,
- cHyp_new);
- time_post+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
- }
- logMHratio+=logPost_new-logPost_old;
-
- if (logMHratio>0 ||
- log(gsl_rng_uniform(gsl_r))<logMHratio) {
- accept=1;
- n_accept++;
- }
- else {accept=0;}
-
- if (accept==1) {
- logPost_old=logPost_new;
- cHyp_old=cHyp_new;
- gsl_vector_memcpy (Xb_old, Xb_new);
-
- rank_old.clear();
- if (rank_new.size()!=0) {
- for (size_t i=0;
- i<rank_new.size();
- ++i) {
- rank_old.push_back(rank_new[i]);
- }
-
- gsl_matrix_view Xold_sub=gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size());
- gsl_matrix_view XtXold_sub=gsl_matrix_submatrix(XtX_old, 0, 0, rank_new.size(), rank_new.size());
- gsl_vector_view Xtzold_sub=gsl_vector_subvector(Xtz_old, 0, rank_new.size());
- gsl_vector_view betaold_sub=gsl_vector_subvector(beta_old, 0, rank_new.size());
-
- gsl_matrix_view Xnew_sub=gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size());
- gsl_matrix_view XtXnew_sub=gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
- gsl_vector_view Xtznew_sub=gsl_vector_subvector(Xtz_new, 0, rank_new.size());
- gsl_vector_view betanew_sub=gsl_vector_subvector(beta_new, 0, rank_new.size());
-
- gsl_matrix_memcpy(&Xold_sub.matrix,
- &Xnew_sub.matrix);
- gsl_matrix_memcpy(&XtXold_sub.matrix,
- &XtXnew_sub.matrix);
- gsl_vector_memcpy(&Xtzold_sub.vector,
- &Xtznew_sub.vector);
- gsl_vector_memcpy(&betaold_sub.vector,
- &betanew_sub.vector);
- }
- } else {
- cHyp_new=cHyp_old;
- }
-
- }
-
- // Calculate z_hat, and pve.
- if (a_mode==13) {
- if (cHyp_old.n_gamma==0) {
- CalcCC_PVEnZ (z_hat, cHyp_old);
- }
- else {
- CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old);
- }
-
- // Sample mu and update z_hat.
- gsl_vector_sub (z, z_hat);
- mean_z+=CenterVector(z);
- mean_z+=gsl_ran_gaussian(gsl_r,
- sqrt(1.0/(double) ni_test));
-
- gsl_vector_add_constant (z_hat, mean_z);
- }
-
- // Save data.
- if (t<w_step) {continue;}
- else {
- if (t%r_pace==0) {
- w_col=w%w_pace;
- if (w_col==0) {
- if (w==0) {
- WriteResult(0,Result_hyp,
- Result_gamma,w_col);
- }
- else {
- WriteResult(1,Result_hyp,
- Result_gamma,w_col);
- gsl_matrix_set_zero (Result_hyp);
- gsl_matrix_set_zero (Result_gamma);
- }
- }
-
- gsl_matrix_set(Result_hyp,w_col,0,
- cHyp_old.h);
- gsl_matrix_set(Result_hyp,w_col,1,
- cHyp_old.pve);
- gsl_matrix_set(Result_hyp,w_col,2,
- cHyp_old.rho);
- gsl_matrix_set(Result_hyp,w_col,3,
- cHyp_old.pge);
- gsl_matrix_set(Result_hyp,w_col,4,
- cHyp_old.logp);
- gsl_matrix_set(Result_hyp,w_col,5,
- cHyp_old.n_gamma);
-
- for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
- pos=mapRank2pos[rank_old[i]]+1;
- gsl_matrix_set(Result_gamma,w_col,
- i,pos);
-
- beta_g[pos-1].first+=
- gsl_vector_get(beta_old, i);
- beta_g[pos-1].second+=1.0;
- }
-
- if (a_mode==13) {
- pheno_mean+=mean_z;
- }
-
- w++;
- }
- }
- }
- cout<<endl;
-
- cout<<"time on selecting Xgamma: "<<time_set<<endl;
- cout<<"time on calculating posterior: "<<time_post<<endl;
-
- w_col=w%w_pace;
- WriteResult (1, Result_hyp, Result_gamma, w_col);
-
- gsl_vector *alpha=gsl_vector_alloc (ns_test);
- gsl_vector_set_zero (alpha);
- WriteParam (beta_g, alpha, w);
- gsl_vector_free(alpha);
-
- gsl_matrix_free(Result_hyp);
- gsl_matrix_free(Result_gamma);
-
- gsl_vector_free(z_hat);
- gsl_vector_free(z);
- gsl_vector_free(Xb_new);
- gsl_vector_free(Xb_old);
-
- gsl_matrix_free(Xgamma_old);
- gsl_matrix_free(XtX_old);
- gsl_vector_free(Xtz_old);
- gsl_vector_free(beta_old);
-
- gsl_matrix_free(Xgamma_new);
- gsl_matrix_free(XtX_new);
- gsl_vector_free(Xtz_new);
- gsl_vector_free(beta_new);
-
- delete [] p_gamma;
- beta_g.clear();
-
- return;
+void BSLMM::MCMC(const gsl_matrix *X, const gsl_vector *y) {
+ clock_t time_start;
+ double time_set = 0, time_post = 0;
+
+ class HYPBSLMM cHyp_old, cHyp_new;
+
+ gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6);
+ gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
+
+ gsl_vector *Xb_new = gsl_vector_alloc(ni_test);
+ gsl_vector *Xb_old = gsl_vector_alloc(ni_test);
+ gsl_vector *z_hat = gsl_vector_alloc(ni_test);
+ gsl_vector *z = gsl_vector_alloc(ni_test);
+
+ gsl_matrix *Xgamma_old = gsl_matrix_alloc(ni_test, s_max);
+ gsl_matrix *XtX_old = gsl_matrix_alloc(s_max, s_max);
+ gsl_vector *Xtz_old = gsl_vector_alloc(s_max);
+ gsl_vector *beta_old = gsl_vector_alloc(s_max);
+
+ gsl_matrix *Xgamma_new = gsl_matrix_alloc(ni_test, s_max);
+ gsl_matrix *XtX_new = gsl_matrix_alloc(s_max, s_max);
+ gsl_vector *Xtz_new = gsl_vector_alloc(s_max);
+ gsl_vector *beta_new = gsl_vector_alloc(s_max);
+
+ double ztz = 0.0;
+ gsl_vector_memcpy(z, y);
+
+ // For quantitative traits, y is centered already in
+ // gemma.cpp, but just in case.
+ double mean_z = CenterVector(z);
+ gsl_blas_ddot(z, z, &ztz);
+
+ double logPost_new, logPost_old;
+ double logMHratio;
+
+ gsl_matrix_set_zero(Result_gamma);
+ if (a_mode == 13) {
+ pheno_mean = 0.0;
+ }
+
+ vector<pair<double, double>> beta_g;
+ for (size_t i = 0; i < ns_test; i++) {
+ beta_g.push_back(make_pair(0.0, 0.0));
+ }
+
+ vector<size_t> rank_new, rank_old;
+ vector<pair<size_t, double>> pos_loglr;
+
+ time_start = clock();
+ MatrixCalcLmLR(X, z, pos_loglr);
+ time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr);
+ for (size_t i = 0; i < ns_test; ++i) {
+ mapRank2pos[i] = pos_loglr[i].first;
+ }
+
+ // Calculate proposal distribution for gamma (unnormalized),
+ // and set up gsl_r and gsl_t.
+ gsl_rng_env_setup();
+ const gsl_rng_type *gslType;
+ gslType = gsl_rng_default;
+ if (randseed < 0) {
+ time_t rawtime;
+ time(&rawtime);
+ tm *ptm = gmtime(&rawtime);
+
+ randseed =
+ (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec);
+ }
+ gsl_r = gsl_rng_alloc(gslType);
+ gsl_rng_set(gsl_r, randseed);
+
+ double *p_gamma = new double[ns_test];
+ CalcPgamma(p_gamma);
+
+ gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
+
+ // Initial parameters.
+ InitialMCMC(X, z, rank_old, cHyp_old, pos_loglr);
+
+ cHyp_initial = cHyp_old;
+
+ if (cHyp_old.n_gamma == 0) {
+ logPost_old = CalcPosterior(ztz, cHyp_old);
+ } else {
+ SetXgamma(Xgamma_old, X, rank_old);
+ CalcXtX(Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
+ logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz,
+ rank_old.size(), Xb_old, beta_old, cHyp_old);
+ }
+
+ // Calculate centered z_hat, and pve.
+ if (a_mode == 13) {
+ if (cHyp_old.n_gamma == 0) {
+ CalcCC_PVEnZ(z_hat, cHyp_old);
+ } else {
+ CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
+ }
+ }
+
+ // Start MCMC.
+ int accept;
+ size_t total_step = w_step + s_step;
+ size_t w = 0, w_col, pos;
+ size_t repeat = 0;
+
+ for (size_t t = 0; t < total_step; ++t) {
+ if (t % d_pace == 0 || t == total_step - 1) {
+ ProgressBar("Running MCMC ", t, total_step - 1,
+ (double)n_accept / (double)(t * n_mh + 1));
+ }
+
+ if (a_mode == 13) {
+ SampleZ(y, z_hat, z);
+ mean_z = CenterVector(z);
+ gsl_blas_ddot(z, z, &ztz);
+
+ // First proposal.
+ if (cHyp_old.n_gamma == 0) {
+ logPost_old = CalcPosterior(ztz, cHyp_old);
+ } else {
+ gsl_matrix_view Xold_sub =
+ gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size());
+ gsl_vector_view Xtz_sub =
+ gsl_vector_subvector(Xtz_old, 0, rank_old.size());
+ gsl_blas_dgemv(CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0,
+ &Xtz_sub.vector);
+ logPost_old =
+ CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(),
+ Xb_old, beta_old, cHyp_old);
+ }
+ }
+
+ // M-H steps.
+ for (size_t i = 0; i < n_mh; ++i) {
+ if (gsl_rng_uniform(gsl_r) < 0.33) {
+ repeat = 1 + gsl_rng_uniform_int(gsl_r, 20);
+ } else {
+ repeat = 1;
+ }
+
+ logMHratio = 0.0;
+ logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
+ logMHratio +=
+ ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
+ logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
+
+ if (cHyp_new.n_gamma == 0) {
+ logPost_new = CalcPosterior(ztz, cHyp_new);
+ } else {
+
+ // This makes sure that rank_old.size() ==
+ // rank_remove.size() does not happen.
+ if (cHyp_new.n_gamma <= 20 || cHyp_old.n_gamma <= 20) {
+ time_start = clock();
+ SetXgamma(Xgamma_new, X, rank_new);
+ CalcXtX(Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new);
+ time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ } else {
+ time_start = clock();
+ SetXgamma(X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new,
+ Xgamma_new, XtX_new, Xtz_new);
+ time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ }
+ time_start = clock();
+ logPost_new =
+ CalcPosterior(Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(),
+ Xb_new, beta_new, cHyp_new);
+ time_post += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ }
+ logMHratio += logPost_new - logPost_old;
+
+ if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
+ accept = 1;
+ n_accept++;
+ } else {
+ accept = 0;
+ }
+
+ if (accept == 1) {
+ logPost_old = logPost_new;
+ cHyp_old = cHyp_new;
+ gsl_vector_memcpy(Xb_old, Xb_new);
+
+ rank_old.clear();
+ if (rank_new.size() != 0) {
+ for (size_t i = 0; i < rank_new.size(); ++i) {
+ rank_old.push_back(rank_new[i]);
+ }
+
+ gsl_matrix_view Xold_sub =
+ gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size());
+ gsl_matrix_view XtXold_sub = gsl_matrix_submatrix(
+ XtX_old, 0, 0, rank_new.size(), rank_new.size());
+ gsl_vector_view Xtzold_sub =
+ gsl_vector_subvector(Xtz_old, 0, rank_new.size());
+ gsl_vector_view betaold_sub =
+ gsl_vector_subvector(beta_old, 0, rank_new.size());
+
+ gsl_matrix_view Xnew_sub =
+ gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size());
+ gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix(
+ XtX_new, 0, 0, rank_new.size(), rank_new.size());
+ gsl_vector_view Xtznew_sub =
+ gsl_vector_subvector(Xtz_new, 0, rank_new.size());
+ gsl_vector_view betanew_sub =
+ gsl_vector_subvector(beta_new, 0, rank_new.size());
+
+ gsl_matrix_memcpy(&Xold_sub.matrix, &Xnew_sub.matrix);
+ gsl_matrix_memcpy(&XtXold_sub.matrix, &XtXnew_sub.matrix);
+ gsl_vector_memcpy(&Xtzold_sub.vector, &Xtznew_sub.vector);
+ gsl_vector_memcpy(&betaold_sub.vector, &betanew_sub.vector);
+ }
+ } else {
+ cHyp_new = cHyp_old;
+ }
+ }
+
+ // Calculate z_hat, and pve.
+ if (a_mode == 13) {
+ if (cHyp_old.n_gamma == 0) {
+ CalcCC_PVEnZ(z_hat, cHyp_old);
+ } else {
+ CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
+ }
+
+ // Sample mu and update z_hat.
+ gsl_vector_sub(z, z_hat);
+ mean_z += CenterVector(z);
+ mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
+
+ gsl_vector_add_constant(z_hat, mean_z);
+ }
+
+ // Save data.
+ if (t < w_step) {
+ continue;
+ } else {
+ if (t % r_pace == 0) {
+ w_col = w % w_pace;
+ if (w_col == 0) {
+ if (w == 0) {
+ WriteResult(0, Result_hyp, Result_gamma, w_col);
+ } else {
+ WriteResult(1, Result_hyp, Result_gamma, w_col);
+ gsl_matrix_set_zero(Result_hyp);
+ gsl_matrix_set_zero(Result_gamma);
+ }
+ }
+
+ gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
+ gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
+ gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
+ gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
+ gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
+ gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
+
+ for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
+ pos = mapRank2pos[rank_old[i]] + 1;
+ gsl_matrix_set(Result_gamma, w_col, i, pos);
+
+ beta_g[pos - 1].first += gsl_vector_get(beta_old, i);
+ beta_g[pos - 1].second += 1.0;
+ }
+
+ if (a_mode == 13) {
+ pheno_mean += mean_z;
+ }
+
+ w++;
+ }
+ }
+ }
+ cout << endl;
+
+ cout << "time on selecting Xgamma: " << time_set << endl;
+ cout << "time on calculating posterior: " << time_post << endl;
+
+ w_col = w % w_pace;
+ WriteResult(1, Result_hyp, Result_gamma, w_col);
+
+ gsl_vector *alpha = gsl_vector_alloc(ns_test);
+ gsl_vector_set_zero(alpha);
+ WriteParam(beta_g, alpha, w);
+ gsl_vector_free(alpha);
+
+ gsl_matrix_free(Result_hyp);
+ gsl_matrix_free(Result_gamma);
+
+ gsl_vector_free(z_hat);
+ gsl_vector_free(z);
+ gsl_vector_free(Xb_new);
+ gsl_vector_free(Xb_old);
+
+ gsl_matrix_free(Xgamma_old);
+ gsl_matrix_free(XtX_old);
+ gsl_vector_free(Xtz_old);
+ gsl_vector_free(beta_old);
+
+ gsl_matrix_free(Xgamma_new);
+ gsl_matrix_free(XtX_new);
+ gsl_vector_free(Xtz_new);
+ gsl_vector_free(beta_new);
+
+ delete[] p_gamma;
+ beta_g.clear();
+
+ return;
}
diff --git a/src/bslmm.h b/src/bslmm.h
index c7768a2..d2dadbf 100644
--- a/src/bslmm.h
+++ b/src/bslmm.h
@@ -19,10 +19,10 @@
#ifndef __BSLMM_H__
#define __BSLMM_H__
-#include <vector>
-#include <map>
-#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
+#include <gsl/gsl_rng.h>
+#include <map>
+#include <vector>
#include "param.h"
@@ -31,149 +31,139 @@ using namespace std;
class BSLMM {
public:
- // IO-related parameters.
- int a_mode;
- size_t d_pace;
-
- string file_bfile;
- string file_geno;
- string file_out;
- string path_out;
-
- // LMM-related parameters.
- double l_min;
- double l_max;
- size_t n_region;
- double pve_null;
- double pheno_mean;
-
- // BSLMM MCMC-related parameters
- double h_min, h_max, h_scale; // Priors for h.
- double rho_min, rho_max, rho_scale; // Priors for rho.
- double logp_min, logp_max, logp_scale; // Priors for log(pi).
- size_t s_min, s_max; // Min. & max. number of gammas.
- size_t w_step; // Number of warm up/burn in
- // iterations.
- size_t s_step; // Num. sampling iterations.
- size_t r_pace; // Record pace.
- size_t w_pace; // Write pace.
- size_t n_accept; // Number of acceptances.
- size_t n_mh; // Number of MH steps per iter.
- double geo_mean; // Mean of geometric dist.
- long int randseed;
- double trace_G;
-
- HYPBSLMM cHyp_initial;
-
- // Summary statistics.
- size_t ni_total, ns_total; // Number of total individuals and SNPs
- size_t ni_test, ns_test; // Num. individuals & SNPs used in analysis.
- size_t n_cvt; // Number of covariates.
- double time_UtZ;
- double time_Omega; // Time spent on optimization iterations.
-
- // Time spent on constructing the proposal distribution for
- // gamma (i.e. lmm or lm analysis).
- double time_Proposal;
-
- // Indicator for individuals (phenotypes): 0 missing, 1
- // available for analysis.
- vector<int> indicator_idv;
-
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
-
- // Record SNP information.
- vector<SNPINFO> snpInfo;
-
- // Not included in PARAM.
- gsl_rng *gsl_r;
- gsl_ran_discrete_t *gsl_t;
- map<size_t, size_t> mapRank2pos;
-
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
-
- void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *eval,
- const double lambda);
-
- void MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const gsl_vector *y);
- void WriteLog ();
- void WriteLR ();
- void WriteBV (const gsl_vector *bv);
- void WriteParam (vector<pair<double, double> > &beta_g,
- const gsl_vector *alpha, const size_t w);
- void WriteParam (const gsl_vector *alpha);
- void WriteResult (const int flag, const gsl_matrix *Result_hyp,
- const gsl_matrix *Result_gamma, const size_t w_col);
-
- // Subfunctions inside MCMC.
- void CalcPgamma (double *p_gammar);
-
- double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
- const double sigma_a2);
- void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
- vector<size_t> &rank_old, class HYPBSLMM &cHyp,
- vector<pair<size_t, double> > &pos_loglr);
- double CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval,
- gsl_vector *Utu, gsl_vector *alpha_prime,
- class HYPBSLMM &cHyp);
- double CalcPosterior (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- gsl_vector *UtXb, gsl_vector *Utu,
- gsl_vector *alpha_prime, gsl_vector *beta,
- class HYPBSLMM &cHyp);
- void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
- gsl_vector *z_hat, class HYPBSLMM &cHyp);
- void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
- const gsl_vector *Utu, gsl_vector *z_hat,
- class HYPBSLMM &cHyp);
- double CalcREMLE (const gsl_matrix *Utw, const gsl_vector *Uty,
- const gsl_vector *K_eval);
-
- // Calculate the maximum marginal likelihood ratio for each
- // analyzed SNPs with gemma, use it to rank SNPs.
- double CalcLR (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- vector<pair<size_t, double> > &loglr_sort);
- void SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
- gsl_vector *z);
- double ProposeHnRho (const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new, const size_t &repeat);
- double ProposePi (const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new,
- const size_t &repeat);
- double ProposeGamma (const vector<size_t> &rank_old,
- vector<size_t> &rank_new, const double *p_gamma,
- const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new, const size_t &repeat);
- void SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X,
- vector<size_t> &rank);
-
- void CalcXtX (const gsl_matrix *X_new, const gsl_vector *y,
- const size_t s_size, gsl_matrix *XtX_new,
- gsl_vector *Xty_new);
- void SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
- const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
- const gsl_vector *y, const vector<size_t> &rank_old,
- const vector<size_t> &rank_new, gsl_matrix *X_new,
- gsl_matrix *XtX_new, gsl_vector *Xty_new);
- double CalcPosterior (const double yty, class HYPBSLMM &cHyp);
- double CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
- const gsl_vector *Xty, const double yty,
- const size_t s_size, gsl_vector *Xb,
- gsl_vector *beta, class HYPBSLMM &cHyp);
- void CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp);
- void CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat,
- class HYPBSLMM &cHyp);
- void MCMC (const gsl_matrix *X, const gsl_vector *y);
+ // IO-related parameters.
+ int a_mode;
+ size_t d_pace;
+
+ string file_bfile;
+ string file_geno;
+ string file_out;
+ string path_out;
+
+ // LMM-related parameters.
+ double l_min;
+ double l_max;
+ size_t n_region;
+ double pve_null;
+ double pheno_mean;
+
+ // BSLMM MCMC-related parameters
+ double h_min, h_max, h_scale; // Priors for h.
+ double rho_min, rho_max, rho_scale; // Priors for rho.
+ double logp_min, logp_max, logp_scale; // Priors for log(pi).
+ size_t s_min, s_max; // Min. & max. number of gammas.
+ size_t w_step; // Number of warm up/burn in
+ // iterations.
+ size_t s_step; // Num. sampling iterations.
+ size_t r_pace; // Record pace.
+ size_t w_pace; // Write pace.
+ size_t n_accept; // Number of acceptances.
+ size_t n_mh; // Number of MH steps per iter.
+ double geo_mean; // Mean of geometric dist.
+ long int randseed;
+ double trace_G;
+
+ HYPBSLMM cHyp_initial;
+
+ // Summary statistics.
+ size_t ni_total, ns_total; // Number of total individuals and SNPs
+ size_t ni_test, ns_test; // Num. individuals & SNPs used in analysis.
+ size_t n_cvt; // Number of covariates.
+ double time_UtZ;
+ double time_Omega; // Time spent on optimization iterations.
+
+ // Time spent on constructing the proposal distribution for
+ // gamma (i.e. lmm or lm analysis).
+ double time_Proposal;
+
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_idv;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
+
+ // Record SNP information.
+ vector<SNPINFO> snpInfo;
+
+ // Not included in PARAM.
+ gsl_rng *gsl_r;
+ gsl_ran_discrete_t *gsl_t;
+ map<size_t, size_t> mapRank2pos;
+
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+
+ void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty,
+ const gsl_vector *eval, const double lambda);
+
+ void MCMC(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty,
+ const gsl_vector *K_eval, const gsl_vector *y);
+ void WriteLog();
+ void WriteLR();
+ void WriteBV(const gsl_vector *bv);
+ void WriteParam(vector<pair<double, double>> &beta_g, const gsl_vector *alpha,
+ const size_t w);
+ void WriteParam(const gsl_vector *alpha);
+ void WriteResult(const int flag, const gsl_matrix *Result_hyp,
+ const gsl_matrix *Result_gamma, const size_t w_col);
+
+ // Subfunctions inside MCMC.
+ void CalcPgamma(double *p_gammar);
+
+ double CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+ const double sigma_a2);
+ void InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty,
+ vector<size_t> &rank_old, class HYPBSLMM &cHyp,
+ vector<pair<size_t, double>> &pos_loglr);
+ double CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval,
+ gsl_vector *Utu, gsl_vector *alpha_prime,
+ class HYPBSLMM &cHyp);
+ double CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+ const gsl_vector *K_eval, gsl_vector *UtXb,
+ gsl_vector *Utu, gsl_vector *alpha_prime,
+ gsl_vector *beta, class HYPBSLMM &cHyp);
+ void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu,
+ gsl_vector *z_hat, class HYPBSLMM &cHyp);
+ void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb,
+ const gsl_vector *Utu, gsl_vector *z_hat,
+ class HYPBSLMM &cHyp);
+ double CalcREMLE(const gsl_matrix *Utw, const gsl_vector *Uty,
+ const gsl_vector *K_eval);
+
+ // Calculate the maximum marginal likelihood ratio for each
+ // analyzed SNPs with gemma, use it to rank SNPs.
+ double CalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ vector<pair<size_t, double>> &loglr_sort);
+ void SampleZ(const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z);
+ double ProposeHnRho(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new,
+ const size_t &repeat);
+ double ProposePi(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new,
+ const size_t &repeat);
+ double ProposeGamma(const vector<size_t> &rank_old, vector<size_t> &rank_new,
+ const double *p_gamma, const class HYPBSLMM &cHyp_old,
+ class HYPBSLMM &cHyp_new, const size_t &repeat);
+ void SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank);
+
+ void CalcXtX(const gsl_matrix *X_new, const gsl_vector *y,
+ const size_t s_size, gsl_matrix *XtX_new, gsl_vector *Xty_new);
+ void SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old,
+ const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
+ const gsl_vector *y, const vector<size_t> &rank_old,
+ const vector<size_t> &rank_new, gsl_matrix *X_new,
+ gsl_matrix *XtX_new, gsl_vector *Xty_new);
+ double CalcPosterior(const double yty, class HYPBSLMM &cHyp);
+ double CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX,
+ const gsl_vector *Xty, const double yty,
+ const size_t s_size, gsl_vector *Xb, gsl_vector *beta,
+ class HYPBSLMM &cHyp);
+ void CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp);
+ void CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat,
+ class HYPBSLMM &cHyp);
+ void MCMC(const gsl_matrix *X, const gsl_vector *y);
};
#endif
-
-
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
index e1a53a6..7aac1d4 100644
--- a/src/bslmmdap.cpp
+++ b/src/bslmmdap.cpp
@@ -16,89 +16,97 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
-#include <iomanip>
+#include <algorithm>
#include <cmath>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
-#include <ctime>
-#include <cstring>
-#include <algorithm>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
#include "gsl/gsl_randist.h"
-#include "gsl/gsl_cdf.h"
#include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
-#include "logistic.h"
-#include "lapack.h"
-#include "io.h"
-#include "param.h"
#include "bslmmdap.h"
-#include "lmm.h"
+#include "io.h"
+#include "lapack.h"
#include "lm.h"
+#include "lmm.h"
+#include "logistic.h"
#include "mathfunc.h"
+#include "param.h"
using namespace std;
-void BSLMMDAP::CopyFromParam (PARAM &cPar) {
- file_out=cPar.file_out;
- path_out=cPar.path_out;
+void BSLMMDAP::CopyFromParam(PARAM &cPar) {
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
- time_UtZ=0.0;
- time_Omega=0.0;
+ time_UtZ = 0.0;
+ time_Omega = 0.0;
- h_min=cPar.h_min;
- h_max=cPar.h_max;
- h_ngrid=cPar.h_ngrid;
- rho_min=cPar.rho_min;
- rho_max=cPar.rho_max;
- rho_ngrid=cPar.rho_ngrid;
+ h_min = cPar.h_min;
+ h_max = cPar.h_max;
+ h_ngrid = cPar.h_ngrid;
+ rho_min = cPar.rho_min;
+ rho_max = cPar.rho_max;
+ rho_ngrid = cPar.rho_ngrid;
- if (h_min<=0) {h_min=0.01;}
- if (h_max>=1) {h_max=0.99;}
- if (rho_min<=0) {rho_min=0.01;}
- if (rho_max>=1) {rho_max=0.99;}
+ if (h_min <= 0) {
+ h_min = 0.01;
+ }
+ if (h_max >= 1) {
+ h_max = 0.99;
+ }
+ if (rho_min <= 0) {
+ rho_min = 0.01;
+ }
+ if (rho_max >= 1) {
+ rho_max = 0.99;
+ }
- trace_G=cPar.trace_G;
+ trace_G = cPar.trace_G;
- ni_total=cPar.ni_total;
- ns_total=cPar.ns_total;
- ni_test=cPar.ni_test;
- ns_test=cPar.ns_test;
+ ni_total = cPar.ni_total;
+ ns_total = cPar.ns_total;
+ ni_test = cPar.ni_test;
+ ns_test = cPar.ns_test;
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
- snpInfo=cPar.snpInfo;
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
- return;
+ return;
}
-void BSLMMDAP::CopyToParam (PARAM &cPar) {
- cPar.time_UtZ=time_UtZ;
- cPar.time_Omega=time_Omega;
+void BSLMMDAP::CopyToParam(PARAM &cPar) {
+ cPar.time_UtZ = time_UtZ;
+ cPar.time_Omega = time_Omega;
- return;
+ return;
}
-
-
// Read hyp file.
-void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
- vector<double> &vec_sb2, vector<double> &vec_wab) {
- vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();
+void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2,
+ vector<double> &vec_sb2, vector<double> &vec_wab) {
+ vec_sa2.clear();
+ vec_sb2.clear();
+ vec_wab.clear();
- igzstream infile (file_hyp.c_str(), igzstream::in);
+ igzstream infile(file_hyp.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open hyp file: "<<file_hyp<<endl;
+ cout << "error! fail to open hyp file: " << file_hyp << endl;
return;
}
@@ -108,16 +116,16 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
getline(infile, line);
while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
vec_sa2.push_back(atof(ch_ptr));
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
vec_sb2.push_back(atof(ch_ptr));
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
vec_wab.push_back(atof(ch_ptr));
}
@@ -128,55 +136,59 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
}
// Read bf file.
-void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
- vector<vector<vector<double> > > &BF) {
- BF.clear(); vec_rs.clear();
+void ReadFile_bf(const string &file_bf, vector<string> &vec_rs,
+ vector<vector<vector<double>>> &BF) {
+ BF.clear();
+ vec_rs.clear();
- igzstream infile (file_bf.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open bf file: "<<file_bf<<endl; return;}
+ igzstream infile(file_bf.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open bf file: " << file_bf << endl;
+ return;
+ }
string line, rs, block;
vector<double> vec_bf;
- vector<vector<double> > mat_bf;
+ vector<vector<double>> mat_bf;
char *ch_ptr;
size_t bf_size, flag_block;
getline(infile, line);
- size_t t=0;
+ size_t t = 0;
while (!safeGetline(infile, line).eof()) {
- flag_block=0;
+ flag_block = 0;
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ rs = ch_ptr;
vec_rs.push_back(rs);
- ch_ptr=strtok (NULL, " , \t");
- if (t==0) {
- block=ch_ptr;
+ ch_ptr = strtok(NULL, " , \t");
+ if (t == 0) {
+ block = ch_ptr;
} else {
- if (strcmp(ch_ptr, block.c_str() )!=0) {
- flag_block=1;
- block=ch_ptr;
+ if (strcmp(ch_ptr, block.c_str()) != 0) {
+ flag_block = 1;
+ block = ch_ptr;
}
}
- ch_ptr=strtok (NULL, " , \t");
- while (ch_ptr!=NULL) {
+ ch_ptr = strtok(NULL, " , \t");
+ while (ch_ptr != NULL) {
vec_bf.push_back(atof(ch_ptr));
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
- if (t==0) {
- bf_size=vec_bf.size();
+ if (t == 0) {
+ bf_size = vec_bf.size();
} else {
- if (bf_size!=vec_bf.size()) {
- cout<<"error! unequal row size in bf file."<<endl;
+ if (bf_size != vec_bf.size()) {
+ cout << "error! unequal row size in bf file." << endl;
}
}
- if (flag_block==0) {
+ if (flag_block == 0) {
mat_bf.push_back(vec_bf);
} else {
BF.push_back(mat_bf);
@@ -193,15 +205,14 @@ void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
return;
}
-
// Read category files.
// Read both continuous and discrete category file, record mapRS2catc.
-void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
- gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
- size_t &kc, size_t &kd) {
- igzstream infile (file_cat.c_str(), igzstream::in);
+void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
+ gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
+ size_t &kc, size_t &kd) {
+ igzstream infile(file_cat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open category file: "<<file_cat<<endl;
+ cout << "error! fail to open category file: " << file_cat << endl;
return;
}
@@ -213,94 +224,103 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
// Read header.
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader_io(line, header);
// Use the header to determine the number of categories.
- kc=header.catc_col.size(); kd=header.catd_col.size();
+ kc = header.catc_col.size();
+ kd = header.catd_col.size();
- //set up storage and mapper
- map<string, vector<double> > mapRS2catc;
- map<string, vector<int> > mapRS2catd;
+ // set up storage and mapper
+ map<string, vector<double>> mapRS2catc;
+ map<string, vector<int>> mapRS2catd;
vector<double> catc;
vector<int> catd;
// Read the following lines to record mapRS2cat.
while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
- if (header.rs_col==0) {
- rs=chr+":"+pos;
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
}
- catc.clear(); catd.clear();
-
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {
- rs=ch_ptr;
- } else if (header.chr_col!=0 && header.chr_col==i+1) {
- chr=ch_ptr;
- } else if (header.pos_col!=0 && header.pos_col==i+1) {
- pos=ch_ptr;
- } else if (header.cm_col!=0 && header.cm_col==i+1) {
- cm=ch_ptr;
- } else if (header.a1_col!=0 && header.a1_col==i+1) {
- a1=ch_ptr;
- } else if (header.a0_col!=0 && header.a0_col==i+1) {
- a0=ch_ptr;
- } else if (header.catc_col.size()!=0 && header.catc_col.count(i+1)!=0 ) {
- catc.push_back(atof(ch_ptr));
- } else if (header.catd_col.size()!=0 && header.catd_col.count(i+1)!=0 ) {
- catd.push_back(atoi(ch_ptr));
- } else {}
-
- ch_ptr=strtok (NULL, " , \t");
+ catc.clear();
+ catd.clear();
+
+ for (size_t i = 0; i < header.coln; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ } else if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ } else if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ } else if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ } else if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ } else if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
+ } else if (header.catc_col.size() != 0 &&
+ header.catc_col.count(i + 1) != 0) {
+ catc.push_back(atof(ch_ptr));
+ } else if (header.catd_col.size() != 0 &&
+ header.catd_col.count(i + 1) != 0) {
+ catd.push_back(atoi(ch_ptr));
+ } else {
+ }
+
+ ch_ptr = strtok(NULL, " , \t");
}
- if (mapRS2catc.count(rs)==0 && kc>0) {mapRS2catc[rs]=catc;}
- if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;}
+ if (mapRS2catc.count(rs) == 0 && kc > 0) {
+ mapRS2catc[rs] = catc;
+ }
+ if (mapRS2catd.count(rs) == 0 && kd > 0) {
+ mapRS2catd[rs] = catd;
+ }
}
// Load into Ad and Ac.
- if (kc>0) {
- Ac=gsl_matrix_alloc(vec_rs.size(), kc);
- for (size_t i=0; i<vec_rs.size(); i++) {
- if (mapRS2catc.count(vec_rs[i])!=0) {
- for (size_t j=0; j<kc; j++) {
- gsl_matrix_set(Ac, i, j, mapRS2catc[vec_rs[i]][j]);
- }
+ if (kc > 0) {
+ Ac = gsl_matrix_alloc(vec_rs.size(), kc);
+ for (size_t i = 0; i < vec_rs.size(); i++) {
+ if (mapRS2catc.count(vec_rs[i]) != 0) {
+ for (size_t j = 0; j < kc; j++) {
+ gsl_matrix_set(Ac, i, j, mapRS2catc[vec_rs[i]][j]);
+ }
} else {
- for (size_t j=0; j<kc; j++) {
- gsl_matrix_set(Ac, i, j, 0);
- }
+ for (size_t j = 0; j < kc; j++) {
+ gsl_matrix_set(Ac, i, j, 0);
+ }
}
}
}
- if (kd>0) {
- Ad=gsl_matrix_int_alloc(vec_rs.size(), kd);
+ if (kd > 0) {
+ Ad = gsl_matrix_int_alloc(vec_rs.size(), kd);
- for (size_t i=0; i<vec_rs.size(); i++) {
- if (mapRS2catd.count(vec_rs[i])!=0) {
- for (size_t j=0; j<kd; j++) {
- gsl_matrix_int_set(Ad, i, j, mapRS2catd[vec_rs[i]][j]);
- }
+ for (size_t i = 0; i < vec_rs.size(); i++) {
+ if (mapRS2catd.count(vec_rs[i]) != 0) {
+ for (size_t j = 0; j < kd; j++) {
+ gsl_matrix_int_set(Ad, i, j, mapRS2catd[vec_rs[i]][j]);
+ }
} else {
- for (size_t j=0; j<kd; j++) {
- gsl_matrix_int_set(Ad, i, j, 0);
- }
+ for (size_t j = 0; j < kd; j++) {
+ gsl_matrix_int_set(Ad, i, j, 0);
+ }
}
}
- dlevel=gsl_vector_int_alloc(kd);
+ dlevel = gsl_vector_int_alloc(kd);
map<int, int> rcd;
int val;
- for (size_t j=0; j<kd; j++) {
+ for (size_t j = 0; j < kd; j++) {
rcd.clear();
- for (size_t i=0; i<Ad->size1; i++) {
- val = gsl_matrix_int_get(Ad, i, j);
- rcd[val] = 1;
+ for (size_t i = 0; i < Ad->size1; i++) {
+ val = gsl_matrix_int_get(Ad, i, j);
+ rcd[val] = 1;
}
- gsl_vector_int_set (dlevel, j, rcd.size());
+ gsl_vector_int_set(dlevel, j, rcd.size());
}
}
@@ -310,509 +330,531 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
return;
}
-void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) {
+void BSLMMDAP::WriteResult(const gsl_matrix *Hyper, const gsl_matrix *BF) {
string file_bf, file_hyp;
- file_bf=path_out+"/"+file_out;
- file_bf+=".bf.txt";
- file_hyp=path_out+"/"+file_out;
- file_hyp+=".hyp.txt";
-
- ofstream outfile_bf, outfile_hyp;
-
- outfile_bf.open (file_bf.c_str(), ofstream::out);
- outfile_hyp.open (file_hyp.c_str(), ofstream::out);
-
- if (!outfile_bf) {
- cout<<"error writing file: "<<file_bf<<endl;
- return;
- }
- if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
- return;
- }
-
- outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<
- "weight"<<endl;
- outfile_hyp<<scientific;
- for (size_t i=0; i<Hyper->size1; i++) {
- for (size_t j=0; j<Hyper->size2; j++) {
- outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
- }
- outfile_hyp<<endl;
- }
-
- outfile_bf<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss";
- for (size_t i=0; i<BF->size2; i++) {
- outfile_bf<<"\t"<<"BF"<<i+1;
- }
- outfile_bf<<endl;
-
- size_t t=0;
- for (size_t i=0; i<ns_total; ++i) {
- if (indicator_snp[i]==0) {continue;}
-
- outfile_bf<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
- <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss;
-
- outfile_bf<<scientific;
- for (size_t j=0; j<BF->size2; j++) {
- outfile_bf<<"\t"<<setprecision(6)<<gsl_matrix_get (BF, t, j);
- }
- outfile_bf<<endl;
-
- t++;
- }
-
- outfile_hyp.close();
- outfile_hyp.clear();
- outfile_bf.close();
- outfile_bf.clear();
- return;
+ file_bf = path_out + "/" + file_out;
+ file_bf += ".bf.txt";
+ file_hyp = path_out + "/" + file_out;
+ file_hyp += ".hyp.txt";
+
+ ofstream outfile_bf, outfile_hyp;
+
+ outfile_bf.open(file_bf.c_str(), ofstream::out);
+ outfile_hyp.open(file_hyp.c_str(), ofstream::out);
+
+ if (!outfile_bf) {
+ cout << "error writing file: " << file_bf << endl;
+ return;
+ }
+ if (!outfile_hyp) {
+ cout << "error writing file: " << file_hyp << endl;
+ return;
+ }
+
+ outfile_hyp << "h"
+ << "\t"
+ << "rho"
+ << "\t"
+ << "sa2"
+ << "\t"
+ << "sb2"
+ << "\t"
+ << "weight" << endl;
+ outfile_hyp << scientific;
+ for (size_t i = 0; i < Hyper->size1; i++) {
+ for (size_t j = 0; j < Hyper->size2; j++) {
+ outfile_hyp << setprecision(6) << gsl_matrix_get(Hyper, i, j) << "\t";
+ }
+ outfile_hyp << endl;
+ }
+
+ outfile_bf << "chr"
+ << "\t"
+ << "rs"
+ << "\t"
+ << "ps"
+ << "\t"
+ << "n_miss";
+ for (size_t i = 0; i < BF->size2; i++) {
+ outfile_bf << "\t"
+ << "BF" << i + 1;
+ }
+ outfile_bf << endl;
+
+ size_t t = 0;
+ for (size_t i = 0; i < ns_total; ++i) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ outfile_bf << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+ << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss;
+
+ outfile_bf << scientific;
+ for (size_t j = 0; j < BF->size2; j++) {
+ outfile_bf << "\t" << setprecision(6) << gsl_matrix_get(BF, t, j);
+ }
+ outfile_bf << endl;
+
+ t++;
+ }
+
+ outfile_hyp.close();
+ outfile_hyp.clear();
+ outfile_bf.close();
+ outfile_bf.clear();
+ return;
}
-void BSLMMDAP::WriteResult (const vector<string> &vec_rs,
- const gsl_matrix *Hyper, const gsl_vector *pip,
- const gsl_vector *coef) {
+void BSLMMDAP::WriteResult(const vector<string> &vec_rs,
+ const gsl_matrix *Hyper, const gsl_vector *pip,
+ const gsl_vector *coef) {
string file_gamma, file_hyp, file_coef;
- file_gamma=path_out+"/"+file_out;
- file_gamma+=".gamma.txt";
- file_hyp=path_out+"/"+file_out;
- file_hyp+=".hyp.txt";
- file_coef=path_out+"/"+file_out;
- file_coef+=".coef.txt";
-
- ofstream outfile_gamma, outfile_hyp, outfile_coef;
-
- outfile_gamma.open (file_gamma.c_str(), ofstream::out);
- outfile_hyp.open (file_hyp.c_str(), ofstream::out);
- outfile_coef.open (file_coef.c_str(), ofstream::out);
-
- if (!outfile_gamma) {
- cout<<"error writing file: "<<file_gamma<<endl;
- return;
- }
- if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
- return;
- }
- if (!outfile_coef) {
- cout<<"error writing file: "<<file_coef<<endl;
- return;
- }
-
- outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<
- "weight"<<endl;
- outfile_hyp<<scientific;
- for (size_t i=0; i<Hyper->size1; i++) {
- for (size_t j=0; j<Hyper->size2; j++) {
- outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t";
- }
- outfile_hyp<<endl;
- }
-
- outfile_gamma<<"rs"<<"\t"<<"gamma"<<endl;
- for (size_t i=0; i<vec_rs.size(); ++i) {
- outfile_gamma<<vec_rs[i]<<"\t"<<scientific<<setprecision(6)<<
- gsl_vector_get(pip, i)<<endl;
- }
-
- outfile_coef<<"coef"<<endl;
- outfile_coef<<scientific;
- for (size_t i=0; i<coef->size; i++) {
- outfile_coef<<setprecision(6)<<gsl_vector_get (coef, i)<<endl;
- }
-
- outfile_coef.close();
- outfile_coef.clear();
- outfile_hyp.close();
- outfile_hyp.clear();
- outfile_gamma.close();
- outfile_gamma.clear();
- return;
-}
+ file_gamma = path_out + "/" + file_out;
+ file_gamma += ".gamma.txt";
+ file_hyp = path_out + "/" + file_out;
+ file_hyp += ".hyp.txt";
+ file_coef = path_out + "/" + file_out;
+ file_coef += ".coef.txt";
+ ofstream outfile_gamma, outfile_hyp, outfile_coef;
-double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
- const gsl_vector *K_eval,
- const double sigma_b2, const double tau) {
- gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
+ outfile_gamma.open(file_gamma.c_str(), ofstream::out);
+ outfile_hyp.open(file_hyp.c_str(), ofstream::out);
+ outfile_coef.open(file_coef.c_str(), ofstream::out);
- double logm=0.0;
- double d, uy, Hi_yy=0, logdet_H=0.0;
- for (size_t i=0; i<ni_test; ++i) {
- d=gsl_vector_get (K_eval, i)*sigma_b2;
- d=1.0/(d+1.0);
- gsl_vector_set (weight_Hi, i, d);
+ if (!outfile_gamma) {
+ cout << "error writing file: " << file_gamma << endl;
+ return;
+ }
+ if (!outfile_hyp) {
+ cout << "error writing file: " << file_hyp << endl;
+ return;
+ }
+ if (!outfile_coef) {
+ cout << "error writing file: " << file_coef << endl;
+ return;
+ }
- logdet_H-=log(d);
- uy=gsl_vector_get (Uty, i);
- Hi_yy+=d*uy*uy;
- }
+ outfile_hyp << "h"
+ << "\t"
+ << "rho"
+ << "\t"
+ << "sa2"
+ << "\t"
+ << "sb2"
+ << "\t"
+ << "weight" << endl;
+ outfile_hyp << scientific;
+ for (size_t i = 0; i < Hyper->size1; i++) {
+ for (size_t j = 0; j < Hyper->size2; j++) {
+ outfile_hyp << setprecision(6) << gsl_matrix_get(Hyper, i, j) << "\t";
+ }
+ outfile_hyp << endl;
+ }
- // Calculate likelihood.
- logm=-0.5*logdet_H-0.5*tau*Hi_yy+0.5*log(tau)*(double)ni_test;
+ outfile_gamma << "rs"
+ << "\t"
+ << "gamma" << endl;
+ for (size_t i = 0; i < vec_rs.size(); ++i) {
+ outfile_gamma << vec_rs[i] << "\t" << scientific << setprecision(6)
+ << gsl_vector_get(pip, i) << endl;
+ }
- gsl_vector_free (weight_Hi);
+ outfile_coef << "coef" << endl;
+ outfile_coef << scientific;
+ for (size_t i = 0; i < coef->size; i++) {
+ outfile_coef << setprecision(6) << gsl_vector_get(coef, i) << endl;
+ }
- return logm;
+ outfile_coef.close();
+ outfile_coef.clear();
+ outfile_hyp.close();
+ outfile_hyp.clear();
+ outfile_gamma.close();
+ outfile_gamma.clear();
+ return;
}
-double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty,
- const gsl_vector *K_eval,
- const double sigma_a2,
- const double sigma_b2, const double tau) {
- clock_t time_start;
- double logm=0.0;
- double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
-
- gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
- UtXgamma->size2);
- gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
- gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
- gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
- gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
-
- gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
-
- logdet_H=0.0; P_yy=0.0;
- for (size_t i=0; i<ni_test; ++i) {
- gsl_vector_view UtXgamma_row=gsl_matrix_row(UtXgamma_eval,i);
- d=gsl_vector_get (K_eval, i)*sigma_b2;
- d=1.0/(d+1.0);
- gsl_vector_set (weight_Hi, i, d);
-
- logdet_H-=log(d);
- uy=gsl_vector_get (Uty, i);
- P_yy+=d*uy*uy;
- gsl_vector_scale (&UtXgamma_row.vector, d);
- }
-
- // Calculate Omega.
- gsl_matrix_set_identity (Omega);
-
- time_start=clock();
- lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval,
- UtXgamma, 1.0, Omega);
- time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Calculate beta_hat.
- gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
-
- logdet_O=CholeskySolve(Omega, XtHiy, beta_hat);
-
- gsl_vector_scale (beta_hat, sigma_a2);
-
- gsl_blas_ddot (XtHiy, beta_hat, &d);
- P_yy-=d;
-
- gsl_matrix_free (UtXgamma_eval);
- gsl_matrix_free (Omega);
- gsl_vector_free (XtHiy);
- gsl_vector_free (beta_hat);
- gsl_vector_free (weight_Hi);
-
- logm=-0.5*logdet_H-0.5*logdet_O-0.5*tau*P_yy+0.5*log(tau)*
- (double)ni_test;
-
- return logm;
+double BSLMMDAP::CalcMarginal(const gsl_vector *Uty, const gsl_vector *K_eval,
+ const double sigma_b2, const double tau) {
+ gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size);
+
+ double logm = 0.0;
+ double d, uy, Hi_yy = 0, logdet_H = 0.0;
+ for (size_t i = 0; i < ni_test; ++i) {
+ d = gsl_vector_get(K_eval, i) * sigma_b2;
+ d = 1.0 / (d + 1.0);
+ gsl_vector_set(weight_Hi, i, d);
+
+ logdet_H -= log(d);
+ uy = gsl_vector_get(Uty, i);
+ Hi_yy += d * uy * uy;
+ }
+
+ // Calculate likelihood.
+ logm = -0.5 * logdet_H - 0.5 * tau * Hi_yy + 0.5 * log(tau) * (double)ni_test;
+
+ gsl_vector_free(weight_Hi);
+
+ return logm;
}
-double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) {
- double logprior=0;
- logprior=((double)cHyp.n_gamma-1.0)*cHyp.logp+
- ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
+double BSLMMDAP::CalcMarginal(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+ const gsl_vector *K_eval, const double sigma_a2,
+ const double sigma_b2, const double tau) {
+ clock_t time_start;
+ double logm = 0.0;
+ double d, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0;
+
+ gsl_matrix *UtXgamma_eval =
+ gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2);
+ gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
+ gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2);
+ gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2);
+ gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1);
+
+ gsl_matrix_memcpy(UtXgamma_eval, UtXgamma);
+
+ logdet_H = 0.0;
+ P_yy = 0.0;
+ for (size_t i = 0; i < ni_test; ++i) {
+ gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i);
+ d = gsl_vector_get(K_eval, i) * sigma_b2;
+ d = 1.0 / (d + 1.0);
+ gsl_vector_set(weight_Hi, i, d);
+
+ logdet_H -= log(d);
+ uy = gsl_vector_get(Uty, i);
+ P_yy += d * uy * uy;
+ gsl_vector_scale(&UtXgamma_row.vector, d);
+ }
+
+ // Calculate Omega.
+ gsl_matrix_set_identity(Omega);
+
+ time_start = clock();
+ lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0,
+ Omega);
+ time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Calculate beta_hat.
+ gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
+
+ logdet_O = CholeskySolve(Omega, XtHiy, beta_hat);
+
+ gsl_vector_scale(beta_hat, sigma_a2);
+
+ gsl_blas_ddot(XtHiy, beta_hat, &d);
+ P_yy -= d;
+
+ gsl_matrix_free(UtXgamma_eval);
+ gsl_matrix_free(Omega);
+ gsl_vector_free(XtHiy);
+ gsl_vector_free(beta_hat);
+ gsl_vector_free(weight_Hi);
+
+ logm = -0.5 * logdet_H - 0.5 * logdet_O - 0.5 * tau * P_yy +
+ 0.5 * log(tau) * (double)ni_test;
+
+ return logm;
+}
+
+double BSLMMDAP::CalcPrior(class HYPBSLMM &cHyp) {
+ double logprior = 0;
+ logprior =
+ ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
+ ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
return logprior;
}
// Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const gsl_vector *y) {
- clock_t time_start;
-
- // Set up BF.
- double tau, h, rho, sigma_a2, sigma_b2, d;
- size_t ns_causal=10;
- size_t n_grid=h_ngrid*rho_ngrid;
- vector<double> vec_sa2, vec_sb2, logm_null;
-
- gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid);
- gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1);
- gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
-
- // Compute tau by using yty.
- gsl_blas_ddot (Uty, Uty, &tau);
- tau=(double)ni_test/tau;
-
- // Set up grid values for sigma_a2 and sigma_b2 based on an
- // approximately even grid for h and rho, and a fixed number
- // of causals.
- size_t ij=0;
- for (size_t i=0; i<h_ngrid; i++) {
- h=h_min+(h_max-h_min)*(double)i/((double)h_ngrid-1);
- for (size_t j=0; j<rho_ngrid; j++) {
- rho=rho_min+(rho_max-rho_min)*(double)j/((double)rho_ngrid-1);
-
- sigma_a2=h*rho/((1-h)*(double)ns_causal);
- sigma_b2=h*(1.0-rho)/(trace_G*(1-h));
-
- vec_sa2.push_back(sigma_a2);
- vec_sb2.push_back(sigma_b2);
- logm_null.push_back(CalcMarginal (Uty, K_eval, 0.0, tau));
-
- gsl_matrix_set (Hyper, ij, 0, h);
- gsl_matrix_set (Hyper, ij, 1, rho);
- gsl_matrix_set (Hyper, ij, 2, sigma_a2);
- gsl_matrix_set (Hyper, ij, 3, sigma_b2);
- gsl_matrix_set (Hyper, ij, 4, 1/(double)n_grid);
- ij++;
- }
- }
-
- // Compute BF factors.
- time_start=clock();
- cout<<"Calculating BF..."<<endl;
- for (size_t t=0; t<ns_test; t++) {
- gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, 0);
- gsl_vector_const_view X_col=gsl_matrix_const_column (UtX, t);
- gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
-
- for (size_t ij=0; ij<n_grid; ij++) {
- sigma_a2=vec_sa2[ij];
- sigma_b2=vec_sb2[ij];
-
- d=CalcMarginal (Xgamma, Uty, K_eval, sigma_a2, sigma_b2, tau);
- d-=logm_null[ij];
- d=exp(d);
-
- gsl_matrix_set(BF, t, ij, d);
- }
- }
- time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Save results.
- WriteResult (Hyper, BF);
-
- // Free matrices and vectors.
- gsl_matrix_free(BF);
- gsl_matrix_free(Xgamma);
- gsl_matrix_free(Hyper);
- return;
+// Computes single-SNP Bayes factors on an (h, rho) grid and writes the grid
+// and the factors through WriteResult(Hyper, BF).
+//
+// UtX    - one column per SNP; column t is copied into a one-column matrix and
+//          passed to the six-argument CalcMarginal.
+// Uty    - used to set tau = ni_test / (Uty' * Uty) and passed to CalcMarginal.
+// K_eval - passed through to CalcMarginal.
+// U, y   - not used by this function.
+//
+// Hyper has one row per grid point: h, rho, sigma_a2, sigma_b2 and the uniform
+// weight 1 / n_grid. BF(t, g) = exp(logm(SNP t, grid g) - logm_null[g]).
+// Side effect: time_Proposal is set to the minutes spent in the BF loop.
+void BSLMMDAP::DAP_CalcBF(const gsl_matrix *U, const gsl_matrix *UtX,
+                          const gsl_vector *Uty, const gsl_vector *K_eval,
+                          const gsl_vector *y) {
+  clock_t time_start;
+
+  // Set up BF.
+  double tau, h, rho, sigma_a2, sigma_b2, d;
+  size_t ns_causal = 10;
+  size_t n_grid = h_ngrid * rho_ngrid;
+  vector<double> vec_sa2, vec_sb2, logm_null;
+
+  gsl_matrix *BF = gsl_matrix_alloc(ns_test, n_grid);
+  gsl_matrix *Xgamma = gsl_matrix_alloc(ni_test, 1);
+  gsl_matrix *Hyper = gsl_matrix_alloc(n_grid, 5);
+
+  // Compute tau by using yty.
+  gsl_blas_ddot(Uty, Uty, &tau);
+  tau = (double)ni_test / tau;
+
+  // Set up grid values for sigma_a2 and sigma_b2 based on an
+  // approximately even grid for h and rho, and a fixed number
+  // of causals.
+  vec_sa2.reserve(n_grid);
+  vec_sb2.reserve(n_grid);
+  logm_null.reserve(n_grid);
+
+  size_t ij = 0;
+  for (size_t i = 0; i < h_ngrid; i++) {
+    // A one-point grid has no step: use the lower bound instead of
+    // evaluating 0 / 0, which would make h NaN.
+    h = (h_ngrid > 1)
+            ? h_min + (h_max - h_min) * (double)i / ((double)h_ngrid - 1)
+            : h_min;
+    for (size_t j = 0; j < rho_ngrid; j++) {
+      rho = (rho_ngrid > 1)
+                ? rho_min +
+                      (rho_max - rho_min) * (double)j / ((double)rho_ngrid - 1)
+                : rho_min;
+
+      sigma_a2 = h * rho / ((1 - h) * (double)ns_causal);
+      sigma_b2 = h * (1.0 - rho) / (trace_G * (1 - h));
+
+      vec_sa2.push_back(sigma_a2);
+      vec_sb2.push_back(sigma_b2);
+      // NOTE(review): the null term is evaluated with sigma_b2 = 0.0 and the
+      // same arguments for every grid point -- confirm this is intended.
+      logm_null.push_back(CalcMarginal(Uty, K_eval, 0.0, tau));
+
+      gsl_matrix_set(Hyper, ij, 0, h);
+      gsl_matrix_set(Hyper, ij, 1, rho);
+      gsl_matrix_set(Hyper, ij, 2, sigma_a2);
+      gsl_matrix_set(Hyper, ij, 3, sigma_b2);
+      gsl_matrix_set(Hyper, ij, 4, 1 / (double)n_grid);
+      ij++;
+    }
+  }
+
+  // Compute BF factors.
+  time_start = clock();
+  cout << "Calculating BF..." << endl;
+  gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, 0);
+  for (size_t t = 0; t < ns_test; t++) {
+    gsl_vector_const_view X_col = gsl_matrix_const_column(UtX, t);
+    gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector);
+
+    for (size_t g = 0; g < n_grid; g++) {
+      sigma_a2 = vec_sa2[g];
+      sigma_b2 = vec_sb2[g];
+
+      d = CalcMarginal(Xgamma, Uty, K_eval, sigma_a2, sigma_b2, tau);
+      d -= logm_null[g];
+      d = exp(d);
+
+      gsl_matrix_set(BF, t, g, d);
+    }
+  }
+  time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+  // Save results.
+  WriteResult(Hyper, BF);
+
+  // Free matrices and vectors.
+  gsl_matrix_free(BF);
+  gsl_matrix_free(Xgamma);
+  gsl_matrix_free(Hyper);
+  return;
}
+// Closed-form fit for a single categorical annotation (column 0 of Xd).
+// The number of levels is read from dlevel[0]. For each category the mean of
+// pip_vec over the rows in that category is computed; that mean is written to
+// prior_vec for every row, and its logit log(p / (1 - p)) is written to
+// coef[i] for i = 0 .. levels - 1.
+// NOTE(review): a level index with no rows in Xd gives 0 / 0 in the last loop;
+// confirm callers never pass an empty level.
void single_ct_regression(const gsl_matrix_int *Xd,
- const gsl_vector_int *dlevel,
- const gsl_vector *pip_vec,
- gsl_vector *coef, gsl_vector *prior_vec) {
+ const gsl_vector_int *dlevel,
+ const gsl_vector *pip_vec, gsl_vector *coef,
+ gsl_vector *prior_vec) {
- map<int,double> sum_pip;
- map<int,double> sum;
+ map<int, double> sum_pip;
+ map<int, double> sum;
- int levels = gsl_vector_int_get(dlevel,0);
+ int levels = gsl_vector_int_get(dlevel, 0);
- for(int i=0;i<levels;i++){
+ for (int i = 0; i < levels; i++) {
sum_pip[i] = sum[i] = 0;
}
- for(int i=0;i<Xd->size1;i++){
- int cat = gsl_matrix_int_get(Xd,i,0);
- sum_pip[cat] += gsl_vector_get(pip_vec,i);
+ for (int i = 0; i < Xd->size1; i++) {
+ int cat = gsl_matrix_int_get(Xd, i, 0);
+ sum_pip[cat] += gsl_vector_get(pip_vec, i);
sum[cat] += 1;
}
- for(int i=0;i<Xd->size1;i++){
- int cat = gsl_matrix_int_get(Xd,i,0);
- gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]);
+ for (int i = 0; i < Xd->size1; i++) {
+ int cat = gsl_matrix_int_get(Xd, i, 0);
+ gsl_vector_set(prior_vec, i, sum_pip[cat] / sum[cat]);
}
- for(int i=0;i<levels;i++){
- double new_prior = sum_pip[i]/sum[i];
- gsl_vector_set(coef, i, log(new_prior/(1-new_prior)) );
+ for (int i = 0; i < levels; i++) {
+ double new_prior = sum_pip[i] / sum[i];
+ gsl_vector_set(coef, i, log(new_prior / (1 - new_prior)));
}
return;
}
// Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd,
- const vector<string> &vec_rs,
- const vector<double> &vec_sa2,
- const vector<double> &vec_sb2,
- const vector<double> &wab,
- const vector<vector<vector<double> > > &BF,
- gsl_matrix *Ac, gsl_matrix_int *Ad,
- gsl_vector_int *dlevel) {
- clock_t time_start;
-
- // Set up BF.
- double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save;
- size_t t1, t2;
- size_t n_grid=wab.size(), ns_test=vec_rs.size();
-
- gsl_vector *prior_vec=gsl_vector_alloc(ns_test);
- gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5);
- gsl_vector *pip=gsl_vector_alloc(ns_test);
- gsl_vector *coef=gsl_vector_alloc(kc+kd+1);
-
- // Perform the EM algorithm.
- vector<double> vec_wab, vec_wab_new;
-
- // Initial values.
- for (size_t t=0; t<ns_test; t++) {
- gsl_vector_set (prior_vec, t, (double)BF.size()/(double)ns_test);
- }
- for (size_t ij=0; ij<n_grid; ij++) {
- vec_wab.push_back(wab[ij]);
- vec_wab_new.push_back(wab[ij]);
- }
-
- // EM iteration.
- size_t it=0;
- double dif=1;
- while (it<100 && dif>1e-3) {
-
- // Update E_gamma.
- t1=0, t2=0;
- for (size_t b=0; b<BF.size(); b++) {
- s=1;
- for (size_t m=0; m<BF[b].size(); m++) {
- d=0;
- for (size_t ij=0; ij<n_grid; ij++) {
- d+=vec_wab_new[ij]*BF[b][m][ij];
- }
- d*=gsl_vector_get(prior_vec,t1)/(1-gsl_vector_get(prior_vec,t1));
-
- gsl_vector_set(pip, t1, d);
- s+=d;
- t1++;
- }
-
- for (size_t m=0; m<BF[b].size(); m++) {
- d=gsl_vector_get(pip, t2)/s;
- gsl_vector_set(pip, t2, d);
- t2++;
- }
- }
-
- // Update E_wab.
- s=0;
- for (size_t ij=0; ij<n_grid; ij++) {
- vec_wab_new[ij]=0;
-
- t1=0;
- for (size_t b=0; b<BF.size(); b++) {
- d=1;
- for (size_t m=0; m<BF[b].size(); m++) {
- d+=gsl_vector_get(prior_vec, t1)/
- (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
- t1++;
- }
- vec_wab_new[ij]+=log(d);
- }
-
- s=max(s, vec_wab_new[ij]);
- }
-
- d=0;
- for (size_t ij=0; ij<n_grid; ij++) {
- vec_wab_new[ij]=exp(vec_wab_new[ij]-s);
- d+=vec_wab_new[ij];
- }
-
- for (size_t ij=0; ij<n_grid; ij++) {
- vec_wab_new[ij]/=d;
- }
-
- // Update coef, and pi.
- if(kc==0 && kd==0){
-
- // No annotation.
- s=0;
- for (size_t t=0; t<pip->size; t++) {
- s+=gsl_vector_get(pip, t);
- }
- s=s/(double)pip->size;
- for (size_t t=0; t<pip->size; t++) {
- gsl_vector_set(prior_vec, t, s);
- }
-
- gsl_vector_set (coef, 0, log(s/(1-s)));
- } else if(kc==0 && kd!=0){
-
- // Only discrete annotations.
- if(kd == 1){
- single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
- }else{
- logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
- logistic_cat_pred(coef, Ad, dlevel, prior_vec);
- }
- } else if (kc!=0 && kd==0) {
-
- // Only continuous annotations.
- logistic_cont_fit(coef, Ac, pip, 0, 0);
- logistic_cont_pred(coef, Ac, prior_vec);
- } else if (kc!=0 && kd!=0) {
-
- // Both continuous and categorical annotations.
- logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
- logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
- }
-
- // Compute marginal likelihood.
- logm=0;
-
- t1=0;
- for (size_t b=0; b<BF.size(); b++) {
- d=1; s=0;
- for (size_t m=0; m<BF[b].size(); m++) {
- s+=log(1-gsl_vector_get(prior_vec, t1));
- for (size_t ij=0; ij<n_grid; ij++) {
- d+=gsl_vector_get(prior_vec, t1)/
- (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij];
- }
- }
- logm+=log(d)+s;
- t1++;
- }
-
- if (it>0) {
- dif=logm-logm_save;
- }
- logm_save=logm;
- it++;
-
- cout<<"iteration = "<<it<<"; marginal likelihood = "<<logm<<endl;
- }
-
- // Update h and rho that correspond to w_ab.
- for (size_t ij=0; ij<n_grid; ij++) {
- sigma_a2=vec_sa2[ij];
- sigma_b2=vec_sb2[ij];
-
- d=exp(gsl_vector_get(coef, coef->size-1))/
- (1+exp(gsl_vector_get(coef, coef->size-1)));
- h=(d*(double)ns_test*sigma_a2+1*sigma_b2)/
- (1+d*(double)ns_test*sigma_a2+1*sigma_b2);
- rho=d*(double)ns_test*sigma_a2/
- (d*(double)ns_test*sigma_a2+1*sigma_b2);
-
- gsl_matrix_set (Hyper, ij, 0, h);
- gsl_matrix_set (Hyper, ij, 1, rho);
- gsl_matrix_set (Hyper, ij, 2, sigma_a2);
- gsl_matrix_set (Hyper, ij, 3, sigma_b2);
- gsl_matrix_set (Hyper, ij, 4, vec_wab_new[ij]);
- }
-
- // Obtain beta and alpha parameters.
-
- // Save results.
- WriteResult (vec_rs, Hyper, pip, coef);
-
- // Free matrices and vectors.
- gsl_vector_free(prior_vec);
- gsl_matrix_free(Hyper);
- gsl_vector_free(pip);
- gsl_vector_free(coef);
- return;
+// Runs an EM loop that estimates grid weights, per-SNP inclusion
+// probabilities (pip) and annotation coefficients, then writes them out via
+// WriteResult(vec_rs, Hyper, pip, coef).
+//
+// kc, kd  - number of continuous / discrete annotation columns; they select
+//           the regression routine and size coef as kc + kd + 1.
+// vec_rs  - SNP identifiers; its length is taken as the number of SNPs.
+// vec_sa2, vec_sb2 - sigma_a2 / sigma_b2 for each grid point.
+// wab     - initial weight of each grid point; its length is the grid size.
+// BF      - indexed as BF[block][SNP within block][grid point].
+// Ac, Ad, dlevel - annotation inputs forwarded to the regression helpers.
+//
+// prior_vec and pip hold one entry per SNP in the order obtained by walking
+// BF block by block; t1 and t2 are running SNP indices in that order.
+// NOTE(review): this assumes the total number of BF[b][m] entries equals
+// vec_rs.size() -- confirm against the reader that builds BF.
+// The loop stops after 100 iterations or once the marginal likelihood
+// increases by no more than 1e-3 between iterations.
+void BSLMMDAP::DAP_EstimateHyper(
+    const size_t kc, const size_t kd, const vector<string> &vec_rs,
+    const vector<double> &vec_sa2, const vector<double> &vec_sb2,
+    const vector<double> &wab, const vector<vector<vector<double>>> &BF,
+    gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) {
+  // Set up BF.
+  double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save = 0.0;
+  size_t t1, t2;
+  size_t n_grid = wab.size(), ns_test = vec_rs.size();
+
+  gsl_vector *prior_vec = gsl_vector_alloc(ns_test);
+  gsl_matrix *Hyper = gsl_matrix_alloc(n_grid, 5);
+  gsl_vector *pip = gsl_vector_alloc(ns_test);
+  gsl_vector *coef = gsl_vector_alloc(kc + kd + 1);
+
+  // Perform the EM algorithm.
+  vector<double> vec_wab, vec_wab_new;
+  vec_wab.reserve(n_grid);
+  vec_wab_new.reserve(n_grid);
+
+  // Initial values.
+  for (size_t t = 0; t < ns_test; t++) {
+    gsl_vector_set(prior_vec, t, (double)BF.size() / (double)ns_test);
+  }
+  for (size_t ij = 0; ij < n_grid; ij++) {
+    vec_wab.push_back(wab[ij]);
+    vec_wab_new.push_back(wab[ij]);
+  }
+
+  // EM iteration.
+  size_t it = 0;
+  double dif = 1;
+  while (it < 100 && dif > 1e-3) {
+
+    // Update E_gamma: unnormalised posterior odds per SNP, then normalise
+    // within each block by s = 1 + sum of the block's odds.
+    t1 = 0, t2 = 0;
+    for (size_t b = 0; b < BF.size(); b++) {
+      s = 1;
+      for (size_t m = 0; m < BF[b].size(); m++) {
+        d = 0;
+        for (size_t ij = 0; ij < n_grid; ij++) {
+          d += vec_wab_new[ij] * BF[b][m][ij];
+        }
+        d *=
+            gsl_vector_get(prior_vec, t1) / (1 - gsl_vector_get(prior_vec, t1));
+
+        gsl_vector_set(pip, t1, d);
+        s += d;
+        t1++;
+      }
+
+      for (size_t m = 0; m < BF[b].size(); m++) {
+        d = gsl_vector_get(pip, t2) / s;
+        gsl_vector_set(pip, t2, d);
+        t2++;
+      }
+    }
+
+    // Update E_wab: accumulate log terms per grid point, then exponentiate
+    // relative to the maximum (s) and normalise to sum to one.
+    // NOTE(review): vec_wab keeps the initial weights for the whole loop; it
+    // is never refreshed from vec_wab_new -- confirm this is intended.
+    s = 0;
+    for (size_t ij = 0; ij < n_grid; ij++) {
+      vec_wab_new[ij] = 0;
+
+      t1 = 0;
+      for (size_t b = 0; b < BF.size(); b++) {
+        d = 1;
+        for (size_t m = 0; m < BF[b].size(); m++) {
+          d += gsl_vector_get(prior_vec, t1) /
+               (1 - gsl_vector_get(prior_vec, t1)) * vec_wab[ij] *
+               BF[b][m][ij];
+          t1++;
+        }
+        vec_wab_new[ij] += log(d);
+      }
+
+      s = max(s, vec_wab_new[ij]);
+    }
+
+    d = 0;
+    for (size_t ij = 0; ij < n_grid; ij++) {
+      vec_wab_new[ij] = exp(vec_wab_new[ij] - s);
+      d += vec_wab_new[ij];
+    }
+
+    for (size_t ij = 0; ij < n_grid; ij++) {
+      vec_wab_new[ij] /= d;
+    }
+
+    // Update coef, and pi.
+    if (kc == 0 && kd == 0) {
+
+      // No annotation.
+      s = 0;
+      for (size_t t = 0; t < pip->size; t++) {
+        s += gsl_vector_get(pip, t);
+      }
+      s = s / (double)pip->size;
+      for (size_t t = 0; t < pip->size; t++) {
+        gsl_vector_set(prior_vec, t, s);
+      }
+
+      gsl_vector_set(coef, 0, log(s / (1 - s)));
+    } else if (kc == 0 && kd != 0) {
+
+      // Only discrete annotations.
+      if (kd == 1) {
+        single_ct_regression(Ad, dlevel, pip, coef, prior_vec);
+      } else {
+        logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0);
+        logistic_cat_pred(coef, Ad, dlevel, prior_vec);
+      }
+    } else if (kc != 0 && kd == 0) {
+
+      // Only continuous annotations.
+      logistic_cont_fit(coef, Ac, pip, 0, 0);
+      logistic_cont_pred(coef, Ac, prior_vec);
+    } else if (kc != 0 && kd != 0) {
+
+      // Both continuous and categorical annotations.
+      logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0);
+      logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec);
+    }
+
+    // Compute marginal likelihood.
+    logm = 0;
+
+    t1 = 0;
+    for (size_t b = 0; b < BF.size(); b++) {
+      d = 1;
+      s = 0;
+      for (size_t m = 0; m < BF[b].size(); m++) {
+        s += log(1 - gsl_vector_get(prior_vec, t1));
+        for (size_t ij = 0; ij < n_grid; ij++) {
+          d += gsl_vector_get(prior_vec, t1) /
+               (1 - gsl_vector_get(prior_vec, t1)) * vec_wab[ij] *
+               BF[b][m][ij];
+        }
+        // Advance once per SNP (not once per block) so prior_vec is read at
+        // the entry that belongs to BF[b][m], as in the loops above.
+        t1++;
+      }
+      logm += log(d) + s;
+    }
+
+    if (it > 0) {
+      dif = logm - logm_save;
+    }
+    logm_save = logm;
+    it++;
+
+    cout << "iteration = " << it << "; marginal likelihood = " << logm << endl;
+  }
+
+  // Update h and rho that correspond to w_ab.
+  for (size_t ij = 0; ij < n_grid; ij++) {
+    sigma_a2 = vec_sa2[ij];
+    sigma_b2 = vec_sb2[ij];
+
+    // Inverse logit of the last coefficient.
+    d = exp(gsl_vector_get(coef, coef->size - 1)) /
+        (1 + exp(gsl_vector_get(coef, coef->size - 1)));
+    h = (d * (double)ns_test * sigma_a2 + 1 * sigma_b2) /
+        (1 + d * (double)ns_test * sigma_a2 + 1 * sigma_b2);
+    rho = d * (double)ns_test * sigma_a2 /
+          (d * (double)ns_test * sigma_a2 + 1 * sigma_b2);
+
+    gsl_matrix_set(Hyper, ij, 0, h);
+    gsl_matrix_set(Hyper, ij, 1, rho);
+    gsl_matrix_set(Hyper, ij, 2, sigma_a2);
+    gsl_matrix_set(Hyper, ij, 3, sigma_b2);
+    gsl_matrix_set(Hyper, ij, 4, vec_wab_new[ij]);
+  }
+
+  // Obtain beta and alpha parameters.
+
+  // Save results.
+  WriteResult(vec_rs, Hyper, pip, coef);
+
+  // Free matrices and vectors.
+  gsl_vector_free(prior_vec);
+  gsl_matrix_free(Hyper);
+  gsl_vector_free(pip);
+  gsl_vector_free(coef);
+  return;
}
diff --git a/src/bslmmdap.h b/src/bslmmdap.h
index db5774b..dc05e34 100644
--- a/src/bslmmdap.h
+++ b/src/bslmmdap.h
@@ -19,97 +19,91 @@
#ifndef __BSLMMDAP_H__
#define __BSLMMDAP_H__
-#include <vector>
-#include <map>
-#include <gsl/gsl_rng.h>
-#include <gsl/gsl_randist.h>
#include "param.h"
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_rng.h>
+#include <map>
+#include <vector>
using namespace std;
+// Holds the settings and state for the BSLMM-DAP analysis: output paths,
+// the (h, rho) grid bounds and sizes, sample/SNP counts and indicators, timing
+// counters, and the routines that compute marginal likelihoods, Bayes factors
+// and hyper-parameter estimates. All members are public.
class BSLMMDAP {
public:
- // IO-related parameters.
- int a_mode;
- size_t d_pace;
-
- string file_bfile;
- string file_geno;
- string file_out;
- string path_out;
-
- // LMM related parameters
- double pve_null;
- double pheno_mean;
-
- // BSLMM MCMC related parameters
- long int randseed;
- double trace_G;
-
- HYPBSLMM cHyp_initial;
-
- // Summary statistics
- size_t ni_total, ns_total; // Number of total individuals and SNPs.
- size_t ni_test, ns_test; // Number of individuals and SNPs
- // used for analysis.
-
- double h_min, h_max, rho_min, rho_max;
- size_t h_ngrid, rho_ngrid;
-
- double time_UtZ;
- double time_Omega; // Time spent on optimization iterations.
- double time_Proposal; // Time spent on constructing the
- // proposal distribution for gamma
- // (i.e., lmm or lm analysis).
-
- // Indicator for individuals (phenotypes): 0 missing, 1
- // available for analysis.
- vector<int> indicator_idv;
-
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
-
- vector<SNPINFO> snpInfo; // Record SNP information.
-
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
-
- void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF);
- void WriteResult (const vector<string> &vec_rs,
- const gsl_matrix *Hyper, const gsl_vector *pip,
- const gsl_vector *coef);
- double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval,
- const double sigma_b2, const double tau);
- double CalcMarginal (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const double sigma_a2, const double sigma_b2,
- const double tau);
- double CalcPrior (class HYPBSLMM &cHyp);
-
- void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const gsl_vector *y);
- void DAP_EstimateHyper (const size_t kc, const size_t kd,
- const vector<string> &vec_rs,
- const vector<double> &vec_sa2,
- const vector<double> &vec_sb2,
- const vector<double> &wab,
- const vector<vector<vector<double> > > &BF,
- gsl_matrix *Ac, gsl_matrix_int *Ad,
- gsl_vector_int *dlevel);
+ // IO-related parameters.
+ int a_mode;
+ size_t d_pace;
+
+ string file_bfile;
+ string file_geno;
+ string file_out;
+ string path_out;
+
+ // LMM related parameters
+ double pve_null;
+ double pheno_mean;
+
+ // BSLMM MCMC related parameters
+ long int randseed;
+ double trace_G;
+
+ HYPBSLMM cHyp_initial;
+
+ // Summary statistics
+ size_t ni_total, ns_total; // Number of total individuals and SNPs.
+ size_t ni_test, ns_test; // Number of individuals and SNPs
+ // used for analysis.
+
+ double h_min, h_max, rho_min, rho_max;
+ size_t h_ngrid, rho_ngrid;
+
+ double time_UtZ;
+ double time_Omega; // Time spent on optimization iterations.
+ double time_Proposal; // Time spent on constructing the
+ // proposal distribution for gamma
+ // (i.e., lmm or lm analysis).
+
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_idv;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
+
+ vector<SNPINFO> snpInfo; // Record SNP information.
+
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+
+ void WriteResult(const gsl_matrix *Hyper, const gsl_matrix *BF);
+ void WriteResult(const vector<string> &vec_rs, const gsl_matrix *Hyper,
+ const gsl_vector *pip, const gsl_vector *coef);
+ double CalcMarginal(const gsl_vector *Uty, const gsl_vector *K_eval,
+ const double sigma_b2, const double tau);
+ double CalcMarginal(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+ const gsl_vector *K_eval, const double sigma_a2,
+ const double sigma_b2, const double tau);
+ double CalcPrior(class HYPBSLMM &cHyp);
+
+ void DAP_CalcBF(const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ const gsl_vector *y);
+ void
+ DAP_EstimateHyper(const size_t kc, const size_t kd,
+ const vector<string> &vec_rs, const vector<double> &vec_sa2,
+ const vector<double> &vec_sb2, const vector<double> &wab,
+ const vector<vector<vector<double>>> &BF, gsl_matrix *Ac,
+ gsl_matrix_int *Ad, gsl_vector_int *dlevel);
};
-void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
- vector<double> &vec_sb2, vector<double> &vec_wab);
-void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
- vector<vector<vector<double> > > &BF);
-void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
- gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
- size_t &kc, size_t &kd);
+void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2,
+ vector<double> &vec_sb2, vector<double> &vec_wab);
+void ReadFile_bf(const string &file_bf, vector<string> &vec_rs,
+ vector<vector<vector<double>>> &BF);
+void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
+ gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
+ size_t &kc, size_t &kd);
#endif
-
-
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp
index 733dae1..a8c545c 100644
--- a/src/eigenlib.cpp
+++ b/src/eigenlib.cpp
@@ -16,13 +16,13 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include "Eigen/Dense"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
#include <cmath>
+#include <iostream>
#include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
-#include "Eigen/Dense"
using namespace std;
using namespace Eigen;
@@ -34,82 +34,79 @@ using namespace Eigen;
// eigen, 1x or 0.3x slower than lapack
// invert, 20x or 10x faster than lapack
//
-void eigenlib_dgemm (const char *TransA, const char *TransB,
- const double alpha, const gsl_matrix *A,
- const gsl_matrix *B, const double beta,
- gsl_matrix *C) {
- Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> >
- A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
- Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> >
- B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda) );
- Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> >
- C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda) );
+// Computes C = alpha * op(A) * op(B) + beta * C in place, using Eigen maps
+// laid directly over the GSL storage (row-major, row stride taken from tda).
+// op(X) is X when the first character of the matching Trans flag is 'N' or
+// 'n'; any other character selects the transpose.
+void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
+ const gsl_matrix *A, const gsl_matrix *B, const double beta,
+ gsl_matrix *C) {
+ Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+ A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda));
+ Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+ B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda));
+ Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+ C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda));
- if (*TransA=='N' || *TransA=='n') {
- if (*TransB=='N' || *TransB=='n') {
- C_mat=alpha*A_mat*B_mat+beta*C_mat;
+ if (*TransA == 'N' || *TransA == 'n') {
+ if (*TransB == 'N' || *TransB == 'n') {
+ C_mat = alpha * A_mat * B_mat + beta * C_mat;
} else {
- C_mat=alpha*A_mat*B_mat.transpose()+beta*C_mat;
+ C_mat = alpha * A_mat * B_mat.transpose() + beta * C_mat;
}
} else {
- if (*TransB=='N' || *TransB=='n') {
- C_mat=alpha*A_mat.transpose()*B_mat+beta*C_mat;
+ if (*TransB == 'N' || *TransB == 'n') {
+ C_mat = alpha * A_mat.transpose() * B_mat + beta * C_mat;
} else {
- C_mat=alpha*A_mat.transpose()*B_mat.transpose()+beta*C_mat;
+ C_mat = alpha * A_mat.transpose() * B_mat.transpose() + beta * C_mat;
}
}
return;
}
-void eigenlib_dgemv (const char *TransA, const double alpha,
- const gsl_matrix *A, const gsl_vector *x,
- const double beta, gsl_vector *y) {
- Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> >
- A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
- Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> >
- x_vec(x->data, x->size, InnerStride<Dynamic>(x->stride) );
- Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> >
- y_vec(y->data, y->size, InnerStride<Dynamic>(y->stride) );
+// Computes y = alpha * op(A) * x + beta * y in place, using Eigen maps over
+// the GSL storage. The matrix map takes its row stride from A->tda and the
+// vector maps take their element stride from x->stride / y->stride.
+// op(A) is A when the first character of TransA is 'N' or 'n'; any other
+// character selects the transpose.
+void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
+ const gsl_vector *x, const double beta, gsl_vector *y) {
+ Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+ A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda));
+ Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> x_vec(
+ x->data, x->size, InnerStride<Dynamic>(x->stride));
+ Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> y_vec(
+ y->data, y->size, InnerStride<Dynamic>(y->stride));
- if (*TransA=='N' || *TransA=='n') {
- y_vec=alpha*A_mat*x_vec+beta*y_vec;
+ if (*TransA == 'N' || *TransA == 'n') {
+ y_vec = alpha * A_mat * x_vec + beta * y_vec;
} else {
- y_vec=alpha*A_mat.transpose()*x_vec+beta*y_vec;
+ y_vec = alpha * A_mat.transpose() * x_vec + beta * y_vec;
}
return;
}
+// Replaces A with its inverse, in place, through an Eigen map over the GSL
+// storage. The map takes its row stride from A->tda (as eigenlib_dgemm does)
+// so that matrix views whose rows are not packed back to back are handled.
void eigenlib_invert(gsl_matrix *A) {
- Map<Matrix<double, Dynamic, Dynamic, RowMajor> >
- A_mat(A->data, A->size1, A->size2);
- A_mat=A_mat.inverse();
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda));
+  A_mat = A_mat.inverse();
return;
}
-void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A) {
- Map<Matrix<double, Dynamic, Dynamic, RowMajor> >
- A_mat(A->data, A->size1, A->size2);
- Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> >
- b_vec(b->data, b->size, OuterStride<Dynamic>(b->stride) );
- A_mat=alpha*b_vec*b_vec.transpose()+A_mat;
+// Rank-one update A = alpha * b * b' + A, in place, through Eigen maps over
+// the GSL storage. The whole matrix is updated, not just one triangle.
+// The vector map uses InnerStride so that b->stride is honoured (an
+// OuterStride on a column vector leaves the element stride at 1), and the
+// matrix map takes its row stride from A->tda as eigenlib_dgemm does.
+void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) {
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda));
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> b_vec(
+      b->data, b->size, InnerStride<Dynamic>(b->stride));
+  A_mat = alpha * b_vec * b_vec.transpose() + A_mat;
return;
}
-void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U,
- gsl_vector *eval) {
- Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> >
- G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda) );
- Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> >
- U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda) );
- Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> >
- eval_vec(eval->data, eval->size, OuterStride<Dynamic>(eval->stride) );
+// Eigen-decomposition of G with Eigen's SelfAdjointEigenSolver: eigenvalues
+// are written to eval and eigenvectors to U. Calls abort() if the solver does
+// not report Success.
+// The eval map uses InnerStride so that eval->stride is honoured (an
+// OuterStride on a column vector leaves the element stride at 1).
+void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) {
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda));
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>>
+      U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda));
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> eval_vec(
+      eval->data, eval->size, InnerStride<Dynamic>(eval->stride));
SelfAdjointEigenSolver<MatrixXd> es(G_mat);
if (es.info() != Success)
abort();
- eval_vec=es.eigenvalues();
- U_mat=es.eigenvectors();
+  eval_vec = es.eigenvalues();
+  U_mat = es.eigenvectors();
return;
}
diff --git a/src/eigenlib.h b/src/eigenlib.h
index 3659dc1..b29fa63 100644
--- a/src/eigenlib.h
+++ b/src/eigenlib.h
@@ -23,15 +23,13 @@
using namespace std;
-void eigenlib_dgemm (const char *TransA, const char *TransB,
- const double alpha, const gsl_matrix *A,
- const gsl_matrix *B, const double beta,
- gsl_matrix *C);
-void eigenlib_dgemv (const char *TransA, const double alpha,
- const gsl_matrix *A, const gsl_vector *x,
- const double beta, gsl_vector *y);
+void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
+ const gsl_matrix *A, const gsl_matrix *B, const double beta,
+ gsl_matrix *C);
+void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
+ const gsl_vector *x, const double beta, gsl_vector *y);
void eigenlib_invert(gsl_matrix *A);
-void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A);
-void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval);
+void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A);
+void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval);
#endif
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 1a9ca9b..c72475b 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -16,427 +16,670 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include <cmath>
+#include <cstring>
+#include <ctime>
#include <fstream>
+#include <iostream>
#include <string>
-#include <cstring>
#include <sys/stat.h>
-#include <ctime>
-#include <cmath>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
-#include "gsl/gsl_eigen.h"
#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
-#include "lapack.h"
-#include "io.h"
-#include "gemma.h"
-#include "vc.h"
-#include "lm.h"
#include "bslmm.h"
#include "bslmmdap.h"
+#include "gemma.h"
+#include "io.h"
+#include "lapack.h"
#include "ldr.h"
+#include "lm.h"
#include "lmm.h"
+#include "mathfunc.h"
#include "mvlmm.h"
#include "prdt.h"
#include "varcov.h"
-#include "mathfunc.h"
+#include "vc.h"
using namespace std;
-GEMMA::GEMMA(void):
-version("0.97"), date("07/27/2017"), year("2017")
-{}
-
-void GEMMA::PrintHeader (void) {
- cout<<endl;
- cout<<"*********************************************************"<<endl;
- cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl;
- cout<<" Version "<<version<<", "<<date<<" "<<
- endl;
- cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl;
- cout<<" (C) "<<year<<" Xiang Zhou "<<endl;
- cout<<" GNU General Public License "<<endl;
- cout<<" For Help, Type ./gemma -h "<<endl;
- cout<<"*********************************************************"<<endl;
- cout<<endl;
+GEMMA::GEMMA(void) : version("0.97"), date("07/27/2017"), year("2017") {}
+
+void GEMMA::PrintHeader(void) {
+ cout << endl;
+ cout << "*********************************************************" << endl;
+ cout << " Genome-wide Efficient Mixed Model Association (GEMMA) " << endl;
+ cout << " Version " << version << ", " << date
+ << " " << endl;
+ cout << " Visit http://www.xzlab.org/software.html For Updates " << endl;
+ cout << " (C) " << year << " Xiang Zhou "
+ << endl;
+ cout << " GNU General Public License " << endl;
+ cout << " For Help, Type ./gemma -h " << endl;
+ cout << "*********************************************************" << endl;
+ cout << endl;
return;
}
-void GEMMA::PrintLicense (void) {
- cout<<endl;
- cout<<"The Software Is Distributed Under GNU General Public "<<
- "License, But May Also Require The Following Notifications."<<endl;
- cout<<endl;
-
- cout<<"Including Lapack Routines In The Software May Require"<<
- " The Following Notification:"<<endl;
- cout<<"Copyright (c) 1992-2010 The University of Tennessee and "<<
- "The University of Tennessee Research Foundation. All rights "<<
- "reserved."<<endl;
- cout<<"Copyright (c) 2000-2010 The University of California "<<
- "Berkeley. All rights reserved."<<endl;
- cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. "<<
- "All rights reserved."<<endl;
- cout<<endl;
-
- cout<<"$COPYRIGHT$"<<endl;
- cout<<"Additional copyrights may follow"<<endl;
- cout<<"$HEADER$"<<endl;
- cout<<"Redistribution and use in source and binary forms, with or "<<
- "without modification, are permitted provided that the following "<<
- " conditions are met:"<<endl;
- cout<<"- Redistributions of source code must retain the above "<<
- "copyright notice, this list of conditions and the following "<<
- "disclaimer."<<endl;
- cout<<"- Redistributions in binary form must reproduce the above "<<
- "copyright notice, this list of conditions and the following "<<
- "disclaimer listed in this license in the documentation and/or "<<
- "other materials provided with the distribution."<<endl;
- cout<<"- Neither the name of the copyright holders nor the names "<<
- "of its contributors may be used to endorse or promote products "<<
- "derived from this software without specific prior written "<<
- "permission."<<endl;
- cout<<"The copyright holders provide no reassurances that the "<<
- "source code provided does not infringe any patent, copyright, "<<
- "or any other "<<
- "intellectual property rights of third parties. "<<
- "The copyright holders disclaim any liability to any recipient "<<
- "for claims brought against "<<
- "recipient by any third party for infringement of that parties "<<
- "intellectual property rights. "<<endl;
- cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "<<
- "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "<<
- "INCLUDING, BUT NOT "<<
- "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "<<
- "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "<<
- "SHALL THE COPYRIGHT "<<
- "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "<<
- "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "<<
- "(INCLUDING, BUT NOT "<<
- "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "<<
- "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "<<
- "HOWEVER CAUSED AND ON ANY "<<
- "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "<<
- "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "<<
- "OUT OF THE USE "<<
- "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "<<
- "SUCH DAMAGE."<<endl;
- cout<<endl;
-
- return;
+void GEMMA::PrintLicense(void) {
+ cout << endl;
+ cout << "The Software Is Distributed Under GNU General Public "
+ << "License, But May Also Require The Following Notifications." << endl;
+ cout << endl;
+
+ cout << "Including Lapack Routines In The Software May Require"
+ << " The Following Notification:" << endl;
+ cout << "Copyright (c) 1992-2010 The University of Tennessee and "
+ << "The University of Tennessee Research Foundation. All rights "
+ << "reserved." << endl;
+ cout << "Copyright (c) 2000-2010 The University of California "
+ << "Berkeley. All rights reserved." << endl;
+ cout << "Copyright (c) 2006-2010 The University of Colorado Denver. "
+ << "All rights reserved." << endl;
+ cout << endl;
+
+ cout << "$COPYRIGHT$" << endl;
+ cout << "Additional copyrights may follow" << endl;
+ cout << "$HEADER$" << endl;
+ cout << "Redistribution and use in source and binary forms, with or "
+ << "without modification, are permitted provided that the following "
+ << " conditions are met:" << endl;
+ cout << "- Redistributions of source code must retain the above "
+ << "copyright notice, this list of conditions and the following "
+ << "disclaimer." << endl;
+ cout << "- Redistributions in binary form must reproduce the above "
+ << "copyright notice, this list of conditions and the following "
+ << "disclaimer listed in this license in the documentation and/or "
+ << "other materials provided with the distribution." << endl;
+ cout << "- Neither the name of the copyright holders nor the names "
+ << "of its contributors may be used to endorse or promote products "
+ << "derived from this software without specific prior written "
+ << "permission." << endl;
+ cout << "The copyright holders provide no reassurances that the "
+ << "source code provided does not infringe any patent, copyright, "
+ << "or any other "
+ << "intellectual property rights of third parties. "
+ << "The copyright holders disclaim any liability to any recipient "
+ << "for claims brought against "
+ << "recipient by any third party for infringement of that parties "
+ << "intellectual property rights. " << endl;
+ cout << "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "
+ << "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "
+ << "INCLUDING, BUT NOT "
+ << "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "
+ << "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "
+ << "SHALL THE COPYRIGHT "
+ << "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "
+ << "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "
+ << "(INCLUDING, BUT NOT "
+ << "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "
+ << "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "
+ << "HOWEVER CAUSED AND ON ANY "
+ << "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "
+ << "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "
+ << "OUT OF THE USE "
+ << "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "
+ << "SUCH DAMAGE." << endl;
+ cout << endl;
+
+ return;
}
void GEMMA::PrintHelp(size_t option) {
- if (option==0) {
- cout<<endl;
- cout<<" GEMMA version "<<version<<", released on "<<date<<endl;
- cout<<" implemented by Xiang Zhou"<<endl;
- cout<<endl;
- cout<<" type ./gemma -h [num] for detailed helps"<<endl;
- cout<<" options: " << endl;
- cout<<" 1: quick guide"<<endl;
- cout<<" 2: file I/O related"<<endl;
- cout<<" 3: SNP QC"<<endl;
- cout<<" 4: calculate relatedness matrix"<<endl;
- cout<<" 5: perform eigen decomposition"<<endl;
- cout<<" 6: perform variance component estimation"<<endl;
- cout<<" 7: fit a linear model"<<endl;
- cout<<" 8: fit a linear mixed model"<<endl;
- cout<<" 9: fit a multivariate linear mixed model"<<endl;
- cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl;
- cout<<" 11: obtain predicted values"<<endl;
- cout<<" 12: calculate snp variance covariance"<<endl;
- cout<<" 13: note"<<endl;
- cout<<endl;
+ if (option == 0) {
+ cout << endl;
+ cout << " GEMMA version " << version << ", released on " << date << endl;
+ cout << " implemented by Xiang Zhou" << endl;
+ cout << endl;
+ cout << " type ./gemma -h [num] for detailed helps" << endl;
+ cout << " options: " << endl;
+ cout << " 1: quick guide" << endl;
+ cout << " 2: file I/O related" << endl;
+ cout << " 3: SNP QC" << endl;
+ cout << " 4: calculate relatedness matrix" << endl;
+ cout << " 5: perform eigen decomposition" << endl;
+ cout << " 6: perform variance component estimation" << endl;
+ cout << " 7: fit a linear model" << endl;
+ cout << " 8: fit a linear mixed model" << endl;
+ cout << " 9: fit a multivariate linear mixed model" << endl;
+ cout << " 10: fit a Bayesian sparse linear mixed model" << endl;
+ cout << " 11: obtain predicted values" << endl;
+ cout << " 12: calculate snp variance covariance" << endl;
+ cout << " 13: note" << endl;
+ cout << endl;
}
- if (option==1) {
- cout<<" QUICK GUIDE" << endl;
- cout<<" to generate a relatedness matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl;
- cout<<" to generate the S matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl;
- cout<<" to generate the q vector: "<<endl;
- cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl;
- cout<<" to generate the ldsc weigthts: "<<endl;
- cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl;
- cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl;
- cout<<" to estimate variance components: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" options for the above two commands: -crt -windowbp [num]"<<endl;
- cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl;
- cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl;
- cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl;
- cout<<" to fit a linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a linear mixed model to test g by e effects: "<<endl;
- cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl;
- cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl;
- cout<<" to fit a multivariate linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl;
- cout<<" to fit a Bayesian sparse linear mixed model: "<<endl;
- cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl;
- cout<<" to obtain predicted values: "<<endl;
- cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl;
- cout<<" to calculate correlations between SNPs: "<<endl;
- cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl;
- cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl;
- cout<<endl;
+ if (option == 1) {
+ cout << " QUICK GUIDE" << endl;
+ cout << " to generate a relatedness matrix: " << endl;
+ cout << " ./gemma -bfile [prefix] -gk [num] -o [prefix]" << endl;
+ cout << " ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"
+ << endl;
+ cout << " to generate the S matrix: " << endl;
+ cout << " ./gemma -bfile [prefix] -gs -o [prefix]" << endl;
+ cout << " ./gemma -p [filename] -g [filename] -gs -o [prefix]"
+ << endl;
+ cout << " ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"
+ << endl;
+ cout << " ./gemma -p [filename] -g [filename] -cat [filename] -gs "
+ "-o [prefix]"
+ << endl;
+ cout << " ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"
+ << endl;
+ cout << " ./gemma -p [filename] -g [filename] -sample [num] -gs -o "
+ "[prefix]"
+ << endl;
+ cout << " to generate the q vector: " << endl;
+ cout << " ./gemma -beta [filename] -gq -o [prefix]" << endl;
+ cout << " ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"
+ << endl;
+ cout << " to generate the ldsc weigthts: " << endl;
+ cout << " ./gemma -beta [filename] -gw -o [prefix]" << endl;
+ cout << " ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"
+ << endl;
+ cout << " to perform eigen decomposition of the relatedness matrix: "
+ << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -k [filename] -eigen "
+ "-o [prefix]"
+ << endl;
+ cout << " to estimate variance components: " << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -vc [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -bfile [prefix] -mk [filename] -vc [num] -o "
+ "[prefix]"
+ << endl;
+ cout
+ << " ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -beta [filename] -cor [filename] -vc [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " ./gemma -beta [filename] -cor [filename] -cat [filename] "
+ "-vc [num] -o [prefix]"
+ << endl;
+ cout << " options for the above two commands: -crt -windowbp [num]"
+ << endl;
+ cout << " ./gemma -mq [filename] -ms [filename] -mv [filename] -vc "
+ "[num] -o [prefix]"
+ << endl;
+ cout << " or with summary statistics, replace bfile with mbfile, "
+ "or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"
+ << endl;
+ cout << " ./gemma -beta [filename] -bfile [filename] -cat "
+ "[filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -beta [filename] -bfile [filename] -cat "
+ "[filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"
+ << endl;
+ cout << " to fit a linear mixed model: " << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -k "
+ "[filename] -lmm [num] -o [prefix]"
+ << endl;
+ cout << " to fit a linear mixed model to test g by e effects: " << endl;
+ cout << " ./gemma -bfile [prefix] -gxe [filename] -k [filename] "
+ "-lmm [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -gxe "
+ "[filename] -k [filename] -lmm [num] -o [prefix]"
+ << endl;
+ cout << " to fit a univariate linear mixed model with different residual "
+ "weights for different individuals: "
+ << endl;
+ cout << " ./gemma -bfile [prefix] -weight [filename] -k [filename] "
+ "-lmm [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] "
+ "-weight [filename] -k [filename] -lmm [num] -o [prefix]"
+ << endl;
+ cout << " to fit a multivariate linear mixed model: " << endl;
+ cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -n "
+ "[num1] [num2] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -k "
+ "[filename] -lmm [num] -n [num1] [num2] -o [prefix]"
+ << endl;
+ cout << " to fit a Bayesian sparse linear mixed model: " << endl;
+ cout << " ./gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl;
+ cout << " ./gemma -g [filename] -p [filename] -a [filename] -bslmm "
+ "[num] -o [prefix]"
+ << endl;
+ cout << " to obtain predicted values: " << endl;
+ cout << " ./gemma -bfile [prefix] -epm [filename] -emu [filename] "
+ "-ebv [filename] -k [filename] -predict [num] -o [prefix]"
+ << endl;
+ cout << " ./gemma -g [filename] -p [filename] -epm [filename] -emu "
+ "[filename] -ebv [filename] -k [filename] -predict [num] -o "
+ "[prefix]"
+ << endl;
+ cout << " to calculate correlations between SNPs: " << endl;
+ cout << " ./gemma -bfile [prefix] -calccor -o [prefix]" << endl;
+ cout << " ./gemma -g [filename] -p [filename] -calccor -o [prefix]"
+ << endl;
+ cout << endl;
}
- if (option==2) {
- cout<<" FILE I/O RELATED OPTIONS" << endl;
- cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl;
- cout<<" requires: *.fam, *.bim and *.bed files"<<endl;
- cout<<" missing value: -9"<<endl;
- cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl;
- cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl;
- cout<<" format: phenotype for individual 1"<<endl;
- cout<<" phenotype for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl;
- cout<<" format: rs#1, base_position, chr_number"<<endl;
- cout<<" rs#2, base_position, chr_number"<<endl;
- cout<<" ..."<<endl;
+ if (option == 2) {
+ cout << " FILE I/O RELATED OPTIONS" << endl;
+ cout << " -bfile [prefix] "
+ << " specify input PLINK binary ped file prefix." << endl;
+ cout << " requires: *.fam, *.bim and *.bed files" << endl;
+ cout << " missing value: -9" << endl;
+ cout << " -g [filename] "
+ << " specify input BIMBAM mean genotype file name" << endl;
+ cout << " format: rs#1, allele0, allele1, genotype for individual "
+ "1, genotype for individual 2, ..."
+ << endl;
+ cout << " rs#2, allele0, allele1, genotype for individual "
+ "1, genotype for individual 2, ..."
+ << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -p [filename] "
+ << " specify input BIMBAM phenotype file name" << endl;
+ cout << " format: phenotype for individual 1" << endl;
+ cout << " phenotype for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -a [filename] "
+ << " specify input BIMBAM SNP annotation file name (optional)" << endl;
+ cout << " format: rs#1, base_position, chr_number" << endl;
+ cout << " rs#2, base_position, chr_number" << endl;
+ cout << " ..." << endl;
// WJA added.
- cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl;
- cout<<" requires: *.bgen, *.sample files"<<endl;
-
- cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl;
- cout<<" format: variable for individual 1"<<endl;
- cout<<" variable for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl;
- cout<<" format: variable for individual 1"<<endl;
- cout<<" variable for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl;
- cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl;
- cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl;
- cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl;
- cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl;
- cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl;
- cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl;
- cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" note: the intercept (a column of 1s) may need to be included"<<endl;
- cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl;
- cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl;
- cout<<" options: n1: rs column number"<<endl;
- cout<<" n2: estimated alpha column number (0 to ignore)"<<endl;
- cout<<" n3: estimated beta column number (0 to ignore)"<<endl;
- cout<<" n4: estimated gamma column number (0 to ignore)"<<endl;
- cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl;
- cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl;
- cout<<" format: value for individual 1"<<endl;
- cout<<" value for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl;
- cout<<" -mu [num] "<<" specify input estimated mean value"<<endl;
- cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl;
- cout<<" format: header"<<endl;
- cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl;
- cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: not allowed"<<endl;
- cout<<" -r [filename] "<<" specify input total read count file name"<<endl;
- cout<<" format: total read count for individual 1"<<endl;
- cout<<" total read count for individual 2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl;
- cout<<" format: rs#1"<<endl;
- cout<<" rs#2"<<endl;
- cout<<" ..."<<endl;
- cout<<" missing value: NA"<<endl;
- cout<<" -silence "<<" silent terminal display"<<endl;
- cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl;
- cout<<" options: 1: \"n by n matrix\" format"<<endl;
- cout<<" 2: \"id id value\" format"<<endl;
- cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl;
- cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl;
- cout<<" -outdir [path] "<<" specify output directory path (default \"./output/\")"<<endl;
- cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl;
- cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl;
- cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl;
- cout<<endl;
+ cout << " -oxford [prefix] "
+ << " specify input Oxford genotype bgen file prefix." << endl;
+ cout << " requires: *.bgen, *.sample files" << endl;
+
+ cout << " -gxe [filename] "
+ << " specify input file that contains a column of environmental "
+ "factor for g by e tests"
+ << endl;
+ cout << " format: variable for individual 1" << endl;
+ cout << " variable for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -widv [filename] "
+ << " specify input file that contains a column of residual weights"
+ << endl;
+ cout << " format: variable for individual 1" << endl;
+ cout << " variable for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -k [filename] "
+ << " specify input kinship/relatedness matrix file name" << endl;
+ cout << " -mk [filename] "
+ << " specify input file which contains a list of kinship/relatedness "
+ "matrices"
+ << endl;
+ cout << " -u [filename] "
+ << " specify input file containing the eigen vectors of the "
+ "kinship/relatedness matrix"
+ << endl;
+ cout << " -d [filename] "
+ << " specify input file containing the eigen values of the "
+ "kinship/relatedness matrix"
+ << endl;
+ cout << " -c [filename] "
+ << " specify input covariates file name (optional)" << endl;
+ cout << " -cat [filename] "
+ << " specify input category file name (optional), which contains rs "
+ "cat1 cat2 ..."
+ << endl;
+ cout << " -beta [filename] "
+ << " specify input beta file name (optional), which contains rs beta "
+ "se_beta n_total (or n_mis and n_obs) estimates from a lm model"
+ << endl;
+ cout << " -cor [filename] "
+ << " specify input correlation file name (optional), which contains "
+ "rs window_size correlations from snps"
+ << endl;
+ cout << " missing value: NA" << endl;
+ cout << " note: the intercept (a column of 1s) may need to be "
+ "included"
+ << endl;
+ cout << " -epm [filename] "
+ << " specify input estimated parameter file name" << endl;
+ cout << " -en [n1] [n2] [n3] [n4] "
+ << " specify values for the input estimated parameter file (with a "
+ "header)"
+ << endl;
+ cout << " options: n1: rs column number" << endl;
+ cout << " n2: estimated alpha column number (0 to ignore)"
+ << endl;
+ cout << " n3: estimated beta column number (0 to ignore)"
+ << endl;
+ cout << " n4: estimated gamma column number (0 to ignore)"
+ << endl;
+ cout << " default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if "
+ "-ebv is specified"
+ << endl;
+ cout << " -ebv [filename] "
+ << " specify input estimated random effect (breeding value) file name"
+ << endl;
+ cout << " format: value for individual 1" << endl;
+ cout << " value for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -emu [filename] "
+ << " specify input log file name containing estimated mean" << endl;
+ cout << " -mu [num] "
+ << " specify input estimated mean value" << endl;
+ cout << " -gene [filename] "
+ << " specify input gene expression file name" << endl;
+ cout << " format: header" << endl;
+ cout << " gene1, count for individual 1, count for "
+ "individual 2, ..."
+ << endl;
+ cout << " gene2, count for individual 1, count for "
+ "individual 2, ..."
+ << endl;
+ cout << " ..." << endl;
+ cout << " missing value: not allowed" << endl;
+ cout << " -r [filename] "
+ << " specify input total read count file name" << endl;
+ cout << " format: total read count for individual 1" << endl;
+ cout << " total read count for individual 2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout
+ << " -snps [filename] "
+ << " specify input snps file name to only analyze a certain set of snps"
+ << endl;
+ cout << " format: rs#1" << endl;
+ cout << " rs#2" << endl;
+ cout << " ..." << endl;
+ cout << " missing value: NA" << endl;
+ cout << " -silence "
+ << " silent terminal display" << endl;
+ cout << " -km [num] "
+ << " specify input kinship/relatedness file type (default 1)." << endl;
+ cout << " options: 1: \"n by n matrix\" format" << endl;
+ cout << " 2: \"id id value\" format" << endl;
+ cout << " -n [num] "
+ << " specify phenotype column in the phenotype/*.fam file (optional; "
+ "default 1)"
+ << endl;
+ cout << " -pace [num] "
+ << " specify terminal display update pace (default 100000 SNPs or "
+ "100000 iterations)."
+ << endl;
+ cout << " -outdir [path] "
+ << " specify output directory path (default \"./output/\")" << endl;
+ cout << " -o [prefix] "
+ << " specify output file prefix (default \"result\")" << endl;
+ cout << " output: prefix.cXX.txt or prefix.sXX.txt from "
+ "kinship/relatedness matrix estimation"
+ << endl;
+ cout << " output: prefix.assoc.txt and prefix.log.txt form "
+ "association tests"
+ << endl;
+ cout << endl;
}
- if (option==3) {
- cout<<" SNP QC OPTIONS" << endl;
- cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl;
- cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl;
- cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl;
- cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl;
- cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl;
- cout<<endl;
+ if (option == 3) {
+ cout << " SNP QC OPTIONS" << endl;
+ cout << " -miss [num] "
+ << " specify missingness threshold (default 0.05)" << endl;
+ cout << " -maf [num] "
+ << " specify minor allele frequency threshold (default 0.01)" << endl;
+ cout << " -hwe [num] "
+ << " specify HWE test p value threshold (default 0; no test)" << endl;
+ cout << " -r2 [num] "
+ << " specify r-squared threshold (default 0.9999)" << endl;
+ cout << " -notsnp "
+ << " minor allele frequency cutoff is not used" << endl;
+ cout << endl;
}
- if (option==4) {
- cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
- cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
- cout<<" options: 1: centered XX^T/p"<<endl;
- cout<<" 2: standardized XX^T/p"<<endl;
- cout<<" note: non-polymorphic SNPs are excluded "<<endl;
- cout<<endl;
+ if (option == 4) {
+ cout << " RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
+ cout << " -gk [num] "
+ << " specify which type of kinship/relatedness matrix to generate "
+ "(default 1)"
+ << endl;
+ cout << " options: 1: centered XX^T/p" << endl;
+ cout << " 2: standardized XX^T/p" << endl;
+ cout << " note: non-polymorphic SNPs are excluded " << endl;
+ cout << endl;
}
- if (option==5) {
- cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl;
- cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
- cout<<endl;
+ if (option == 5) {
+ cout << " EIGEN-DECOMPOSITION OPTIONS" << endl;
+ cout << " -eigen "
+ << " specify to perform eigen decomposition of the loaded relatedness "
+ "matrix"
+ << endl;
+ cout << endl;
}
- if (option==6) {
- cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
- cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
- cout<<" options (with kinship file): 1: HE regression (default)"<<endl;
- cout<<" 2: REML"<<endl;
- cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl;
- cout<<" 2: Standardized genotypes"<<endl;
- cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl;
- cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl;
- cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl;
- cout<<endl;
+ if (option == 6) {
+ cout << " VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
+ cout << " -vc "
+ << " specify to perform variance component estimation for the loaded "
+ "relatedness matrix/matrices"
+ << endl;
+ cout
+ << " options (with kinship file): 1: HE regression (default)"
+ << endl;
+ cout << " 2: REML" << endl;
+ cout << " options (with beta/cor files): 1: Centered genotypes "
+ "(default)"
+ << endl;
+ cout << " 2: Standardized genotypes"
+ << endl;
+ cout << " -crt -windowbp [num]"
+ << " specify the window size based on bp (default 1000000; 1Mb)"
+ << endl;
+ cout << " -crt -windowcm [num]"
+ << " specify the window size based on cm (default 0)" << endl;
+ cout << " -crt -windowns [num]"
+ << " specify the window size based on number of snps (default 0)"
+ << endl;
+ cout << endl;
}
- if (option==7) {
- cout<<" LINEAR MODEL OPTIONS" << endl;
- cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: Wald test"<<endl;
- cout<<" 2: Likelihood ratio test"<<endl;
- cout<<" 3: Score test"<<endl;
- cout<<" 4: 1-3"<<endl;
- cout<<endl;
+ if (option == 7) {
+ cout << " LINEAR MODEL OPTIONS" << endl;
+ cout << " -lm [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: Wald test" << endl;
+ cout << " 2: Likelihood ratio test" << endl;
+ cout << " 3: Score test" << endl;
+ cout << " 4: 1-3" << endl;
+ cout << endl;
}
- if (option==8) {
- cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
- cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: Wald test"<<endl;
- cout<<" 2: Likelihood ratio test"<<endl;
- cout<<" 3: Score test"<<endl;
- cout<<" 4: 1-3"<<endl;
- cout<<" 5: Parameter estimation in the null model only"<<endl;
- cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl;
- cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl;
- cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
- cout<<endl;
+ if (option == 8) {
+ cout << " LINEAR MIXED MODEL OPTIONS" << endl;
+ cout << " -lmm [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: Wald test" << endl;
+ cout << " 2: Likelihood ratio test" << endl;
+ cout << " 3: Score test" << endl;
+ cout << " 4: 1-3" << endl;
+ cout << " 5: Parameter estimation in the null model only"
+ << endl;
+ cout << " -lmin [num] "
+ << " specify minimal value for lambda (default 1e-5)" << endl;
+ cout << " -lmax [num] "
+ << " specify maximum value for lambda (default 1e+5)" << endl;
+ cout
+ << " -region [num] "
+ << " specify the number of regions used to evaluate lambda (default 10)"
+ << endl;
+ cout << endl;
}
- if (option==9) {
- cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
- cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl;
- cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl;
- cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl;
- cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl;
- cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl;
- cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl;
- cout<<endl;
+ if (option == 9) {
+ cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
+ cout << " -pnr "
+ << " specify the pvalue threshold to use the Newton-Raphson's method "
+ "(default 0.001)"
+ << endl;
+ cout << " -emi "
+ << " specify the maximum number of iterations for the PX-EM method in "
+ "the null (default 10000)"
+ << endl;
+ cout << " -nri "
+ << " specify the maximum number of iterations for the "
+ "Newton-Raphson's method in the null (default 100)"
+ << endl;
+ cout << " -emp "
+ << " specify the precision for the PX-EM method in the null (default "
+ "0.0001)"
+ << endl;
+ cout << " -nrp "
+ << " specify the precision for the Newton-Raphson's method in the "
+ "null (default 0.0001)"
+ << endl;
+ cout << " -crt "
+ << " specify to output corrected pvalues for these pvalues that are "
+ "below the -pnr threshold"
+ << endl;
+ cout << endl;
}
- if (option==10) {
- cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl;
- cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: BSLMM"<<endl;
- cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl;
- cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
- cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl;
- cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl;
-
- cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl;
- cout<<" options: 1: LDR"<<endl;
-
- cout<<" MCMC OPTIONS" << endl;
- cout<<" Prior" << endl;
- cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl;
- cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl;
- cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl;
- cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl;
- cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl;
- cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
- cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl;
- cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl;
-
- cout<<" Proposal" << endl;
- cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
- cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
- cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
-
- cout<<" Others" << endl;
- cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl;
- cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl;
- cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl;
- cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl;
- cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl;
- cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl;
- cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
- cout<<endl;
+ if (option == 10) {
+ cout << " MULTI-LOCUS ANALYSIS OPTIONS" << endl;
+ cout << " -bslmm [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: BSLMM" << endl;
+ cout << " 2: standard ridge regression/GBLUP (no mcmc)"
+ << endl;
+ cout << " 3: probit BSLMM (requires 0/1 phenotypes)"
+ << endl;
+ cout
+ << " 4: BSLMM with DAP for Hyper Parameter Estimation"
+ << endl;
+ cout << " 5: BSLMM with DAP for Fine Mapping" << endl;
+
+ cout << " -ldr [num] "
+ << " specify analysis options (default 1)." << endl;
+ cout << " options: 1: LDR" << endl;
+
+ cout << " MCMC OPTIONS" << endl;
+ cout << " Prior" << endl;
+ cout << " -hmin [num] "
+ << " specify minimum value for h (default 0)" << endl;
+ cout << " -hmax [num] "
+ << " specify maximum value for h (default 1)" << endl;
+ cout << " -rmin [num] "
+ << " specify minimum value for rho (default 0)" << endl;
+ cout << " -rmax [num] "
+ << " specify maximum value for rho (default 1)" << endl;
+ cout << " -pmin [num] "
+ << " specify minimum value for log10(pi) (default log10(1/p), where p "
+ "is the number of analyzed SNPs )"
+ << endl;
+ cout << " -pmax [num] "
+ << " specify maximum value for log10(pi) (default log10(1) )" << endl;
+ cout << " -smin [num] "
+ << " specify minimum value for |gamma| (default 0)" << endl;
+ cout << " -smax [num] "
+ << " specify maximum value for |gamma| (default 300)" << endl;
+
+ cout << " Proposal" << endl;
+ cout << " -gmean [num] "
+ << " specify the mean for the geometric distribution (default: 2000)"
+ << endl;
+ cout << " -hscale [num] "
+ << " specify the step size scale for the proposal distribution of h "
+ "(value between 0 and 1, default min(10/sqrt(n),1) )"
+ << endl;
+ cout << " -rscale [num] "
+ << " specify the step size scale for the proposal distribution of rho "
+ "(value between 0 and 1, default min(10/sqrt(n),1) )"
+ << endl;
+ cout << " -pscale [num] "
+ << " specify the step size scale for the proposal distribution of "
+ "log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )"
+ << endl;
+
+ cout << " Others" << endl;
+ cout << " -w [num] "
+ << " specify burn-in steps (default 100,000)" << endl;
+ cout << " -s [num] "
+ << " specify sampling steps (default 1,000,000)" << endl;
+ cout << " -rpace [num] "
+ << " specify recording pace, record one state in every [num] steps "
+ "(default 10)"
+ << endl;
+ cout << " -wpace [num] "
+ << " specify writing pace, write values down in every [num] recorded "
+ "steps (default 1000)"
+ << endl;
+ cout << " -seed [num] "
+ << " specify random seed (a random seed is generated by default)"
+ << endl;
+ cout << " -mh [num] "
+ << " specify number of MH steps in each iteration (default 10)"
+ << endl;
+ cout << " requires: 0/1 phenotypes and -bslmm 3 option" << endl;
+ cout << endl;
}
- if (option==11) {
- cout<<" PREDICTION OPTIONS" << endl;
- cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl;
- cout<<" options: 1: predict for individuals with missing phenotypes"<<endl;
- cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
- cout<<endl;
+ if (option == 11) {
+ cout << " PREDICTION OPTIONS" << endl;
+ cout << " -predict [num] "
+ << " specify prediction options (default 1)." << endl;
+ cout << " options: 1: predict for individuals with missing "
+ "phenotypes"
+ << endl;
+ cout << " 2: predict for individuals with missing "
+ "phenotypes, and convert the predicted values to probability "
+ "scale. Use only for files fitted with -bslmm 3 option"
+ << endl;
+ cout << endl;
}
- if (option==12) {
- cout<<" CALC CORRELATION OPTIONS" << endl;
- cout<<" -calccor "<<endl;
- cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl;
- cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl;
- cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl;
- cout<<endl;
+ if (option == 12) {
+ cout << " CALC CORRELATION OPTIONS" << endl;
+ cout << " -calccor " << endl;
+ cout << " -windowbp [num] "
+ << " specify the window size based on bp (default 1000000; 1Mb)"
+ << endl;
+ cout << " -windowcm [num] "
+ << " specify the window size based on cm (default 0; not used)"
+ << endl;
+ cout << " -windowns [num] "
+ << " specify the window size based on number of snps (default 0; not "
+ "used)"
+ << endl;
+ cout << endl;
}
- if (option==13) {
- cout<<" NOTE"<<endl;
- cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl;
- cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl;
- cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl;
- cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl;
- cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl;
- cout<<endl;
+ if (option == 13) {
+ cout << " NOTE" << endl;
+ cout << " 1. Only individuals with non-missing phenotoypes and covariates "
+ "will be analyzed."
+ << endl;
+ cout << " 2. Missing genotoypes will be repalced with the mean genotype of "
+ "that SNP."
+ << endl;
+ cout << " 3. For lmm analysis, memory should be large enough to hold the "
+ "relatedness matrix and to perform eigen decomposition."
+ << endl;
+ cout << " 4. For multivariate lmm analysis, use a large -pnr for each snp "
+ "will increase computation time dramatically."
+ << endl;
+ cout << " 5. For bslmm analysis, in addition to 3, memory should be large "
+ "enough to hold the whole genotype matrix."
+ << endl;
+ cout << endl;
}
return;
@@ -457,2609 +700,2985 @@ void GEMMA::PrintHelp(size_t option) {
// calccor: 71
// gw: 72
-void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) {
- string str;
-
- for(int i = 1; i < argc; i++) {
- if (strcmp(argv[i], "-bfile")==0 ||
- strcmp(argv[i], "--bfile")==0 ||
- strcmp(argv[i], "-b")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_bfile=str;
- }
- else if (strcmp(argv[i], "-mbfile")==0 ||
- strcmp(argv[i], "--mbfile")==0 ||
- strcmp(argv[i], "-mb")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mbfile=str;
- }
- else if (strcmp(argv[i], "-silence")==0) {
- cPar.mode_silence=true;
- }
- else if (strcmp(argv[i], "-g")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_geno=str;
- }
- else if (strcmp(argv[i], "-mg")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mgeno=str;
- }
- else if (strcmp(argv[i], "-p")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_pheno=str;
- }
- else if (strcmp(argv[i], "-a")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_anno=str;
- }
-
- // WJA added.
- else if (strcmp(argv[i], "-oxford")==0 ||
- strcmp(argv[i], "--oxford")==0 ||
- strcmp(argv[i], "-x")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_oxford=str;
- }
- else if (strcmp(argv[i], "-gxe")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_gxe=str;
- }
- else if (strcmp(argv[i], "-widv")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_weight=str;
- }
- else if (strcmp(argv[i], "-wsnp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_wsnp=str;
- }
- else if (strcmp(argv[i], "-wcat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_wcat=str;
- }
- else if (strcmp(argv[i], "-k")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_kin=str;
- }
- else if (strcmp(argv[i], "-mk")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mk=str;
- }
- else if (strcmp(argv[i], "-u")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_ku=str;
- }
- else if (strcmp(argv[i], "-d")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_kd=str;
- }
- else if (strcmp(argv[i], "-c")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_cvt=str;
- }
- else if (strcmp(argv[i], "-cat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_cat=str;
- }
- else if (strcmp(argv[i], "-mcat")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mcat=str;
- }
- else if (strcmp(argv[i], "-catc")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_catc=str;
- }
- else if (strcmp(argv[i], "-mcatc")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mcatc=str;
- }
- else if (strcmp(argv[i], "-beta")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_beta=str;
- }
- else if (strcmp(argv[i], "-bf")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_bf=str;
- }
- else if (strcmp(argv[i], "-hyp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_hyp=str;
- }
- else if (strcmp(argv[i], "-cor")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_cor=str;
- }
- else if (strcmp(argv[i], "-study")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_study=str;
- }
- else if (strcmp(argv[i], "-ref")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_ref=str;
- }
- else if (strcmp(argv[i], "-mstudy")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mstudy=str;
- }
- else if (strcmp(argv[i], "-mref")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_mref=str;
- }
- else if (strcmp(argv[i], "-epm")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_epm=str;
- }
- else if (strcmp(argv[i], "-en")==0) {
- while (argv[i+1] != NULL && argv[i+1][0] != '-') {
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.est_column.push_back(atoi(str.c_str()));
- }
- }
- else if (strcmp(argv[i], "-ebv")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_ebv=str;
- }
- else if (strcmp(argv[i], "-emu")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_log=str;
- }
- else if (strcmp(argv[i], "-mu")==0) {
- if(argv[i+1] == NULL) {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.pheno_mean=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-gene")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_gene=str;
- }
- else if (strcmp(argv[i], "-r")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_read=str;
- }
- else if (strcmp(argv[i], "-snps")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_snps=str;
- }
- else if (strcmp(argv[i], "-km")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.k_mode=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-n")==0) {
- (cPar.p_column).clear();
- while (argv[i+1] != NULL && argv[i+1][0] != '-') {
- ++i;
- str.clear();
- str.assign(argv[i]);
- (cPar.p_column).push_back(atoi(str.c_str()));
- }
- }
- else if (strcmp(argv[i], "-pace")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.d_pace=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-outdir")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.path_out=str;
- }
- else if (strcmp(argv[i], "-o")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_out=str;
- }
- else if (strcmp(argv[i], "-miss")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.miss_level=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-maf")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- if (cPar.maf_level!=-1) {cPar.maf_level=atof(str.c_str());}
- }
- else if (strcmp(argv[i], "-hwe")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.hwe_level=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-r2")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.r2_level=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-notsnp")==0) {
- cPar.maf_level=-1;
- }
- else if (strcmp(argv[i], "-gk")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=21; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=20+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gs")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=25; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=24+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gq")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=27; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=26+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gw")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=72; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=71+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-sample")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.ni_subsample=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-eigen")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=31; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=30+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-calccor")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=71; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=70+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-vc")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=61; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=60+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-ci")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=66; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=65+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-pve")==0) {
- double s=0;
- while (argv[i+1] != NULL && (argv[i+1][0] != '-' || !isalpha(argv[i+1][1]) ) ) {
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.v_pve.push_back(atof(str.c_str()));
- s+=atof(str.c_str());
- }
- if (s==1) {
- cout<<"summation of pve equals one."<<endl;
- }
- }
- else if (strcmp(argv[i], "-blocks")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.n_block=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-noconstrain")==0) {
- cPar.noconstrain=true;
- }
- else if (strcmp(argv[i], "-lm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=51; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=50+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-fa")==0 || strcmp(argv[i], "-lmm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=1; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-lmin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.l_min=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-lmax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.l_max=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-region")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.n_region=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-pnr")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.p_nr=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-emi")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.em_iter=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-nri")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.nr_iter=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-emp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.em_prec=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-nrp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.nr_prec=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-crt")==0) {
- cPar.crt=1;
- }
- else if (strcmp(argv[i], "-bslmm")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=11; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=10+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-hmin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.h_min=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-hmax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.h_max=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-rmin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.rho_min=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-rmax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.rho_max=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-pmin")==0) {
- if(argv[i+1] == NULL) {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.logp_min=atof(str.c_str())*log(10.0);
- }
- else if (strcmp(argv[i], "-pmax")==0) {
- if(argv[i+1] == NULL) {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.logp_max=atof(str.c_str())*log(10.0);
- }
- else if (strcmp(argv[i], "-smin")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.s_min=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-smax")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.s_max=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-gmean")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.geo_mean=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-hscale")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.h_scale=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-rscale")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.rho_scale=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-pscale")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.logp_scale=atof(str.c_str())*log(10.0);
- }
- else if (strcmp(argv[i], "-w")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.w_step=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-s")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.s_step=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-rpace")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.r_pace=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-wpace")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.w_pace=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-seed")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.randseed=atol(str.c_str());
- }
- else if (strcmp(argv[i], "-mh")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.n_mh=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-predict")==0) {
- if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;}
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=41; continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.a_mode=40+atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-windowcm")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.window_cm=atof(str.c_str());
- }
- else if (strcmp(argv[i], "-windowbp")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.window_bp=atoi(str.c_str());
- }
- else if (strcmp(argv[i], "-windowns")==0) {
- if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;}
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.window_ns=atoi(str.c_str());
- }
- else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;}
- }
-
- // Change prediction mode to 43 if the epm file is not provided.
- if (cPar.a_mode==41 && cPar.file_epm.empty()) {
- cPar.a_mode=43;
- }
-
- return;
+// Command-line flag that selects an analysis, paired with the offset that is
+// added to its numeric argument to form cPar.a_mode. When the flag is given
+// without a numeric argument, a_mode becomes base + 1.
+struct AssignModeFlag {
+  const char *flag;
+  int base;
+};
+
+static const AssignModeFlag kAssignModeFlags[] = {
+    {"-gk", 20},  {"-gs", 24},      {"-gq", 26}, {"-gw", 71}, {"-eigen", 30},
+    {"-lm", 50},  {"-calccor", 70}, {"-vc", 60}, {"-ci", 65}, {"-fa", 0},
+    {"-lmm", 0},  {"-bslmm", 10},   {"-predict", 40}};
+
+// Returns the table entry whose flag equals arg, or NULL if arg is not an
+// analysis-selecting option.
+static const AssignModeFlag *FindAssignModeFlag(const char *arg) {
+  const size_t n_flags = sizeof(kAssignModeFlags) / sizeof(kAssignModeFlags[0]);
+  for (size_t k = 0; k < n_flags; ++k) {
+    if (strcmp(arg, kAssignModeFlags[k].flag) == 0) {
+      return &kAssignModeFlags[k];
+    }
+  }
+  return NULL;
+}
+
+// Tries to consume the argument that follows option argv[i].
+//
+// If argv[i + 1] is NULL (end of the argument vector), or it starts with '-'
+// and allow_leading_dash is false, nothing is consumed and false is returned.
+// Otherwise i is advanced to the consumed argument, its text is stored in
+// value, and true is returned.
+static bool TakeArgValue(char **argv, int &i, std::string &value,
+                         bool allow_leading_dash = false) {
+  const char *next = argv[i + 1];
+  if (next == NULL || (!allow_leading_dash && next[0] == '-')) {
+    return false;
+  }
+  ++i;
+  value.assign(next);
+  return true;
+}
+
+// Parse the command line (argv[1] .. argv[argc - 1]) into cPar.
+//
+// - Options that take a value are silently skipped when the value is missing
+//   or looks like another option (starts with '-'). -mu, -pmin and -pmax
+//   accept values with a leading '-' so that negative numbers can be given.
+// - -en, -n and -pve consume a list of values.
+// - Only one analysis-selecting option (see kAssignModeFlags) may be given;
+//   a second one sets cPar.error, prints a message and stops parsing.
+// - An unrecognized option sets cPar.error and prints a message, but parsing
+//   continues with the next argument.
+//
+// The code reads argv[i + 1] for i < argc and therefore relies on argv being
+// terminated by a NULL pointer at argv[argc].
+void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
+  string str;
+
+  for (int i = 1; i < argc; i++) {
+    const char *opt = argv[i];
+
+    // Analysis-selecting options share one code path. Flag strings are
+    // matched exactly, so testing them first does not shadow other options.
+    if (const AssignModeFlag *mode = FindAssignModeFlag(opt)) {
+      if (cPar.a_mode != 0) {
+        cPar.error = true;
+        cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm "
+                "-predict -calccor options is allowed."
+             << endl;
+        break;
+      }
+      if (TakeArgValue(argv, i, str)) {
+        cPar.a_mode = mode->base + atoi(str.c_str());
+      } else {
+        cPar.a_mode = mode->base + 1;
+      }
+      continue;
+    }
+
+    if (strcmp(opt, "-bfile") == 0 || strcmp(opt, "--bfile") == 0 ||
+        strcmp(opt, "-b") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_bfile = str;
+    } else if (strcmp(opt, "-mbfile") == 0 || strcmp(opt, "--mbfile") == 0 ||
+               strcmp(opt, "-mb") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_mbfile = str;
+    } else if (strcmp(opt, "-silence") == 0) {
+      cPar.mode_silence = true;
+    } else if (strcmp(opt, "-g") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_geno = str;
+    } else if (strcmp(opt, "-mg") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_mgeno = str;
+    } else if (strcmp(opt, "-p") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_pheno = str;
+    } else if (strcmp(opt, "-a") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_anno = str;
+    }
+
+    // WJA added.
+    else if (strcmp(opt, "-oxford") == 0 || strcmp(opt, "--oxford") == 0 ||
+             strcmp(opt, "-x") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_oxford = str;
+    } else if (strcmp(opt, "-gxe") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_gxe = str;
+    } else if (strcmp(opt, "-widv") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_weight = str;
+    } else if (strcmp(opt, "-wsnp") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_wsnp = str;
+    } else if (strcmp(opt, "-wcat") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_wcat = str;
+    } else if (strcmp(opt, "-k") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_kin = str;
+    } else if (strcmp(opt, "-mk") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_mk = str;
+    } else if (strcmp(opt, "-u") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_ku = str;
+    } else if (strcmp(opt, "-d") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_kd = str;
+    } else if (strcmp(opt, "-c") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_cvt = str;
+    } else if (strcmp(opt, "-cat") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_cat = str;
+    } else if (strcmp(opt, "-mcat") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_mcat = str;
+    } else if (strcmp(opt, "-catc") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_catc = str;
+    } else if (strcmp(opt, "-mcatc") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_mcatc = str;
+    } else if (strcmp(opt, "-beta") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_beta = str;
+    } else if (strcmp(opt, "-bf") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_bf = str;
+    } else if (strcmp(opt, "-hyp") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_hyp = str;
+    } else if (strcmp(opt, "-cor") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_cor = str;
+    } else if (strcmp(opt, "-study") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_study = str;
+    } else if (strcmp(opt, "-ref") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_ref = str;
+    } else if (strcmp(opt, "-mstudy") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_mstudy = str;
+    } else if (strcmp(opt, "-mref") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_mref = str;
+    } else if (strcmp(opt, "-epm") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_epm = str;
+    } else if (strcmp(opt, "-en") == 0) {
+      // List option: every following non-option argument is one column.
+      while (TakeArgValue(argv, i, str)) {
+        cPar.est_column.push_back(atoi(str.c_str()));
+      }
+    } else if (strcmp(opt, "-ebv") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_ebv = str;
+    } else if (strcmp(opt, "-emu") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_log = str;
+    } else if (strcmp(opt, "-mu") == 0) {
+      // A leading '-' is accepted so that a negative mean can be given.
+      if (TakeArgValue(argv, i, str, true)) {
+        cPar.pheno_mean = atof(str.c_str());
+      }
+    } else if (strcmp(opt, "-gene") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_gene = str;
+    } else if (strcmp(opt, "-r") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_read = str;
+    } else if (strcmp(opt, "-snps") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_snps = str;
+    } else if (strcmp(opt, "-km") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.k_mode = atoi(str.c_str());
+    } else if (strcmp(opt, "-n") == 0) {
+      // List option: replaces any previously stored phenotype columns.
+      (cPar.p_column).clear();
+      while (TakeArgValue(argv, i, str)) {
+        (cPar.p_column).push_back(atoi(str.c_str()));
+      }
+    } else if (strcmp(opt, "-pace") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.d_pace = atoi(str.c_str());
+    } else if (strcmp(opt, "-outdir") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.path_out = str;
+    } else if (strcmp(opt, "-o") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.file_out = str;
+    } else if (strcmp(opt, "-miss") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.miss_level = atof(str.c_str());
+    } else if (strcmp(opt, "-maf") == 0) {
+      // The value is always consumed, but it is ignored once -notsnp has set
+      // maf_level to -1.
+      if (TakeArgValue(argv, i, str)) {
+        if (cPar.maf_level != -1) {
+          cPar.maf_level = atof(str.c_str());
+        }
+      }
+    } else if (strcmp(opt, "-hwe") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.hwe_level = atof(str.c_str());
+    } else if (strcmp(opt, "-r2") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.r2_level = atof(str.c_str());
+    } else if (strcmp(opt, "-notsnp") == 0) {
+      cPar.maf_level = -1;
+    } else if (strcmp(opt, "-sample") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.ni_subsample = atoi(str.c_str());
+    } else if (strcmp(opt, "-pve") == 0) {
+      // List of numbers that may be negative: an argument ends the list only
+      // if it starts with '-' followed by a letter. The cast keeps isalpha()
+      // well defined for bytes outside the ASCII range.
+      double s = 0;
+      while (argv[i + 1] != NULL &&
+             (argv[i + 1][0] != '-' ||
+              !isalpha(static_cast<unsigned char>(argv[i + 1][1])))) {
+        ++i;
+        str.clear();
+        str.assign(argv[i]);
+        cPar.v_pve.push_back(atof(str.c_str()));
+        s += atof(str.c_str());
+      }
+      if (s == 1) {
+        cout << "summation of pve equals one." << endl;
+      }
+    } else if (strcmp(opt, "-blocks") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.n_block = atoi(str.c_str());
+    } else if (strcmp(opt, "-noconstrain") == 0) {
+      cPar.noconstrain = true;
+    } else if (strcmp(opt, "-lmin") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.l_min = atof(str.c_str());
+    } else if (strcmp(opt, "-lmax") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.l_max = atof(str.c_str());
+    } else if (strcmp(opt, "-region") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.n_region = atoi(str.c_str());
+    } else if (strcmp(opt, "-pnr") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.p_nr = atof(str.c_str());
+    } else if (strcmp(opt, "-emi") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.em_iter = atoi(str.c_str());
+    } else if (strcmp(opt, "-nri") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.nr_iter = atoi(str.c_str());
+    } else if (strcmp(opt, "-emp") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.em_prec = atof(str.c_str());
+    } else if (strcmp(opt, "-nrp") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.nr_prec = atof(str.c_str());
+    } else if (strcmp(opt, "-crt") == 0) {
+      cPar.crt = 1;
+    } else if (strcmp(opt, "-hmin") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.h_min = atof(str.c_str());
+    } else if (strcmp(opt, "-hmax") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.h_max = atof(str.c_str());
+    } else if (strcmp(opt, "-rmin") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.rho_min = atof(str.c_str());
+    } else if (strcmp(opt, "-rmax") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.rho_max = atof(str.c_str());
+    } else if (strcmp(opt, "-pmin") == 0) {
+      // Given as log10; stored as natural log. Negative values are accepted.
+      if (TakeArgValue(argv, i, str, true)) {
+        cPar.logp_min = atof(str.c_str()) * log(10.0);
+      }
+    } else if (strcmp(opt, "-pmax") == 0) {
+      // Given as log10; stored as natural log. Negative values are accepted.
+      if (TakeArgValue(argv, i, str, true)) {
+        cPar.logp_max = atof(str.c_str()) * log(10.0);
+      }
+    } else if (strcmp(opt, "-smin") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.s_min = atoi(str.c_str());
+    } else if (strcmp(opt, "-smax") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.s_max = atoi(str.c_str());
+    } else if (strcmp(opt, "-gmean") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.geo_mean = atof(str.c_str());
+    } else if (strcmp(opt, "-hscale") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.h_scale = atof(str.c_str());
+    } else if (strcmp(opt, "-rscale") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.rho_scale = atof(str.c_str());
+    } else if (strcmp(opt, "-pscale") == 0) {
+      if (TakeArgValue(argv, i, str)) {
+        cPar.logp_scale = atof(str.c_str()) * log(10.0);
+      }
+    } else if (strcmp(opt, "-w") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.w_step = atoi(str.c_str());
+    } else if (strcmp(opt, "-s") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.s_step = atoi(str.c_str());
+    } else if (strcmp(opt, "-rpace") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.r_pace = atoi(str.c_str());
+    } else if (strcmp(opt, "-wpace") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.w_pace = atoi(str.c_str());
+    } else if (strcmp(opt, "-seed") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.randseed = atol(str.c_str());
+    } else if (strcmp(opt, "-mh") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.n_mh = atoi(str.c_str());
+    } else if (strcmp(opt, "-windowcm") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.window_cm = atof(str.c_str());
+    } else if (strcmp(opt, "-windowbp") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.window_bp = atoi(str.c_str());
+    } else if (strcmp(opt, "-windowns") == 0) {
+      if (TakeArgValue(argv, i, str)) cPar.window_ns = atoi(str.c_str());
+    } else {
+      // Keep scanning so that every unrecognized option is reported.
+      cout << "error! unrecognized option: " << argv[i] << endl;
+      cPar.error = true;
+    }
+  }
+
+  // Change prediction mode to 43 if the epm file is not provided.
+  if (cPar.a_mode == 41 && cPar.file_epm.empty()) {
+    cPar.a_mode = 43;
+  }
}
-void GEMMA::BatchRun (PARAM &cPar) {
- clock_t time_begin, time_start;
- time_begin=clock();
-
- // Read Files.
- cout<<"Reading Files ... "<<endl;
- cPar.ReadFiles();
- if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;}
- cPar.CheckData();
- if (cPar.error==true) {cout<<"error! fail to check data. "<<endl; return;}
-
- //Prediction for bslmm
- if (cPar.a_mode==41 || cPar.a_mode==42) {
- gsl_vector *y_prdt;
-
- y_prdt=gsl_vector_alloc (cPar.ni_total-cPar.ni_test);
-
- //set to zero
- gsl_vector_set_zero (y_prdt);
-
- PRDT cPRDT;
- cPRDT.CopyFromParam(cPar);
-
- //add breeding value if needed
- if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
- cout<<"Adding Breeding Values ... "<<endl;
-
- gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
- gsl_vector *u_hat=gsl_vector_alloc (cPar.ni_test);
-
- //read kinship matrix and set u_hat
- vector<int> indicator_all;
- size_t c_bv=0;
- for (size_t i=0; i<cPar.indicator_idv.size(); i++) {
- indicator_all.push_back(1);
- if (cPar.indicator_bv[i]==1) {gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); c_bv++;}
- }
-
- ReadFile_kin (cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //read u
- cPRDT.AddBV(G, u_hat, y_prdt);
-
- gsl_matrix_free(G);
- gsl_vector_free(u_hat);
- }
-
- //add beta
- if (!cPar.file_bfile.empty()) {
- cPRDT.AnalyzePlink (y_prdt);
- }
- else {
- cPRDT.AnalyzeBimbam (y_prdt);
- }
-
- //add mu
- gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
-
- //convert y to probability if needed
- if (cPar.a_mode==42) {
- double d;
- for (size_t i=0; i<y_prdt->size; i++) {
- d=gsl_vector_get(y_prdt, i);
- d=gsl_cdf_gaussian_P(d, 1.0);
- gsl_vector_set(y_prdt, i, d);
- }
- }
-
-
- cPRDT.CopyToParam(cPar);
-
- cPRDT.WriteFiles(y_prdt);
-
- gsl_vector_free(y_prdt);
- }
-
- //Prediction with kinship matrix only; for one or more phenotypes
- if (cPar.a_mode==43) {
- //first, use individuals with full phenotypes to obtain estimates of Vg and Ve
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
- gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
- gsl_vector *eval=gsl_vector_alloc (Y->size1);
-
- gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph);
- gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
- cPar.CopyCvtPhen (W_full, Y_full, 1);
-
- gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph);
- gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1);
- gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2);
-
- //read relatedness matrix G, and matrix G_full
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
- ReadFile_kin (cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
- CenterMatrix (G_full);
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- //calculate variance component and beta estimates
- //and then obtain predicted values
- if (cPar.n_ph==1) {
- gsl_vector *beta=gsl_vector_alloc (W->size2);
- gsl_vector *se_beta=gsl_vector_alloc (W->size2);
-
- double lambda, logl, vg, ve;
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- //obtain estimates
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, lambda, logl);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, lambda, vg, ve, beta, se_beta);
-
- cout<<"REMLE estimate for vg in the null model = "<<vg<<endl;
- cout<<"REMLE estimate for ve in the null model = "<<ve<<endl;
- cPar.vg_remle_null=vg; cPar.ve_remle_null=ve;
-
- //obtain Y_hat from fixed effects
- gsl_vector_view Yhat_col=gsl_matrix_column (Y_hat, 0);
- gsl_blas_dgemv (CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
-
- //obtain H
- gsl_matrix_set_identity (H_full);
- gsl_matrix_scale (H_full, ve);
- gsl_matrix_scale (G_full, vg);
- gsl_matrix_add (H_full, G_full);
-
- //free matrices
- gsl_vector_free(beta);
- gsl_vector_free(se_beta);
- } else {
- gsl_matrix *Vg=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
- gsl_matrix *Ve=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph);
- gsl_matrix *B=gsl_matrix_alloc (cPar.n_ph, W->size2);
- gsl_matrix *se_B=gsl_matrix_alloc (cPar.n_ph, W->size2);
-
- //obtain estimates
- CalcMvLmmVgVeBeta (eval, UtW, UtY, cPar.em_iter, cPar.nr_iter, cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max, cPar.n_region, Vg, Ve, B, se_B);
-
- cout<<"REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<Vg->size1; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(Vg, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<Ve->size1; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(Ve, i, j)<<"\t";
- }
- cout<<endl;
- }
- cPar.Vg_remle_null.clear();
- cPar.Ve_remle_null.clear();
- for (size_t i=0; i<Vg->size1; i++) {
- for (size_t j=i; j<Vg->size2; j++) {
- cPar.Vg_remle_null.push_back(gsl_matrix_get (Vg, i, j) );
- cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) );
- }
- }
-
- //obtain Y_hat from fixed effects
- gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
-
- //obtain H
- KroneckerSym(G_full, Vg, H_full);
- for (size_t i=0; i<G_full->size1; i++) {
- gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2);
- gsl_matrix_add (&H_sub.matrix, Ve);
- }
-
- //free matrices
- gsl_matrix_free (Vg);
- gsl_matrix_free (Ve);
- gsl_matrix_free (B);
- gsl_matrix_free (se_B);
- }
-
- PRDT cPRDT;
-
- cPRDT.CopyFromParam(cPar);
-
- cout<<"Predicting Missing Phentypes ... "<<endl;
- time_start=clock();
- cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- cPRDT.WriteFiles(Y_full);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(W);
- gsl_matrix_free(G);
- gsl_matrix_free(U);
- gsl_matrix_free(UtW);
- gsl_matrix_free(UtY);
- gsl_vector_free(eval);
-
- gsl_matrix_free(Y_full);
- gsl_matrix_free(Y_hat);
- gsl_matrix_free(W_full);
- gsl_matrix_free(G_full);
- gsl_matrix_free(H_full);
- }
-
-
- //Generate Kinship matrix
- if (cPar.a_mode==21 || cPar.a_mode==22) {
- cout<<"Calculating Relatedness Matrix ... "<<endl;
-
- gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total);
-
- time_start=clock();
- cPar.CalcKin (G);
- cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate relatedness matrix. "<<endl; return;}
-
- if (cPar.a_mode==21) {
- cPar.WriteMatrix (G, "cXX");
- } else {
- cPar.WriteMatrix (G, "sXX");
- }
-
- gsl_matrix_free (G);
- }
-
- //Compute the LDSC weights (not implemented yet)
- if (cPar.a_mode==72) {
- cout<<"Calculating Weights ... "<<endl;
-
- VARCOV cVarcov;
- cVarcov.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- cVarcov.AnalyzePlink ();
- } else {
- cVarcov.AnalyzeBimbam ();
- }
-
- cVarcov.CopyToParam(cPar);
- }
-
- // Compute the S matrix (and its variance), that is used for
- // variance component estimation using summary statistics.
- if (cPar.a_mode==25 || cPar.a_mode==26) {
- cout<<"Calculating the S Matrix ... "<<endl;
-
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
- gsl_vector *ns=gsl_vector_alloc (cPar.n_vc+1);
- gsl_matrix_set_zero(S);
- gsl_vector_set_zero(ns);
-
- gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
- gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
- gsl_vector_view ns_vec=gsl_vector_subvector(ns, 0, cPar.n_vc);
-
- gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
- gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
- gsl_matrix_set_zero (K);
- gsl_matrix_set_zero (A);
-
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
-
- cPar.CopyCvtPhen (W, y, 0);
-
- set<string> setSnps_beta;
- map <string, double> mapRS2wA, mapRS2wK;
-
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- time_start=clock();
- cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector);
- cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
- gsl_vector_set (ns, cPar.n_vc, cPar.ni_test);
-
- cPar.WriteMatrix (S, "S");
- cPar.WriteVector (ns, "size");
- cPar.WriteVar ("snps");
-
- gsl_matrix_free (S);
- gsl_vector_free (ns);
-
- gsl_matrix_free (A);
- gsl_matrix_free (K);
-
- gsl_vector_free (y);
- gsl_matrix_free (K);
- }
-
- //Compute the q vector, that is used for variance component estimation using summary statistics
- if (cPar.a_mode==27 || cPar.a_mode==28) {
- gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
- gsl_vector_set_zero (q);
- gsl_vector_set_zero (s);
-
- gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
-
- vector<size_t> vec_cat, vec_ni;
- vector<double> vec_weight, vec_z2;
- map<string, double> mapRS2weight;
- mapRS2weight.clear();
-
- time_start=clock();
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test);
- cout<<"## number of total individuals = "<<cPar.ni_total<<endl;
- cout<<"## number of total SNPs = "<<cPar.ns_total<<endl;
- cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- cout<<"## number of variance components = "<<cPar.n_vc<<endl;
- cout<<"Calculating the q vector ... "<<endl;
- Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
- cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- if (cPar.error==true) {cout<<"error! fail to calculate the q vector. "<<endl; return;}
-
- gsl_vector_set (s, cPar.n_vc, cPar.ni_total);
-
- cPar.WriteMatrix (Vq, "Vq");
- cPar.WriteVector (q, "q");
- cPar.WriteVector (s, "size");
- /*
- for (size_t i=0; i<cPar.n_vc; i++) {
- cout<<gsl_vector_get(q, i)<<endl;
- }
- */
- gsl_matrix_free (Vq);
- gsl_vector_free (q);
- gsl_vector_free (s);
- }
-
- // Calculate SNP covariance.
- if (cPar.a_mode==71) {
- VARCOV cVarcov;
- cVarcov.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- cVarcov.AnalyzePlink ();
- } else {
- cVarcov.AnalyzeBimbam ();
- }
-
- cVarcov.CopyToParam(cPar);
- }
-
- // LM.
- if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
-
- //Fit LM or mvLM
- if (cPar.n_ph==1) {
- LM cLm;
- cLm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
-
- if (!cPar.file_gene.empty()) {
- cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype
- } else if (!cPar.file_bfile.empty()) {
- cLm.AnalyzePlink (W, &Y_col.vector);
- } else if (!cPar.file_oxford.empty()) {
- cLm.Analyzebgen (W, &Y_col.vector);
- } else {
- cLm.AnalyzeBimbam (W, &Y_col.vector);
- }
-
- cLm.WriteFiles();
- cLm.CopyToParam(cPar);
- }
- /*
- else {
- MVLM cMvlm;
- cMvlm.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- cMvlm.AnalyzePlink (W, Y);
- } else {
- cMvlm.AnalyzeBimbam (W, Y);
- }
-
- cMvlm.WriteFiles();
- cMvlm.CopyToParam(cPar);
- }
- */
- //release all matrices and vectors
- gsl_matrix_free (Y);
- gsl_matrix_free (W);
- }
-
- //VC estimation with one or multiple kinship matrices
- //REML approach only
- //if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp
- //for one phenotype only;
- if (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) {
- if (!cPar.file_beta.empty() ) {
- //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
- //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
- //update indicator_snps, so that the numbers are in accordance with mapRS2wK
- set<string> setSnps_beta;
- ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
- map <string, double> mapRS2wA, mapRS2wK;
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- cPar.UpdateSNP (mapRS2wK);
-
- // Setup matrices and vectors.
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc);
- gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-
- gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
- gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test);
-
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt);
-
- gsl_matrix_set_zero (K);
- gsl_matrix_set_zero (A);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_set_zero(Vq);
- gsl_vector_set_zero (q);
- gsl_vector_set_zero (s);
-
- cPar.CopyCvtPhen (W, y, 0);
-
- gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
- gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
- gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc);
-
- vector<size_t> vec_cat, vec_ni;
- vector<double> vec_weight, vec_z2;
-
- //read beta, based on the mapRS2wK
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test);
-
- cout<<"Study Panel: "<<endl;
- cout<<"## number of total individuals = "<<cPar.ni_study<<endl;
- cout<<"## number of total SNPs = "<<cPar.ns_study<<endl;
- cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- cout<<"## number of variance components = "<<cPar.n_vc<<endl;
-
- //compute q
- Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
-
- //compute S
- time_start=clock();
- cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
- cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
- //compute vc estimates
- CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
- //if LDSC weights, then compute the weights and run the above steps again
- if (cPar.a_mode==62) {
- //compute the weights and normalize the weights for A
- cPar.UpdateWeight (1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
-
- //read beta file again, and update weigths vector
- ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_total, cPar.ns_test);
-
- //compute q
- Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector);
-
- //compute S
- time_start=clock();
- cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector);
- cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "<<endl; return;}
-
- //compute vc estimates
- CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
- }
-
- gsl_vector_set (s, cPar.n_vc, cPar.ni_test);
-
- cPar.WriteMatrix (S, "S");
- cPar.WriteMatrix (Vq, "Vq");
- cPar.WriteVector (q, "q");
- cPar.WriteVector (s, "size");
-
- gsl_matrix_free (S);
- gsl_matrix_free (Vq);
- gsl_vector_free (q);
- gsl_vector_free (s);
-
- gsl_matrix_free (A);
- gsl_matrix_free (K);
- gsl_vector_free (y);
- gsl_matrix_free (W);
- } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
- if (!cPar.file_study.empty()) {
- string sfile=cPar.file_study+".size.txt";
- CountFileLines (sfile, cPar.n_vc);
- } else {
- string file_name;
- igzstream infile (cPar.file_mstudy.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open mstudy file: "<<cPar.file_study<<endl; return;}
-
- safeGetline(infile, file_name);
-
- infile.clear();
- infile.close();
-
- string sfile=file_name+".size.txt";
- CountFileLines (sfile, cPar.n_vc);
- }
-
- cPar.n_vc=cPar.n_vc-1;
-
- gsl_matrix *S=gsl_matrix_alloc (2*cPar.n_vc, cPar.n_vc);
- gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- //gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1, (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
- //gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1, (n_lines*(n_lines+1))/2*(n_lines+1) );
- gsl_vector *q=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s_study=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
- gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
- gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
-
- gsl_matrix_set_zero(Vq);
- //gsl_matrix_set_zero(V);
- //gsl_matrix_set_zero(Vslope);
- gsl_vector_set_zero(q);
- gsl_vector_set_zero(s_study);
- gsl_vector_set_zero(s_ref);
-
- if (!cPar.file_study.empty()) {
- ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
- } else {
- ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
- }
-
- if (!cPar.file_ref.empty()) {
- ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
- } else {
- ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref);
- }
-
- cout<<"## number of variance components = "<<cPar.n_vc<<endl;
- cout<<"## number of individuals in the sample = "<<cPar.ni_study<<endl;
- cout<<"## number of individuals in the reference = "<<cPar.ni_ref<<endl;
-
- CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
- gsl_vector_view s_sub=gsl_vector_subvector (s, 0, cPar.n_vc);
- gsl_vector_memcpy (&s_sub.vector, s_ref);
- gsl_vector_set (s, cPar.n_vc, cPar.ni_ref);
-
- cPar.WriteMatrix (S, "S");
- cPar.WriteMatrix (Vq, "Vq");
- cPar.WriteVector (q, "q");
- cPar.WriteVector (s, "size");
-
- gsl_matrix_free (S);
- gsl_matrix_free (Vq);
- //gsl_matrix_free (V);
- //gsl_matrix_free (Vslope);
- gsl_vector_free (q);
- gsl_vector_free (s_study);
- gsl_vector_free (s_ref);
- gsl_vector_free (s);
- } else {
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc );
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
-
- //read kinship matrices
- if (!(cPar.file_mk).empty()) {
- ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G, and obtain v_traceG
- double d=0;
- (cPar.v_traceG).clear();
- for (size_t i=0; i<cPar.n_vc; i++) {
- gsl_matrix_view G_sub=gsl_matrix_submatrix (G, 0, i*G->size1, G->size1, G->size1);
- CenterMatrix (&G_sub.matrix);
- d=0;
- for (size_t j=0; j<G->size1; j++) {
- d+=gsl_matrix_get (&G_sub.matrix, j, j);
- }
- d/=(double)G->size1;
- (cPar.v_traceG).push_back(d);
- }
- } else if (!(cPar.file_kin).empty()) {
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
-
- (cPar.v_traceG).clear();
- double d=0;
- for (size_t j=0; j<G->size1; j++) {
- d+=gsl_matrix_get (G, j, j);
- }
- d/=(double)G->size1;
- (cPar.v_traceG).push_back(d);
- }
- /*
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
-
- if (cPar.a_mode==31) {
- cPar.trace_G=EigenDecomp (G, U, eval, 1);
- } else {
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- }
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
-
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- } else {
- ReadFile_eigenU (cPar.file_ku, cPar.error, U);
- if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
- ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
- if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
- cPar.trace_G+=gsl_vector_get(eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- }
- */
- //fit multiple variance components
- if (cPar.n_ph==1) {
- // if (cPar.n_vc==1) {
- /*
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- gsl_vector_view beta=gsl_matrix_row (B, 0);
- gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
- cPar.beta_mle_null.clear();
- cPar.se_beta_mle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
- cPar.beta_remle_null.clear();
- cPar.se_beta_remle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
- cPar.PrintSummary();
-
- //calculate and output residuals
- if (cPar.a_mode==5) {
- gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
- //obtain Utu and Ute
- gsl_vector_memcpy (y_hat, &UtY_col.vector);
- gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
- double d, u, e;
- for (size_t i=0; i<eval->size; i++) {
- d=gsl_vector_get (eval, i);
- u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- gsl_vector_set (Utu_hat, i, u);
- gsl_vector_set (Ute_hat, i, e);
- }
-
- //obtain u and e
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
- cPar.WriteVector(u_hat, "residU");
- cPar.WriteVector(e_hat, "residE");
-
- gsl_vector_free(u_hat);
- gsl_vector_free(e_hat);
- gsl_vector_free(y_hat);
- }
-*/
- // } else {
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- VC cVc;
- cVc.CopyFromParam(cPar);
- if (cPar.a_mode==61) {
- cVc.CalcVChe (G, W, &Y_col.vector);
- } else if (cPar.a_mode==62) {
- cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector);
- } else {
- cVc.CalcVCacl (G, W, &Y_col.vector);
- }
- cVc.CopyToParam(cPar);
- //obtain pve from sigma2
- //obtain se_pve from se_sigma2
-
- //}
- }
- }
-
- }
-
-
- //compute confidence intervals with additional summary statistics
- //we do not check the sign of z-scores here, but they have to be matched with the genotypes
- if (cPar.a_mode==66 || cPar.a_mode==67) {
- //read reference file first
- gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc);
- gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc);
-
- gsl_matrix_set_zero(S);
- gsl_matrix_set_zero(Svar);
- gsl_vector_set_zero(s_ref);
-
- if (!cPar.file_ref.empty()) {
- ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
- } else {
- ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
- }
-
- //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK
- //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1
- set<string> setSnps_beta;
- ReadFile_snps_header (cPar.file_beta, setSnps_beta);
-
- //obtain the weights for wA, which contains the SNP weights for SNPs used in the model
- map <string, double> mapRS2wK;
- cPar.ObtainWeight(setSnps_beta, mapRS2wK);
-
- //set up matrices and vector
- gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
- gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc);
- gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc);
- gsl_vector *w=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *z=gsl_vector_alloc (mapRS2wK.size());
- gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc);
-
- vector<size_t> vec_cat, vec_size;
- vector<double> vec_z;
-
- map <string, double> mapRS2z, mapRS2wA;
- map <string, string> mapRS2A1;
- string file_str;
-
- //update s_vec, the number of snps in each category
- for (size_t i=0; i<cPar.n_vc; i++) {
- vec_size.push_back(0);
- }
-
- for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
- vec_size[cPar.mapRS2cat[it->first]]++;
- }
-
- for (size_t i=0; i<cPar.n_vc; i++) {
- gsl_vector_set(s_vec, i, vec_size[i]);
- }
-
- //update mapRS2wA using v_pve and s_vec
- if (cPar.a_mode==66) {
- for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) {
- mapRS2wA[it->first]=1;
- }
- } else {
- cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
- }
-
- //read in z-scores based on allele 0, and save that into a vector
- ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
-
- //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat
- //sign of z is determined by matching alleles
- cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
-
- //compute an n by k matrix of X_iWz
- cout<<"Calculating Xz ... "<<endl;
-
- gsl_matrix_set_zero(Xz);
- gsl_vector_set_all (w1, 1);
-
- if (!cPar.file_bfile.empty() ) {
- file_str=cPar.file_bfile+".bed";
- PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
- } else if (!cPar.file_geno.empty()) {
- BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
- } else if (!cPar.file_mbfile.empty() ){
- MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
- } else if (!cPar.file_mgeno.empty()) {
- MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz);
- }
- /*
- cout<<"Xz: "<<endl;
- for (size_t i=0; i<5; i++) {
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get (Xz, i, j)<<" ";
- }
- cout<<endl;
- }
- */
- if (cPar.a_mode==66) {
- gsl_matrix_memcpy (XWz, Xz);
- } else if (cPar.a_mode==67) {
- cout<<"Calculating XWz ... "<<endl;
-
- gsl_matrix_set_zero(XWz);
-
- if (!cPar.file_bfile.empty() ) {
- file_str=cPar.file_bfile+".bed";
- PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
- } else if (!cPar.file_geno.empty()) {
- BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz);
- } else if (!cPar.file_mbfile.empty() ){
- MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
- } else if (!cPar.file_mgeno.empty()) {
- MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz);
- }
- }
- /*
- cout<<"XWz: "<<endl;
- for (size_t i=0; i<5; i++) {
- cout<<gsl_vector_get (w, i)<<endl;
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get (XWz, i, j)<<" ";
- }
- cout<<endl;
- }
- */
- //compute an p by k matrix of X_j^TWX_iWz
- cout<<"Calculating XtXWz ... "<<endl;
- gsl_matrix_set_zero(XtXWz);
-
- if (!cPar.file_bfile.empty() ) {
- file_str=cPar.file_bfile+".bed";
- PlinkXtXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
- } else if (!cPar.file_geno.empty()) {
- BimbamXtXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz);
- } else if (!cPar.file_mbfile.empty() ){
- MFILEXtXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
- } else if (!cPar.file_mgeno.empty()) {
- MFILEXtXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz);
- }
- /*
- cout<<"XtXWz: "<<endl;
- for (size_t i=0; i<5; i++) {
- for (size_t j=0; j<cPar.n_vc; j++) {
- cout<<gsl_matrix_get (XtXWz, i, j)<<" ";
- }
- cout<<endl;
- }
- */
- //compute confidence intervals
- CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
-
- //write files
- //cPar.WriteMatrix (XWz, "XWz");
- //cPar.WriteMatrix (XtXWz, "XtXWz");
- //cPar.WriteVector (w, "w");
-
- gsl_matrix_free(S);
- gsl_matrix_free(Svar);
- gsl_vector_free(s_ref);
-
- gsl_matrix_free(Xz);
- gsl_matrix_free(XWz);
- gsl_matrix_free(XtXWz);
- gsl_vector_free(w);
- gsl_vector_free(w1);
- gsl_vector_free(z);
- gsl_vector_free(s_vec);
- }
-
-
- //LMM or mvLMM or Eigen-Decomposition
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==31) { //Fit LMM or mvLMM or eigen
- gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
- gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt);
- gsl_matrix *B=gsl_matrix_alloc (Y->size2, W->size2); //B is a d by c matrix
- gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2);
- gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1);
- gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2);
- gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2);
- gsl_vector *eval=gsl_vector_alloc (Y->size1);
- gsl_vector *env=gsl_vector_alloc (Y->size1);
- gsl_vector *weight=gsl_vector_alloc (Y->size1);
-
- //set covariates matrix W and phenotype matrix Y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, Y, 0);
- if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);}
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
-
- //is residual weights are provided, then
- if (!cPar.file_weight.empty()) {
- cPar.CopyWeight (weight);
- double d, wi, wj;
- for (size_t i=0; i<G->size1; i++) {
- wi=gsl_vector_get(weight, i);
- for (size_t j=i; j<G->size2; j++) {
- wj=gsl_vector_get(weight, j);
- d=gsl_matrix_get(G, i, j);
- if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);}
- gsl_matrix_set(G, i, j, d);
- if (j!=i) {gsl_matrix_set(G, j, i, d);}
- }
- }
- }
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
-
- if (cPar.a_mode==31) {
- cPar.trace_G=EigenDecomp (G, U, eval, 1);
- } else {
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- }
-
- if (!cPar.file_weight.empty()) {
- double wi;
- for (size_t i=0; i<U->size1; i++) {
- wi=gsl_vector_get(weight, i);
- if (wi<=0) {wi=0;} else {wi=sqrt(wi);}
- gsl_vector_view Urow=gsl_matrix_row (U, i);
- gsl_vector_scale (&Urow.vector, wi);
- }
- }
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
-
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- } else {
- ReadFile_eigenU (cPar.file_ku, cPar.error, U);
- if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;}
-
- ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
- if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;}
-
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
- cPar.trace_G+=gsl_vector_get(eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- }
-
- if (cPar.a_mode==31) {
- cPar.WriteMatrix(U, "eigenU");
- cPar.WriteVector(eval, "eigenD");
- } else if (!cPar.file_gene.empty() ) {
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- LMM cLmm;
- cLmm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype
-
- cLmm.WriteFiles();
- cLmm.CopyToParam(cPar);
- } else {
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, Y, UtY);
-
- //calculate REMLE/MLE estimate and pve for univariate model
- if (cPar.n_ph==1) {
- gsl_vector_view beta=gsl_matrix_row (B, 0);
- gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
-
- cPar.beta_mle_null.clear();
- cPar.se_beta_mle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
- cPar.beta_remle_null.clear();
- cPar.se_beta_remle_null.clear();
- for (size_t i=0; i<B->size2; i++) {
- cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
- cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
- }
-
- CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
- cPar.PrintSummary();
-
- //calculate and output residuals
- if (cPar.a_mode==5) {
- gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
- gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
-
- //obtain Utu and Ute
- gsl_vector_memcpy (y_hat, &UtY_col.vector);
- gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
-
- double d, u, e;
- for (size_t i=0; i<eval->size; i++) {
- d=gsl_vector_get (eval, i);
- u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
- gsl_vector_set (Utu_hat, i, u);
- gsl_vector_set (Ute_hat, i, e);
- }
-
- //obtain u and e
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
-
- //output residuals
- cPar.WriteVector(u_hat, "residU");
- cPar.WriteVector(e_hat, "residE");
-
- gsl_vector_free(u_hat);
- gsl_vector_free(e_hat);
- gsl_vector_free(y_hat);
- }
- }
-
- //Fit LMM or mvLMM
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) {
- if (cPar.n_ph==1) {
- LMM cLmm;
- cLmm.CopyFromParam(cPar);
-
- gsl_vector_view Y_col=gsl_matrix_column (Y, 0);
- gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
-
- if (!cPar.file_bfile.empty()) {
- if (cPar.file_gxe.empty()) {
- cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
- cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
- }
- }
- // WJA added
- else if(!cPar.file_oxford.empty()) {
- cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- }
- else {
- if (cPar.file_gxe.empty()) {
- cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
- cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env);
- }
- }
-
- cLmm.WriteFiles();
- cLmm.CopyToParam(cPar);
- } else {
- MVLMM cMvlmm;
- cMvlmm.CopyFromParam(cPar);
-
- if (!cPar.file_bfile.empty()) {
- if (cPar.file_gxe.empty()) {
- cMvlmm.AnalyzePlink (U, eval, UtW, UtY);
- } else {
- cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env);
- }
- }
- else if(!cPar.file_oxford.empty())
- {
- cMvlmm.Analyzebgen (U, eval, UtW, UtY);
- }
- else {
- if (cPar.file_gxe.empty()) {
- cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY);
- } else {
- cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env);
- }
- }
-
- cMvlmm.WriteFiles();
- cMvlmm.CopyToParam(cPar);
- }
- }
- }
-
-
- //release all matrices and vectors
- gsl_matrix_free (Y);
- gsl_matrix_free (W);
- gsl_matrix_free(B);
- gsl_matrix_free(se_B);
- gsl_matrix_free (G);
- gsl_matrix_free (U);
- gsl_matrix_free (UtW);
- gsl_matrix_free (UtY);
- gsl_vector_free (eval);
- gsl_vector_free (env);
- }
-
-
- //BSLMM
- if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
- gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
- //set covariates matrix W and phenotype vector y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, y, 0);
-
- //center y, even for case/control data
- cPar.pheno_mean=CenterVector(y);
-
- //run bvsr if rho==1
- if (cPar.rho_min==1 && cPar.rho_max==1) {
- //read genotypes X (not UtX)
- cPar.ReadGenotypes (UtX, G, false);
-
- //perform BSLMM analysis
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- cBslmm.MCMC(UtX, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- //else, if rho!=1
- } else {
- gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
- gsl_vector *eval=gsl_vector_alloc (y->size);
- gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
- gsl_vector *Uty=gsl_vector_alloc (y->size);
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- cPar.ReadGenotypes (UtX, G, false);
-
- //read relatedness matrix G
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
- } else {
- cPar.ReadGenotypes (UtX, G, true);
- }
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, y, Uty);
-
- //calculate REMLE/MLE estimate and pve
- CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
- cPar.PrintSummary();
-
- //Creat and calcualte UtX, use a large memory
- cout<<"Calculating UtX..."<<endl;
- time_start=clock();
- CalcUtX (U, UtX);
- cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //perform BSLMM or BSLMMDAP analysis
- if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- if (cPar.a_mode==12) { //ridge regression
- cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
- } else { //Run MCMC
- cBslmm.MCMC(U, UtX, Uty, eval, y);
- }
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- } else {
- }
-
- //release all matrices and vectors
- gsl_matrix_free (G);
- gsl_matrix_free (U);
- gsl_matrix_free (UtW);
- gsl_vector_free (eval);
- gsl_vector_free (Uty);
-
- }
- gsl_matrix_free (W);
- gsl_vector_free (y);
- gsl_matrix_free (UtX);
- }
-
-
-
- //BSLMM-DAP
- if (cPar.a_mode==14 || cPar.a_mode==15 || cPar.a_mode==16) {
- if (cPar.a_mode==14) {
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (y->size, y->size);
- gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test);
-
- //set covariates matrix W and phenotype vector y
- //an intercept should be included in W,
- cPar.CopyCvtPhen (W, y, 0);
-
- //center y, even for case/control data
- cPar.pheno_mean=CenterVector(y);
-
- //run bvsr if rho==1
- if (cPar.rho_min==1 && cPar.rho_max==1) {
- //read genotypes X (not UtX)
- cPar.ReadGenotypes (UtX, G, false);
-
- //perform BSLMM analysis
- BSLMM cBslmm;
- cBslmm.CopyFromParam(cPar);
- time_start=clock();
- cBslmm.MCMC(UtX, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmm.CopyToParam(cPar);
- //else, if rho!=1
- } else {
- gsl_matrix *U=gsl_matrix_alloc (y->size, y->size);
- gsl_vector *eval=gsl_vector_alloc (y->size);
- gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2);
- gsl_vector *Uty=gsl_vector_alloc (y->size);
-
- //read relatedness matrix G
- if (!(cPar.file_kin).empty()) {
- cPar.ReadGenotypes (UtX, G, false);
-
- //read relatedness matrix G
- ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G);
- if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;}
-
- //center matrix G
- CenterMatrix (G);
- } else {
- cPar.ReadGenotypes (UtX, G, true);
- }
-
- //eigen-decomposition and calculate trace_G
- cout<<"Start Eigen-Decomposition..."<<endl;
- time_start=clock();
- cPar.trace_G=EigenDecomp (G, U, eval, 0);
- cPar.trace_G=0.0;
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
- cPar.trace_G+=gsl_vector_get (eval, i);
- }
- cPar.trace_G/=(double)eval->size;
- cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //calculate UtW and Uty
- CalcUtX (U, W, UtW);
- CalcUtX (U, y, Uty);
-
- //calculate REMLE/MLE estimate and pve
- CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
- CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
- CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null);
-
- cPar.PrintSummary();
-
- //Creat and calcualte UtX, use a large memory
- cout<<"Calculating UtX..."<<endl;
- time_start=clock();
- CalcUtX (U, UtX);
- cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //perform analysis; assume X and y are already centered
- BSLMMDAP cBslmmDap;
- cBslmmDap.CopyFromParam(cPar);
- time_start=clock();
- cBslmmDap.DAP_CalcBF (U, UtX, Uty, eval, y);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmmDap.CopyToParam(cPar);
-
- //release all matrices and vectors
- gsl_matrix_free (G);
- gsl_matrix_free (U);
- gsl_matrix_free (UtW);
- gsl_vector_free (eval);
- gsl_vector_free (Uty);
- }
-
- gsl_matrix_free (W);
- gsl_vector_free (y);
- gsl_matrix_free (UtX);
- } else if (cPar.a_mode==15) {
- //perform EM algorithm and estimate parameters
- vector<string> vec_rs;
- vector<double> vec_sa2, vec_sb2, wab;
- vector<vector<vector<double> > > BF;
-
- //read hyp and bf files (functions defined in BSLMMDAP)
- ReadFile_hyb (cPar.file_hyp, vec_sa2, vec_sb2, wab);
- ReadFile_bf (cPar.file_bf, vec_rs, BF);
-
- cPar.ns_test=vec_rs.size();
- if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"<<endl;}
-
- //load annotations
- gsl_matrix *Ac;
- gsl_matrix_int *Ad;
- gsl_vector_int *dlevel;
- size_t kc, kd;
- if (!cPar.file_cat.empty()) {
- ReadFile_cat (cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
- } else {
- kc=0; kd=0;
- }
-
- cout<<"## number of blocks = "<<BF.size()<<endl;
- cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl;
- cout<<"## grid size for hyperparameters = "<<wab.size()<<endl;
- cout<<"## number of continuous annotations = "<<kc<<endl;
- cout<<"## number of discrete annotations = "<<kd<<endl;
-
- //DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel);
-
- //perform analysis
- BSLMMDAP cBslmmDap;
- cBslmmDap.CopyFromParam(cPar);
- time_start=clock();
- cBslmmDap.DAP_EstimateHyper (kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac, Ad, dlevel);
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cBslmmDap.CopyToParam(cPar);
-
- gsl_matrix_free(Ac);
- gsl_matrix_int_free(Ad);
- gsl_vector_int_free(dlevel);
- } else {
- //
- }
-
- }
-
-
-
-
- /*
- //LDR (change 14 to 16?)
- if (cPar.a_mode==14) {
- gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
- gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
- gsl_matrix *G=gsl_matrix_alloc (1, 1);
- vector<vector<unsigned char> > Xt;
-
- //set covariates matrix W and phenotype vector y
- //an intercept is included in W
- cPar.CopyCvtPhen (W, y, 0);
-
- //read in genotype matrix X
- cPar.ReadGenotypes (Xt, G, false);
-
- LDR cLdr;
- cLdr.CopyFromParam(cPar);
- time_start=clock();
-
- cLdr.VB(Xt, W, y);
-
- cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- cLdr.CopyToParam(cPar);
-
- gsl_vector_free (y);
- gsl_matrix_free (W);
- gsl_matrix_free (G);
- }
- */
-
- cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0);
-
- return;
+void GEMMA::BatchRun(PARAM &cPar) {
+ clock_t time_begin, time_start;
+ time_begin = clock();
+
+ // Read Files.
+ cout << "Reading Files ... " << endl;
+ cPar.ReadFiles();
+ if (cPar.error == true) {
+ cout << "error! fail to read files. " << endl;
+ return;
+ }
+ cPar.CheckData();
+ if (cPar.error == true) {
+ cout << "error! fail to check data. " << endl;
+ return;
+ }
+
+ // Prediction for bslmm
+ if (cPar.a_mode == 41 || cPar.a_mode == 42) {
+ gsl_vector *y_prdt;
+
+ y_prdt = gsl_vector_alloc(cPar.ni_total - cPar.ni_test);
+
+ // set to zero
+ gsl_vector_set_zero(y_prdt);
+
+ PRDT cPRDT;
+ cPRDT.CopyFromParam(cPar);
+
+ // add breeding value if needed
+ if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
+ cout << "Adding Breeding Values ... " << endl;
+
+ gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+ gsl_vector *u_hat = gsl_vector_alloc(cPar.ni_test);
+
+ // read kinship matrix and set u_hat
+ vector<int> indicator_all;
+ size_t c_bv = 0;
+ for (size_t i = 0; i < cPar.indicator_idv.size(); i++) {
+ indicator_all.push_back(1);
+ if (cPar.indicator_bv[i] == 1) {
+ gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]);
+ c_bv++;
+ }
+ }
+
+ ReadFile_kin(cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // read u
+ cPRDT.AddBV(G, u_hat, y_prdt);
+
+ gsl_matrix_free(G);
+ gsl_vector_free(u_hat);
+ }
+
+ // add beta
+ if (!cPar.file_bfile.empty()) {
+ cPRDT.AnalyzePlink(y_prdt);
+ } else {
+ cPRDT.AnalyzeBimbam(y_prdt);
+ }
+
+ // add mu
+ gsl_vector_add_constant(y_prdt, cPar.pheno_mean);
+
+ // convert y to probability if needed
+ if (cPar.a_mode == 42) {
+ double d;
+ for (size_t i = 0; i < y_prdt->size; i++) {
+ d = gsl_vector_get(y_prdt, i);
+ d = gsl_cdf_gaussian_P(d, 1.0);
+ gsl_vector_set(y_prdt, i, d);
+ }
+ }
+
+ cPRDT.CopyToParam(cPar);
+
+ cPRDT.WriteFiles(y_prdt);
+
+ gsl_vector_free(y_prdt);
+ }
+
+ // Prediction with kinship matrix only; for one or more phenotypes
+ if (cPar.a_mode == 43) {
+ // first, use individuals with full phenotypes to obtain estimates of Vg and
+ // Ve
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+ gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+ gsl_vector *eval = gsl_vector_alloc(Y->size1);
+
+ gsl_matrix *Y_full = gsl_matrix_alloc(cPar.ni_cvt, cPar.n_ph);
+ gsl_matrix *W_full = gsl_matrix_alloc(Y_full->size1, cPar.n_cvt);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+ cPar.CopyCvtPhen(W_full, Y_full, 1);
+
+ gsl_matrix *Y_hat = gsl_matrix_alloc(Y_full->size1, cPar.n_ph);
+ gsl_matrix *G_full = gsl_matrix_alloc(Y_full->size1, Y_full->size1);
+ gsl_matrix *H_full = gsl_matrix_alloc(Y_full->size1 * Y_hat->size2,
+ Y_full->size1 * Y_hat->size2);
+
+ // read relatedness matrix G, and matrix G_full
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+ ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
+ cPar.error, G_full);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ CenterMatrix(G_full);
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ // calculate variance component and beta estimates
+ // and then obtain predicted values
+ if (cPar.n_ph == 1) {
+ gsl_vector *beta = gsl_vector_alloc(W->size2);
+ gsl_vector *se_beta = gsl_vector_alloc(W->size2);
+
+ double lambda, logl, vg, ve;
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ // obtain estimates
+ CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, lambda, logl);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, lambda, vg, ve, beta,
+ se_beta);
+
+ cout << "REMLE estimate for vg in the null model = " << vg << endl;
+ cout << "REMLE estimate for ve in the null model = " << ve << endl;
+ cPar.vg_remle_null = vg;
+ cPar.ve_remle_null = ve;
+
+ // obtain Y_hat from fixed effects
+ gsl_vector_view Yhat_col = gsl_matrix_column(Y_hat, 0);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector);
+
+ // obtain H
+ gsl_matrix_set_identity(H_full);
+ gsl_matrix_scale(H_full, ve);
+ gsl_matrix_scale(G_full, vg);
+ gsl_matrix_add(H_full, G_full);
+
+ // free matrices
+ gsl_vector_free(beta);
+ gsl_vector_free(se_beta);
+ } else {
+ gsl_matrix *Vg = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *Ve = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+ gsl_matrix *se_B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+
+ // obtain estimates
+ CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
+ cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max,
+ cPar.n_region, Vg, Ve, B, se_B);
+
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < Vg->size1; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(Vg, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < Ve->size1; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(Ve, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cPar.Vg_remle_null.clear();
+ cPar.Ve_remle_null.clear();
+ for (size_t i = 0; i < Vg->size1; i++) {
+ for (size_t j = i; j < Vg->size2; j++) {
+ cPar.Vg_remle_null.push_back(gsl_matrix_get(Vg, i, j));
+ cPar.Ve_remle_null.push_back(gsl_matrix_get(Ve, i, j));
+ }
+ }
+
+ // obtain Y_hat from fixed effects
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat);
+
+ // obtain H
+ KroneckerSym(G_full, Vg, H_full);
+ for (size_t i = 0; i < G_full->size1; i++) {
+ gsl_matrix_view H_sub = gsl_matrix_submatrix(
+ H_full, i * Ve->size1, i * Ve->size2, Ve->size1, Ve->size2);
+ gsl_matrix_add(&H_sub.matrix, Ve);
+ }
+
+ // free matrices
+ gsl_matrix_free(Vg);
+ gsl_matrix_free(Ve);
+ gsl_matrix_free(B);
+ gsl_matrix_free(se_B);
+ }
+
+ PRDT cPRDT;
+
+ cPRDT.CopyFromParam(cPar);
+
+ cout << "Predicting Missing Phentypes ... " << endl;
+ time_start = clock();
+ cPRDT.MvnormPrdt(Y_hat, H_full, Y_full);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ cPRDT.WriteFiles(Y_full);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_matrix_free(UtY);
+ gsl_vector_free(eval);
+
+ gsl_matrix_free(Y_full);
+ gsl_matrix_free(Y_hat);
+ gsl_matrix_free(W_full);
+ gsl_matrix_free(G_full);
+ gsl_matrix_free(H_full);
+ }
+
+ // Generate Kinship matrix
+ if (cPar.a_mode == 21 || cPar.a_mode == 22) {
+ cout << "Calculating Relatedness Matrix ... " << endl;
+
+ gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+
+ time_start = clock();
+ cPar.CalcKin(G);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate relatedness matrix. " << endl;
+ return;
+ }
+
+ if (cPar.a_mode == 21) {
+ cPar.WriteMatrix(G, "cXX");
+ } else {
+ cPar.WriteMatrix(G, "sXX");
+ }
+
+ gsl_matrix_free(G);
+ }
+
+ // Compute the LDSC weights (not implemented yet)
+ if (cPar.a_mode == 72) {
+ cout << "Calculating Weights ... " << endl;
+
+ VARCOV cVarcov;
+ cVarcov.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cVarcov.AnalyzePlink();
+ } else {
+ cVarcov.AnalyzeBimbam();
+ }
+
+ cVarcov.CopyToParam(cPar);
+ }
+
+  // Compute the S matrix (and its variance), that is used for
+  // variance component estimation using summary statistics.
+  // Outputs are written through cPar.WriteMatrix / WriteVector / WriteVar
+  // with the tags "S", "size" and "snps".
+  if (cPar.a_mode == 25 || cPar.a_mode == 26) {
+    cout << "Calculating the S Matrix ... " << endl;
+
+    // S has 2*n_vc rows: the top n_vc x n_vc half is handed to CalcS as the
+    // S matrix, the bottom half as its variance (see the two views below).
+    gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+    // ns has n_vc entries filled by CalcS plus one trailing slot that is set
+    // to ni_test after the computation.
+    gsl_vector *ns = gsl_vector_alloc(cPar.n_vc + 1);
+    gsl_matrix_set_zero(S);
+    gsl_vector_set_zero(ns);
+
+    gsl_matrix_view S_mat = gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+    gsl_matrix_view Svar_mat =
+        gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+    gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
+
+    // Work matrices passed to CalcS, each ni_test x (n_vc * ni_test).
+    gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+    gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+    gsl_matrix_set_zero(K);
+    gsl_matrix_set_zero(A);
+
+    gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+    gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+    cPar.CopyCvtPhen(W, y, 0);
+
+    // setSnps_beta and mapRS2wA are deliberately left empty here; only
+    // mapRS2wK is populated (by ObtainWeight) before CalcS is called.
+    set<string> setSnps_beta;
+    map<string, double> mapRS2wA, mapRS2wK;
+
+    cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+    // time_G is recorded in minutes of clock() time.
+    time_start = clock();
+    cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+               &ns_vec.vector);
+    cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+    if (cPar.error == true) {
+      cout << "error! fail to calculate the S matrix. " << endl;
+
+      // Release every buffer allocated above before bailing out.
+      gsl_matrix_free(S);
+      gsl_vector_free(ns);
+      gsl_matrix_free(A);
+      gsl_matrix_free(K);
+      gsl_vector_free(y);
+      gsl_matrix_free(W);
+      return;
+    }
+
+    gsl_vector_set(ns, cPar.n_vc, cPar.ni_test);
+
+    cPar.WriteMatrix(S, "S");
+    cPar.WriteVector(ns, "size");
+    cPar.WriteVar("snps");
+
+    gsl_matrix_free(S);
+    gsl_vector_free(ns);
+
+    gsl_matrix_free(A);
+    gsl_matrix_free(K);
+
+    gsl_vector_free(y);
+    // Free W here: the original freed K a second time (double free) and
+    // never released W.
+    gsl_matrix_free(W);
+  }
+
+ // Compute the q vector, that is used for variance component estimation using
+ // summary statistics
+ if (cPar.a_mode == 27 || cPar.a_mode == 28) {
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s);
+
+ gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_ni;
+ vector<double> vec_weight, vec_z2;
+ map<string, double> mapRS2weight;
+ mapRS2weight.clear();
+
+ time_start = clock();
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_total, cPar.ns_total,
+ cPar.ns_test);
+ cout << "## number of total individuals = " << cPar.ni_total << endl;
+ cout << "## number of total SNPs = " << cPar.ns_total << endl;
+ cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+ cout << "Calculating the q vector ... " << endl;
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+ cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the q vector. " << endl;
+ return;
+ }
+
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_total);
+
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+ /*
+ for (size_t i=0; i<cPar.n_vc; i++) {
+ cout<<gsl_vector_get(q, i)<<endl;
+ }
+ */
+ gsl_matrix_free(Vq);
+ gsl_vector_free(q);
+ gsl_vector_free(s);
+ }
+
+ // Calculate SNP covariance.
+ if (cPar.a_mode == 71) {
+ VARCOV cVarcov;
+ cVarcov.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cVarcov.AnalyzePlink();
+ } else {
+ cVarcov.AnalyzeBimbam();
+ }
+
+ cVarcov.CopyToParam(cPar);
+ }
+
+ // LM.
+ if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 ||
+ cPar.a_mode == 54) { // Fit LM
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+
+ // Fit LM or mvLM
+ if (cPar.n_ph == 1) {
+ LM cLm;
+ cLm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+
+ if (!cPar.file_gene.empty()) {
+ cLm.AnalyzeGene(W,
+ &Y_col.vector); // y is the predictor, not the phenotype
+ } else if (!cPar.file_bfile.empty()) {
+ cLm.AnalyzePlink(W, &Y_col.vector);
+ } else if (!cPar.file_oxford.empty()) {
+ cLm.Analyzebgen(W, &Y_col.vector);
+ } else {
+ cLm.AnalyzeBimbam(W, &Y_col.vector);
+ }
+
+ cLm.WriteFiles();
+ cLm.CopyToParam(cPar);
+ }
+ /*
+ else {
+ MVLM cMvlm;
+ cMvlm.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ cMvlm.AnalyzePlink (W, Y);
+ } else {
+ cMvlm.AnalyzeBimbam (W, Y);
+ }
+
+ cMvlm.WriteFiles();
+ cMvlm.CopyToParam(cPar);
+ }
+ */
+ // release all matrices and vectors
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ }
+
+ // VC estimation with one or multiple kinship matrices
+ // REML approach only
+ // if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already,
+ // in param.cpp
+ // for one phenotype only;
+ if (cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) {
+ if (!cPar.file_beta.empty()) {
+ // need to obtain a common set of SNPs between beta file and the genotype
+ // file; these are saved in mapRS2wA and mapRS2wK
+ // normalize the weight in mapRS2wK to have an average of one; each
+ // element of mapRS2wA is 1
+ // update indicator_snps, so that the numbers are in accordance with
+ // mapRS2wK
+ set<string> setSnps_beta;
+ ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+ map<string, double> mapRS2wA, mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ cPar.UpdateSNP(mapRS2wK);
+
+ // Setup matrices and vectors.
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+ gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+
+ gsl_matrix_set_zero(K);
+ gsl_matrix_set_zero(A);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Vq);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s);
+
+ cPar.CopyCvtPhen(W, y, 0);
+
+ gsl_matrix_view S_mat =
+ gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+ gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_ni;
+ vector<double> vec_weight, vec_z2;
+
+ // read beta, based on the mapRS2wK
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_study, cPar.ns_study,
+ cPar.ns_test);
+
+ cout << "Study Panel: " << endl;
+ cout << "## number of total individuals = " << cPar.ni_study << endl;
+ cout << "## number of total SNPs = " << cPar.ns_study << endl;
+ cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+
+ // compute q
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+
+ // compute S
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &s_vec.vector);
+ cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ // compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+ cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+ cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+ cPar.v_enrich, cPar.v_se_enrich);
+
+ // if LDSC weights, then compute the weights and run the above steps again
+ if (cPar.a_mode == 62) {
+ // compute the weights and normalize the weights for A
+ cPar.UpdateWeight(1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA);
+
+ // read beta file again, and update weigths vector
+ ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni,
+ vec_weight, vec_z2, cPar.ni_study, cPar.ns_total,
+ cPar.ns_test);
+
+ // compute q
+ Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
+ &s_vec.vector);
+
+ // compute S
+ time_start = clock();
+ cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix,
+ &s_vec.vector);
+ cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ if (cPar.error == true) {
+ cout << "error! fail to calculate the S matrix. " << endl;
+ return;
+ }
+
+ // compute vc estimates
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector,
+ cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
+ cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
+ cPar.v_enrich, cPar.v_se_enrich);
+ }
+
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Vq);
+ gsl_vector_free(q);
+ gsl_vector_free(s);
+
+ gsl_matrix_free(A);
+ gsl_matrix_free(K);
+ gsl_vector_free(y);
+ gsl_matrix_free(W);
+ } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
+      // Derive cPar.n_vc from the "<prefix>.size.txt" file via CountFileLines.
+      // With a single study the prefix is file_study; with an mstudy list the
+      // prefix is the first line read from the file_mstudy file.
+      if (!cPar.file_study.empty()) {
+        string sfile = cPar.file_study + ".size.txt";
+        CountFileLines(sfile, cPar.n_vc);
+      } else {
+        string file_name;
+        igzstream infile(cPar.file_mstudy.c_str(), igzstream::in);
+        if (!infile) {
+          // Report the file that was actually opened: file_study is empty on
+          // this branch, so printing it gave an empty path in the message.
+          cout << "error! fail to open mstudy file: " << cPar.file_mstudy
+               << endl;
+          return;
+        }
+
+        // Only the first listed study prefix is needed to locate a size file.
+        safeGetline(infile, file_name);
+
+        infile.clear();
+        infile.close();
+
+        string sfile = file_name + ".size.txt";
+        CountFileLines(sfile, cPar.n_vc);
+      }
+
+ cPar.n_vc = cPar.n_vc - 1;
+
+ gsl_matrix *S = gsl_matrix_alloc(2 * cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ // gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1,
+ // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
+ // gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1,
+ // (n_lines*(n_lines+1))/2*(n_lines+1) );
+ gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s_study = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_view S_mat =
+ gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc);
+ gsl_matrix_view Svar_mat =
+ gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
+
+ gsl_matrix_set_zero(Vq);
+ // gsl_matrix_set_zero(V);
+ // gsl_matrix_set_zero(Vslope);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s_study);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_study.empty()) {
+ ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study);
+ } else {
+ ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study);
+ }
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+ cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref,
+ cPar.ni_ref);
+ }
+
+ cout << "## number of variance components = " << cPar.n_vc << endl;
+ cout << "## number of individuals in the sample = " << cPar.ni_study
+ << endl;
+ cout << "## number of individuals in the reference = " << cPar.ni_ref
+ << endl;
+
+ CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study,
+ cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
+ cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
+ cPar.v_se_enrich);
+
+ gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
+ gsl_vector_memcpy(&s_sub.vector, s_ref);
+ gsl_vector_set(s, cPar.n_vc, cPar.ni_ref);
+
+ cPar.WriteMatrix(S, "S");
+ cPar.WriteMatrix(Vq, "Vq");
+ cPar.WriteVector(q, "q");
+ cPar.WriteVector(s, "size");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Vq);
+ // gsl_matrix_free (V);
+ // gsl_matrix_free (Vslope);
+ gsl_vector_free(q);
+ gsl_vector_free(s_study);
+ gsl_vector_free(s_ref);
+ gsl_vector_free(s);
+ } else {
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1 * cPar.n_vc);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+
+ // read kinship matrices
+ if (!(cPar.file_mk).empty()) {
+ ReadFile_mk(cPar.file_mk, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G, and obtain v_traceG
+ double d = 0;
+ (cPar.v_traceG).clear();
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ gsl_matrix_view G_sub =
+ gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1);
+ CenterMatrix(&G_sub.matrix);
+ d = 0;
+ for (size_t j = 0; j < G->size1; j++) {
+ d += gsl_matrix_get(&G_sub.matrix, j, j);
+ }
+ d /= (double)G->size1;
+ (cPar.v_traceG).push_back(d);
+ }
+ } else if (!(cPar.file_kin).empty()) {
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+
+ (cPar.v_traceG).clear();
+ double d = 0;
+ for (size_t j = 0; j < G->size1; j++) {
+ d += gsl_matrix_get(G, j, j);
+ }
+ d /= (double)G->size1;
+ (cPar.v_traceG).push_back(d);
+ }
+ /*
+ //eigen-decomposition and calculate trace_G
+ cout<<"Start Eigen-Decomposition..."<<endl;
+ time_start=clock();
+
+ if (cPar.a_mode==31) {
+ cPar.trace_G=EigenDecomp (G, U, eval, 1);
+ } else {
+ cPar.trace_G=EigenDecomp (G, U, eval, 0);
+ }
+
+ cPar.trace_G=0.0;
+ for (size_t i=0; i<eval->size; i++) {
+ if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);}
+ cPar.trace_G+=gsl_vector_get (eval, i);
+ }
+ cPar.trace_G/=(double)eval->size;
+
+ cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+} else {
+ ReadFile_eigenU (cPar.file_ku, cPar.error, U);
+ if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl;
+return;}
+
+ ReadFile_eigenD (cPar.file_kd, cPar.error, eval);
+ if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl;
+return;}
+
+ cPar.trace_G=0.0;
+ for (size_t i=0; i<eval->size; i++) {
+ if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);}
+ cPar.trace_G+=gsl_vector_get(eval, i);
+ }
+ cPar.trace_G/=(double)eval->size;
}
+*/
+ // fit multiple variance components
+ if (cPar.n_ph == 1) {
+ // if (cPar.n_vc==1) {
+ /*
+ //calculate UtW and Uty
+ CalcUtX (U, W, UtW);
+ CalcUtX (U, Y, UtY);
+
+ gsl_vector_view beta=gsl_matrix_row (B, 0);
+ gsl_vector_view se_beta=gsl_matrix_row (se_B, 0);
+ gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0);
+
+ CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+ cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector);
+
+ cPar.beta_mle_null.clear();
+ cPar.se_beta_mle_null.clear();
+ for (size_t i=0; i<B->size2; i++) {
+ cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) );
+ cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+ }
+
+ CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+ cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector);
+ cPar.beta_remle_null.clear();
+ cPar.se_beta_remle_null.clear();
+ for (size_t i=0; i<B->size2; i++) {
+ cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) );
+ cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) );
+ }
+
+ CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+ cPar.pve_null, cPar.pve_se_null);
+ cPar.PrintSummary();
+
+ //calculate and output residuals
+ if (cPar.a_mode==5) {
+ gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *u_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *e_hat=gsl_vector_alloc (Y->size1);
+ gsl_vector *y_hat=gsl_vector_alloc (Y->size1);
+
+ //obtain Utu and Ute
+ gsl_vector_memcpy (y_hat, &UtY_col.vector);
+ gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+ double d, u, e;
+ for (size_t i=0; i<eval->size; i++) {
+ d=gsl_vector_get (eval, i);
+ u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat,
+ i);
+ e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i);
+ gsl_vector_set (Utu_hat, i, u);
+ gsl_vector_set (Ute_hat, i, e);
+ }
+
+ //obtain u and e
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+ //output residuals
+ cPar.WriteVector(u_hat, "residU");
+ cPar.WriteVector(e_hat, "residE");
+
+ gsl_vector_free(u_hat);
+ gsl_vector_free(e_hat);
+ gsl_vector_free(y_hat);
+ }
+*/
+ // } else {
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ VC cVc;
+ cVc.CopyFromParam(cPar);
+ if (cPar.a_mode == 61) {
+ cVc.CalcVChe(G, W, &Y_col.vector);
+ } else if (cPar.a_mode == 62) {
+ cVc.CalcVCreml(cPar.noconstrain, G, W, &Y_col.vector);
+ } else {
+ cVc.CalcVCacl(G, W, &Y_col.vector);
+ }
+ cVc.CopyToParam(cPar);
+ // obtain pve from sigma2
+ // obtain se_pve from se_sigma2
+
+ //}
+ }
+ }
+ }
+
+ // compute confidence intervals with additional summary statistics
+ // we do not check the sign of z-scores here, but they have to be matched with
+ // the genotypes
+ if (cPar.a_mode == 66 || cPar.a_mode == 67) {
+ // read reference file first
+ gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Svar = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Svar);
+ gsl_vector_set_zero(s_ref);
+
+ if (!cPar.file_ref.empty()) {
+ ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref);
+ } else {
+ ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref);
+ }
+
+ // need to obtain a common set of SNPs between beta file and the genotype
+ // file; these are saved in mapRS2wA and mapRS2wK
+ // normalize the weight in mapRS2wK to have an average of one; each element
+ // of mapRS2wA is 1
+ set<string> setSnps_beta;
+ ReadFile_snps_header(cPar.file_beta, setSnps_beta);
+
+ // obtain the weights for wA, which contains the SNP weights for SNPs used
+ // in the model
+ map<string, double> mapRS2wK;
+ cPar.ObtainWeight(setSnps_beta, mapRS2wK);
+
+ // set up matrices and vector
+ gsl_matrix *Xz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XWz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XtXWz =
+ gsl_matrix_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
+ gsl_vector *w = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *w1 = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *z = gsl_vector_alloc(mapRS2wK.size());
+ gsl_vector *s_vec = gsl_vector_alloc(cPar.n_vc);
+
+ vector<size_t> vec_cat, vec_size;
+ vector<double> vec_z;
+
+ map<string, double> mapRS2z, mapRS2wA;
+ map<string, string> mapRS2A1;
+ string file_str;
+
+ // update s_vec, the number of snps in each category
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ vec_size.push_back(0);
+ }
+
+ for (map<string, double>::const_iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ vec_size[cPar.mapRS2cat[it->first]]++;
+ }
+
+ for (size_t i = 0; i < cPar.n_vc; i++) {
+ gsl_vector_set(s_vec, i, vec_size[i]);
+ }
+
+ // update mapRS2wA using v_pve and s_vec
+ if (cPar.a_mode == 66) {
+ for (map<string, double>::const_iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ mapRS2wA[it->first] = 1;
+ }
+ } else {
+ cPar.UpdateWeight(0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA);
+ }
+
+ // read in z-scores based on allele 0, and save that into a vector
+ ReadFile_beta(cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z);
+
+ // update snp indicator, save weights to w, save z-scores to vec_z, save
+ // category label to vec_cat
+ // sign of z is determined by matching alleles
+ cPar.UpdateSNPnZ(mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat);
+
+ // compute an n by k matrix of X_iWz
+ cout << "Calculating Xz ... " << endl;
+
+ gsl_matrix_set_zero(Xz);
+ gsl_vector_set_all(w1, 1);
+
+ if (!cPar.file_bfile.empty()) {
+ file_str = cPar.file_bfile + ".bed";
+ PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+ vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+ cPar.indicator_snp, vec_cat, w1, z, 0, Xz);
+ } else if (!cPar.file_mbfile.empty()) {
+ MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w1, z, Xz);
+ }
+ /*
+ cout<<"Xz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (Xz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ if (cPar.a_mode == 66) {
+ gsl_matrix_memcpy(XWz, Xz);
+ } else if (cPar.a_mode == 67) {
+ cout << "Calculating XWz ... " << endl;
+
+ gsl_matrix_set_zero(XWz);
+
+ if (!cPar.file_bfile.empty()) {
+ file_str = cPar.file_bfile + ".bed";
+ PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+ vec_cat, w, z, 0, XWz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+ cPar.indicator_snp, vec_cat, w, z, 0, XWz);
+ } else if (!cPar.file_mbfile.empty()) {
+ MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w, z, XWz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, vec_cat, w, z, XWz);
+ }
+ }
+ /*
+ cout<<"XWz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ cout<<gsl_vector_get (w, i)<<endl;
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (XWz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ // compute a p by k^2 matrix of X_j^TWX_iWz
+ cout << "Calculating XtXWz ... " << endl;
+ gsl_matrix_set_zero(XtXWz);
+
+ if (!cPar.file_bfile.empty()) {
+ file_str = cPar.file_bfile + ".bed";
+ PlinkXtXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp,
+ XWz, 0, XtXWz);
+ } else if (!cPar.file_geno.empty()) {
+ BimbamXtXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv,
+ cPar.indicator_snp, XWz, 0, XtXWz);
+ } else if (!cPar.file_mbfile.empty()) {
+ MFILEXtXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, XWz, XtXWz);
+ } else if (!cPar.file_mgeno.empty()) {
+ MFILEXtXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv,
+ cPar.mindicator_snp, XWz, XtXWz);
+ }
+ /*
+ cout<<"XtXWz: "<<endl;
+ for (size_t i=0; i<5; i++) {
+ for (size_t j=0; j<cPar.n_vc; j++) {
+ cout<<gsl_matrix_get (XtXWz, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+ // compute confidence intervals
+ CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve,
+ cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2,
+ cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+
+ // write files
+ // cPar.WriteMatrix (XWz, "XWz");
+ // cPar.WriteMatrix (XtXWz, "XtXWz");
+ // cPar.WriteVector (w, "w");
+
+ gsl_matrix_free(S);
+ gsl_matrix_free(Svar);
+ gsl_vector_free(s_ref);
+
+ gsl_matrix_free(Xz);
+ gsl_matrix_free(XWz);
+ gsl_matrix_free(XtXWz);
+ gsl_vector_free(w);
+ gsl_vector_free(w1);
+ gsl_vector_free(z);
+ gsl_vector_free(s_vec);
+ }
+
+ // LMM or mvLMM or Eigen-Decomposition
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4 || cPar.a_mode == 5 ||
+ cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen
+ gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c
+ // matrix
+ gsl_matrix *se_B = gsl_matrix_alloc(Y->size2, W->size2);
+ gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
+ gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
+ gsl_vector *eval = gsl_vector_alloc(Y->size1);
+ gsl_vector *env = gsl_vector_alloc(Y->size1);
+ gsl_vector *weight = gsl_vector_alloc(Y->size1);
+
+ // set covariates matrix W and phenotype matrix Y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, Y, 0);
+ if (!cPar.file_gxe.empty()) {
+ cPar.CopyGxe(env);
+ }
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+
+ // if residual weights are provided, then rescale G by them
+ if (!cPar.file_weight.empty()) {
+ cPar.CopyWeight(weight);
+ double d, wi, wj;
+ for (size_t i = 0; i < G->size1; i++) {
+ wi = gsl_vector_get(weight, i);
+ for (size_t j = i; j < G->size2; j++) {
+ wj = gsl_vector_get(weight, j);
+ d = gsl_matrix_get(G, i, j);
+ if (wi <= 0 || wj <= 0) {
+ d = 0;
+ } else {
+ d /= sqrt(wi * wj);
+ }
+ gsl_matrix_set(G, i, j, d);
+ if (j != i) {
+ gsl_matrix_set(G, j, i, d);
+ }
+ }
+ }
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+
+ if (cPar.a_mode == 31) {
+ cPar.trace_G = EigenDecomp(G, U, eval, 1);
+ } else {
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ }
+
+ if (!cPar.file_weight.empty()) {
+ double wi;
+ for (size_t i = 0; i < U->size1; i++) {
+ wi = gsl_vector_get(weight, i);
+ if (wi <= 0) {
+ wi = 0;
+ } else {
+ wi = sqrt(wi);
+ }
+ gsl_vector_view Urow = gsl_matrix_row(U, i);
+ gsl_vector_scale(&Urow.vector, wi);
+ }
+ }
+
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ } else {
+ ReadFile_eigenU(cPar.file_ku, cPar.error, U);
+ if (cPar.error == true) {
+ cout << "error! fail to read the U file. " << endl;
+ return;
+ }
+
+ ReadFile_eigenD(cPar.file_kd, cPar.error, eval);
+ if (cPar.error == true) {
+ cout << "error! fail to read the D file. " << endl;
+ return;
+ }
+
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ }
+
+ if (cPar.a_mode == 31) {
+ cPar.WriteMatrix(U, "eigenU");
+ cPar.WriteVector(eval, "eigenD");
+ } else if (!cPar.file_gene.empty()) {
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ LMM cLmm;
+ cLmm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector); // y is the predictor, not the phenotype
+
+ cLmm.WriteFiles();
+ cLmm.CopyToParam(cPar);
+ } else {
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, Y, UtY);
+
+ // calculate REMLE/MLE estimate and pve for univariate model
+ if (cPar.n_ph == 1) {
+ gsl_vector_view beta = gsl_matrix_row(B, 0);
+ gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
+ cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
+ &se_beta.vector);
+
+ cPar.beta_mle_null.clear();
+ cPar.se_beta_mle_null.clear();
+ for (size_t i = 0; i < B->size2; i++) {
+ cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
+ cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
+ }
+
+ CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
+ cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_remle_null,
+ cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector,
+ &se_beta.vector);
+ cPar.beta_remle_null.clear();
+ cPar.se_beta_remle_null.clear();
+ for (size_t i = 0; i < B->size2; i++) {
+ cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
+ cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
+ }
+
+ CalcPve(eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G,
+ cPar.pve_null, cPar.pve_se_null);
+ cPar.PrintSummary();
+
+ // calculate and output residuals
+ if (cPar.a_mode == 5) {
+ gsl_vector *Utu_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *Ute_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *u_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *e_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *y_hat = gsl_vector_alloc(Y->size1);
+
+ // obtain Utu and Ute
+ gsl_vector_memcpy(y_hat, &UtY_col.vector);
+ gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
+
+ double d, u, e;
+ for (size_t i = 0; i < eval->size; i++) {
+ d = gsl_vector_get(eval, i);
+ u = cPar.l_remle_null * d / (cPar.l_remle_null * d + 1.0) *
+ gsl_vector_get(y_hat, i);
+ e = 1.0 / (cPar.l_remle_null * d + 1.0) * gsl_vector_get(y_hat, i);
+ gsl_vector_set(Utu_hat, i, u);
+ gsl_vector_set(Ute_hat, i, e);
+ }
+
+ // obtain u and e
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat);
+
+ // output residuals
+ cPar.WriteVector(u_hat, "residU");
+ cPar.WriteVector(e_hat, "residE");
+
+ gsl_vector_free(u_hat);
+ gsl_vector_free(e_hat);
+ gsl_vector_free(y_hat);
+ }
+ }
+
+ // Fit LMM or mvLMM
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4) {
+ if (cPar.n_ph == 1) {
+ LMM cLmm;
+ cLmm.CopyFromParam(cPar);
+
+ gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
+ gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+
+ if (!cPar.file_bfile.empty()) {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector);
+ } else {
+ cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector, env);
+ }
+ }
+ // WJA added
+ else if (!cPar.file_oxford.empty()) {
+ cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
+ } else {
+ if (cPar.file_gxe.empty()) {
+ cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector);
+ } else {
+ cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
+ &Y_col.vector, env);
+ }
+ }
+
+ cLmm.WriteFiles();
+ cLmm.CopyToParam(cPar);
+ } else {
+ MVLMM cMvlmm;
+ cMvlmm.CopyFromParam(cPar);
+
+ if (!cPar.file_bfile.empty()) {
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzePlink(U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
+ }
+ } else if (!cPar.file_oxford.empty()) {
+ cMvlmm.Analyzebgen(U, eval, UtW, UtY);
+ } else {
+ if (cPar.file_gxe.empty()) {
+ cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
+ } else {
+ cMvlmm.AnalyzeBimbamGXE(U, eval, UtW, UtY, env);
+ }
+ }
+
+ cMvlmm.WriteFiles();
+ cMvlmm.CopyToParam(cPar);
+ }
+ }
+ }
+
+ // release all matrices and vectors
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+ gsl_matrix_free(B);
+ gsl_matrix_free(se_B);
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_matrix_free(UtY);
+ gsl_vector_free(eval);
+ gsl_vector_free(env);
+ }
+
+ // BSLMM
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+ // set covariates matrix W and phenotype vector y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, y, 0);
+
+ // center y, even for case/control data
+ cPar.pheno_mean = CenterVector(y);
+
+ // run bvsr if rho==1
+ if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+ // read genotypes X (not UtX)
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // perform BSLMM analysis
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmm.MCMC(UtX, y);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ // else, if rho!=1
+ } else {
+ gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // read relatedness matrix G
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ } else {
+ cPar.ReadGenotypes(UtX, G, true);
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, y, Uty);
+
+ // calculate REMLE/MLE estimate and pve
+ CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+ cPar.pve_se_null);
+
+ cPar.PrintSummary();
+
+ // Create and calculate UtX; this uses a large amount of memory
+ cout << "Calculating UtX..." << endl;
+ time_start = clock();
+ CalcUtX(U, UtX);
+ cPar.time_UtX = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // perform BSLMM or BSLMMDAP analysis
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ if (cPar.a_mode == 12) { // ridge regression
+ cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null);
+ } else { // Run MCMC
+ cBslmm.MCMC(U, UtX, Uty, eval, y);
+ }
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ } else {
+ }
+
+ // release all matrices and vectors
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_vector_free(eval);
+ gsl_vector_free(Uty);
+ }
+ gsl_matrix_free(W);
+ gsl_vector_free(y);
+ gsl_matrix_free(UtX);
+ }
+
+ // BSLMM-DAP
+ if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
+ if (cPar.a_mode == 14) {
+ gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+
+ // set covariates matrix W and phenotype vector y
+ // an intercept should be included in W,
+ cPar.CopyCvtPhen(W, y, 0);
+
+ // center y, even for case/control data
+ cPar.pheno_mean = CenterVector(y);
+
+ // run bvsr if rho==1
+ if (cPar.rho_min == 1 && cPar.rho_max == 1) {
+ // read genotypes X (not UtX)
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // perform BSLMM analysis
+ BSLMM cBslmm;
+ cBslmm.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmm.MCMC(UtX, y);
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmm.CopyToParam(cPar);
+ // else, if rho!=1
+ } else {
+ gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_alloc(y->size);
+
+ // read relatedness matrix G
+ if (!(cPar.file_kin).empty()) {
+ cPar.ReadGenotypes(UtX, G, false);
+
+ // read relatedness matrix G
+ ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
+ cPar.k_mode, cPar.error, G);
+ if (cPar.error == true) {
+ cout << "error! fail to read kinship/relatedness file. " << endl;
+ return;
+ }
+
+ // center matrix G
+ CenterMatrix(G);
+ } else {
+ cPar.ReadGenotypes(UtX, G, true);
+ }
+
+ // eigen-decomposition and calculate trace_G
+ cout << "Start Eigen-Decomposition..." << endl;
+ time_start = clock();
+ cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = 0.0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ cPar.trace_G += gsl_vector_get(eval, i);
+ }
+ cPar.trace_G /= (double)eval->size;
+ cPar.time_eigen =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // calculate UtW and Uty
+ CalcUtX(U, W, UtW);
+ CalcUtX(U, y, Uty);
+
+ // calculate REMLE/MLE estimate and pve
+ CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_mle_null, cPar.logl_mle_H0);
+ CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region,
+ cPar.l_remle_null, cPar.logl_remle_H0);
+ CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null,
+ cPar.pve_se_null);
+
+ cPar.PrintSummary();
+
+ // Create and calculate UtX; this uses a large amount of memory
+ cout << "Calculating UtX..." << endl;
+ time_start = clock();
+ CalcUtX(U, UtX);
+ cPar.time_UtX =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // perform analysis; assume X and y are already centered
+ BSLMMDAP cBslmmDap;
+ cBslmmDap.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmmDap.DAP_CalcBF(U, UtX, Uty, eval, y);
+ cPar.time_opt =
+ (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmmDap.CopyToParam(cPar);
+
+ // release all matrices and vectors
+ gsl_matrix_free(G);
+ gsl_matrix_free(U);
+ gsl_matrix_free(UtW);
+ gsl_vector_free(eval);
+ gsl_vector_free(Uty);
+ }
+
+ gsl_matrix_free(W);
+ gsl_vector_free(y);
+ gsl_matrix_free(UtX);
+ } else if (cPar.a_mode == 15) {
+ // perform EM algorithm and estimate parameters
+ vector<string> vec_rs;
+ vector<double> vec_sa2, vec_sb2, wab;
+ vector<vector<vector<double>>> BF;
+
+ // read hyp and bf files (functions defined in BSLMMDAP)
+ ReadFile_hyb(cPar.file_hyp, vec_sa2, vec_sb2, wab);
+ ReadFile_bf(cPar.file_bf, vec_rs, BF);
+
+ cPar.ns_test = vec_rs.size();
+ if (wab.size() != BF[0][0].size()) {
+ cout << "error! hyp and bf files dimension do not match" << endl;
+ }
+
+ // load annotations
+ gsl_matrix *Ac;
+ gsl_matrix_int *Ad;
+ gsl_vector_int *dlevel;
+ size_t kc, kd;
+ if (!cPar.file_cat.empty()) {
+ ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
+ } else {
+ kc = 0;
+ kd = 0;
+ }
+
+ cout << "## number of blocks = " << BF.size() << endl;
+ cout << "## number of analyzed SNPs = " << vec_rs.size() << endl;
+ cout << "## grid size for hyperparameters = " << wab.size() << endl;
+ cout << "## number of continuous annotations = " << kc << endl;
+ cout << "## number of discrete annotations = " << kd << endl;
+
+ // DAP_EstimateHyper (const size_t kc, const size_t kd, const
+ // vector<string> &vec_rs, const vector<double> &vec_sa2, const
+ // vector<double> &vec_sb2, const vector<double> &wab, const
+ // vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int
+ // *Ad, gsl_vector_int *dlevel);
+
+ // perform analysis
+ BSLMMDAP cBslmmDap;
+ cBslmmDap.CopyFromParam(cPar);
+ time_start = clock();
+ cBslmmDap.DAP_EstimateHyper(kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac,
+ Ad, dlevel);
+ cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+ cBslmmDap.CopyToParam(cPar);
+
+ gsl_matrix_free(Ac);
+ gsl_matrix_int_free(Ad);
+ gsl_vector_int_free(dlevel);
+ } else {
+ //
+ }
+ }
+
+ /*
+ //LDR (change 14 to 16?)
+ if (cPar.a_mode==14) {
+ gsl_vector *y=gsl_vector_alloc (cPar.ni_test);
+ gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt);
+ gsl_matrix *G=gsl_matrix_alloc (1, 1);
+ vector<vector<unsigned char> > Xt;
+
+ //set covariates matrix W and phenotype vector y
+ //an intercept is included in W
+ cPar.CopyCvtPhen (W, y, 0);
+ //read in genotype matrix X
+ cPar.ReadGenotypes (Xt, G, false);
+ LDR cLdr;
+ cLdr.CopyFromParam(cPar);
+ time_start=clock();
+ cLdr.VB(Xt, W, y);
-void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar)
-{
- string file_str;
- file_str=cPar.path_out+"/"+cPar.file_out;
- file_str+=".log.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {cout<<"error writing log file: "<<file_str.c_str()<<endl; return;}
-
- outfile<<"##"<<endl;
- outfile<<"## GEMMA Version = "<<version<<endl;
-
- outfile<<"##"<<endl;
- outfile<<"## Command Line Input = ";
- for(int i = 0; i < argc; i++) {
- outfile<<argv[i]<<" ";
- }
- outfile<<endl;
-
- outfile<<"##"<<endl;
- time_t rawtime;
- time(&rawtime);
- tm *ptm = localtime (&rawtime);
-
- outfile<<"## Date = "<<asctime(ptm);
- //ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
-
- outfile<<"##"<<endl;
- outfile<<"## Summary Statistics:"<<endl;
- if (!cPar.file_cor.empty() || !cPar.file_study.empty() || !cPar.file_mstudy.empty() ) {
- outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
- outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
- //outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
- //outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_ref<<endl;
- //outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- //outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-
- outfile<<"## pve estimates = ";
- for (size_t i=0; i<cPar.v_pve.size(); i++) {
- outfile<<" "<<cPar.v_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## se(pve) = ";
- for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
- outfile<<" "<<cPar.v_se_pve[i];
- }
- outfile<<endl;
-
- if (cPar.n_vc>1) {
- outfile<<"## total pve = "<<cPar.pve_total<<endl;
- outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
- }
-
- outfile<<"## sigma2 per snp = ";
- for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## se(sigma2 per snp) = ";
- for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_se_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## enrichment = ";
- for (size_t i=0; i<cPar.v_enrich.size(); i++) {
- outfile<<" "<<cPar.v_enrich[i];
- }
- outfile<<endl;
-
- outfile<<"## se(enrichment) = ";
- for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
- outfile<<" "<<cPar.v_se_enrich[i];
- }
- outfile<<endl;
- } else if (!cPar.file_beta.empty() && (cPar.a_mode==61 || cPar.a_mode==62) ) {
- outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl;
- outfile<<"## number of total individuals in the reference = "<<cPar.ni_total<<endl;
- outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
- outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_total<<endl;
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
- } else if (!cPar.file_beta.empty() && (cPar.a_mode==66 || cPar.a_mode==67) ) {
- outfile<<"## number of total individuals in the sample = "<<cPar.ni_total<<endl;
- outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl;
- outfile<<"## number of total SNPs in the sample = "<<cPar.ns_total<<endl;
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
-
- outfile<<"## pve estimates = ";
- for (size_t i=0; i<cPar.v_pve.size(); i++) {
- outfile<<" "<<cPar.v_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## se(pve) = ";
- for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
- outfile<<" "<<cPar.v_se_pve[i];
- }
- outfile<<endl;
-
- if (cPar.n_vc>1) {
- outfile<<"## total pve = "<<cPar.pve_total<<endl;
- outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
- }
-
- outfile<<"## sigma2 per snp = ";
- for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## se(sigma2 per snp) = ";
- for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_se_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## enrichment = ";
- for (size_t i=0; i<cPar.v_enrich.size(); i++) {
- outfile<<" "<<cPar.v_enrich[i];
- }
- outfile<<endl;
-
- outfile<<"## se(enrichment) = ";
- for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
- outfile<<" "<<cPar.v_se_enrich[i];
- }
- outfile<<endl;
- } else {
- outfile<<"## number of total individuals = "<<cPar.ni_total<<endl;
-
- if (cPar.a_mode==43) {
- outfile<<"## number of analyzed individuals = "<<cPar.ni_cvt<<endl;
- outfile<<"## number of individuals with full phenotypes = "<<cPar.ni_test<<endl;
- } else if (cPar.a_mode!=27 && cPar.a_mode!=28) {
- outfile<<"## number of analyzed individuals = "<<cPar.ni_test<<endl;
- }
-
- outfile<<"## number of covariates = "<<cPar.n_cvt<<endl;
- outfile<<"## number of phenotypes = "<<cPar.n_ph<<endl;
- if (cPar.a_mode==43) {
- outfile<<"## number of observed data = "<<cPar.np_obs<<endl;
- outfile<<"## number of missing data = "<<cPar.np_miss<<endl;
- }
- if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63 || cPar.a_mode==66 || cPar.a_mode==67) {
- outfile<<"## number of variance components = "<<cPar.n_vc<<endl;
- }
-
- if (!(cPar.file_gene).empty()) {
- outfile<<"## number of total genes = "<<cPar.ng_total<<endl;
- outfile<<"## number of analyzed genes = "<<cPar.ng_test<<endl;
- } else if (cPar.file_epm.empty()) {
- outfile<<"## number of total SNPs = "<<cPar.ns_total<<endl;
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- } else {
- outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
- }
-
- if (cPar.a_mode==13) {
- outfile<<"## number of cases = "<<cPar.ni_case<<endl;
- outfile<<"## number of controls = "<<cPar.ni_control<<endl;
- }
- }
-
- if ( (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) {
- // outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
- if (cPar.n_ph==1) {
- outfile<<"## pve estimates = ";
- for (size_t i=0; i<cPar.v_pve.size(); i++) {
- outfile<<" "<<cPar.v_pve[i];
- }
- outfile<<endl;
-
- outfile<<"## se(pve) = ";
- for (size_t i=0; i<cPar.v_se_pve.size(); i++) {
- outfile<<" "<<cPar.v_se_pve[i];
- }
- outfile<<endl;
-
- if (cPar.n_vc>1) {
- outfile<<"## total pve = "<<cPar.pve_total<<endl;
- outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl;
- }
-
- outfile<<"## sigma2 estimates = ";
- for (size_t i=0; i<cPar.v_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_sigma2[i];
- }
- outfile<<endl;
-
- outfile<<"## se(sigma2) = ";
- for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) {
- outfile<<" "<<cPar.v_se_sigma2[i];
- }
- outfile<<endl;
-
- if (!cPar.file_beta.empty() ) {
- outfile<<"## enrichment = ";
- for (size_t i=0; i<cPar.v_enrich.size(); i++) {
- outfile<<" "<<cPar.v_enrich[i];
- }
- outfile<<endl;
-
- outfile<<"## se(enrichment) = ";
- for (size_t i=0; i<cPar.v_se_enrich.size(); i++) {
- outfile<<" "<<cPar.v_se_enrich[i];
- }
- outfile<<endl;
- }
- /*
- outfile<<"## beta estimate in the null model = ";
- for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.beta_remle_null[i];
- }
- outfile<<endl;
- outfile<<"## se(beta) = ";
- for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.se_beta_remle_null[i];
- }
- outfile<<endl;
- */
- }
- }
-
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl;
- outfile<<"## MLE log-likelihood in the null model = "<<cPar.logl_mle_H0<<endl;
- if (cPar.n_ph==1) {
- //outfile<<"## lambda REMLE estimate in the null (linear mixed) model = "<<cPar.l_remle_null<<endl;
- //outfile<<"## lambda MLE estimate in the null (linear mixed) model = "<<cPar.l_mle_null<<endl;
- outfile<<"## pve estimate in the null model = "<<cPar.pve_null<<endl;
- outfile<<"## se(pve) in the null model = "<<cPar.pve_se_null<<endl;
- outfile<<"## vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
- outfile<<"## ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
- outfile<<"## beta estimate in the null model = ";
- for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.beta_remle_null[i];
- }
- outfile<<endl;
- outfile<<"## se(beta) = ";
- for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
- outfile<<" "<<cPar.se_beta_remle_null[i];
- }
- outfile<<endl;
-
- } else {
- size_t c;
- outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Vg_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Vg): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVg_remle_null[c])<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Ve_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Ve): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVe_remle_null[c])<<"\t";
- }
- outfile<<endl;
- }
-
- outfile<<"## MLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_ph; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Vg_mle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Vg): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVg_mle_null[c])<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## MLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_ph; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Ve_mle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(Ve): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<sqrt(cPar.VVe_mle_null[c])<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## estimate for B (d by c) in the null model (columns correspond to the covariates provided in the file): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_cvt; j++) {
- c=i*cPar.n_cvt+j;
- outfile<<cPar.beta_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## se(B): "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<cPar.n_cvt; j++) {
- c=i*cPar.n_cvt+j;
- outfile<<cPar.se_beta_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- }
- }
-
- /*
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
- if (cPar.n_ph==1) {
- outfile<<"## REMLE vg estimate in the null model = "<<cPar.vg_remle_null<<endl;
- outfile<<"## REMLE ve estimate in the null model = "<<cPar.ve_remle_null<<endl;
- } else {
- size_t c;
- outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Vg_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<cPar.n_ph; i++) {
- for (size_t j=0; j<=i; j++) {
- c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
- outfile<<cPar.Ve_remle_null[c]<<"\t";
- }
- outfile<<endl;
- }
- }
- }
- */
-
-
- if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
- outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl;
- }
-
- if (cPar.a_mode==11 || cPar.a_mode==13) {
- outfile<<"##"<<endl;
- outfile<<"## MCMC related:"<<endl;
- outfile<<"## initial value of h = "<<cPar.cHyp_initial.h<<endl;
- outfile<<"## initial value of rho = "<<cPar.cHyp_initial.rho<<endl;
- outfile<<"## initial value of pi = "<<exp(cPar.cHyp_initial.logp)<<endl;
- outfile<<"## initial value of |gamma| = "<<cPar.cHyp_initial.n_gamma<<endl;
- outfile<<"## random seed = "<<cPar.randseed<<endl;
- outfile<<"## acceptance ratio = "<<(double)cPar.n_accept/(double)((cPar.w_step+cPar.s_step)*cPar.n_mh)<<endl;
- }
-
- outfile<<"##"<<endl;
- outfile<<"## Computation Time:"<<endl;
- outfile<<"## total computation time = "<<cPar.time_total<<" min "<<endl;
- outfile<<"## computation time break down: "<<endl;
- if (cPar.a_mode==21 || cPar.a_mode==22 || cPar.a_mode==11 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
- outfile<<"## time on calculating relatedness matrix = "<<cPar.time_G<<" min "<<endl;
- }
- if (cPar.a_mode==31) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- }
- if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- outfile<<"## time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl;
- }
- if ((cPar.a_mode>=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) {
- outfile<<"## time on optimization = "<<cPar.time_opt<<" min "<<endl;
- }
- if (cPar.a_mode==11 || cPar.a_mode==13) {
- outfile<<"## time on proposal = "<<cPar.time_Proposal<<" min "<<endl;
- outfile<<"## time on mcmc = "<<cPar.time_opt<<" min "<<endl;
- outfile<<"## time on Omega = "<<cPar.time_Omega<<" min "<<endl;
- }
- if (cPar.a_mode==41 || cPar.a_mode==42) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- }
- if (cPar.a_mode==43) {
- outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl;
- outfile<<"## time on predicting phenotypes = "<<cPar.time_opt<<" min "<<endl;
- }
- outfile<<"##"<<endl;
-
- outfile.close();
- outfile.clear();
- return;
+ cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ cLdr.CopyToParam(cPar);
+
+ gsl_vector_free (y);
+ gsl_matrix_free (W);
+ gsl_matrix_free (G);
+ }
+ */
+
+ cPar.time_total = (clock() - time_begin) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ return;
}
+void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
+ string file_str;
+ file_str = cPar.path_out + "/" + cPar.file_out;
+ file_str += ".log.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing log file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ outfile << "##" << endl;
+ outfile << "## GEMMA Version = " << version << endl;
+ outfile << "##" << endl;
+ outfile << "## Command Line Input = ";
+ for (int i = 0; i < argc; i++) {
+ outfile << argv[i] << " ";
+ }
+ outfile << endl;
+
+ outfile << "##" << endl;
+ time_t rawtime;
+ time(&rawtime);
+ tm *ptm = localtime(&rawtime);
+
+ outfile << "## Date = " << asctime(ptm);
+ // ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
+
+ outfile << "##" << endl;
+ outfile << "## Summary Statistics:" << endl;
+ if (!cPar.file_cor.empty() || !cPar.file_study.empty() ||
+ !cPar.file_mstudy.empty()) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_study << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_ref << endl;
+ // outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
+ // outfile<<"## number of total SNPs in the reference panel =
+ // "<<cPar.ns_ref<<endl;
+ // outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
+ // outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+
+ outfile << "## pve estimates = ";
+ for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+ outfile << " " << cPar.v_pve[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(pve) = ";
+ for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+ outfile << " " << cPar.v_se_pve[i];
+ }
+ outfile << endl;
+
+ if (cPar.n_vc > 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 per snp = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2 per snp) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ } else if (!cPar.file_beta.empty() &&
+ (cPar.a_mode == 61 || cPar.a_mode == 62)) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_study << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_total << endl;
+ outfile << "## number of total SNPs in the sample = " << cPar.ns_study
+ << endl;
+ outfile << "## number of total SNPs in the reference panel = "
+ << cPar.ns_total << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+ } else if (!cPar.file_beta.empty() &&
+ (cPar.a_mode == 66 || cPar.a_mode == 67)) {
+ outfile << "## number of total individuals in the sample = "
+ << cPar.ni_total << endl;
+ outfile << "## number of total individuals in the reference = "
+ << cPar.ni_ref << endl;
+ outfile << "## number of total SNPs in the sample = " << cPar.ns_total
+ << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+
+ outfile << "## pve estimates = ";
+ for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+ outfile << " " << cPar.v_pve[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(pve) = ";
+ for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+ outfile << " " << cPar.v_se_pve[i];
+ }
+ outfile << endl;
+
+ if (cPar.n_vc > 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 per snp = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2 per snp) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ } else {
+ outfile << "## number of total individuals = " << cPar.ni_total << endl;
+
+ if (cPar.a_mode == 43) {
+ outfile << "## number of analyzed individuals = " << cPar.ni_cvt << endl;
+ outfile << "## number of individuals with full phenotypes = "
+ << cPar.ni_test << endl;
+ } else if (cPar.a_mode != 27 && cPar.a_mode != 28) {
+ outfile << "## number of analyzed individuals = " << cPar.ni_test << endl;
+ }
+
+ outfile << "## number of covariates = " << cPar.n_cvt << endl;
+ outfile << "## number of phenotypes = " << cPar.n_ph << endl;
+ if (cPar.a_mode == 43) {
+ outfile << "## number of observed data = " << cPar.np_obs << endl;
+ outfile << "## number of missing data = " << cPar.np_miss << endl;
+ }
+ if (cPar.a_mode == 25 || cPar.a_mode == 26 || cPar.a_mode == 27 ||
+ cPar.a_mode == 28 || cPar.a_mode == 61 || cPar.a_mode == 62 ||
+ cPar.a_mode == 63 || cPar.a_mode == 66 || cPar.a_mode == 67) {
+ outfile << "## number of variance components = " << cPar.n_vc << endl;
+ }
+
+ if (!(cPar.file_gene).empty()) {
+ outfile << "## number of total genes = " << cPar.ng_total << endl;
+ outfile << "## number of analyzed genes = " << cPar.ng_test << endl;
+ } else if (cPar.file_epm.empty()) {
+ outfile << "## number of total SNPs = " << cPar.ns_total << endl;
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ } else {
+ outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+ }
+
+ if (cPar.a_mode == 13) {
+ outfile << "## number of cases = " << cPar.ni_case << endl;
+ outfile << "## number of controls = " << cPar.ni_control << endl;
+ }
+ }
+
+ if ((cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) &&
+ cPar.file_cor.empty() && cPar.file_study.empty() &&
+ cPar.file_mstudy.empty()) {
+ // outfile<<"## REMLE log-likelihood in the null model =
+ //"<<cPar.logl_remle_H0<<endl;
+ if (cPar.n_ph == 1) {
+ outfile << "## pve estimates = ";
+ for (size_t i = 0; i < cPar.v_pve.size(); i++) {
+ outfile << " " << cPar.v_pve[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(pve) = ";
+ for (size_t i = 0; i < cPar.v_se_pve.size(); i++) {
+ outfile << " " << cPar.v_se_pve[i];
+ }
+ outfile << endl;
+
+ if (cPar.n_vc > 1) {
+ outfile << "## total pve = " << cPar.pve_total << endl;
+ outfile << "## se(total pve) = " << cPar.se_pve_total << endl;
+ }
+
+ outfile << "## sigma2 estimates = ";
+ for (size_t i = 0; i < cPar.v_sigma2.size(); i++) {
+ outfile << " " << cPar.v_sigma2[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(sigma2) = ";
+ for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) {
+ outfile << " " << cPar.v_se_sigma2[i];
+ }
+ outfile << endl;
+
+ if (!cPar.file_beta.empty()) {
+ outfile << "## enrichment = ";
+ for (size_t i = 0; i < cPar.v_enrich.size(); i++) {
+ outfile << " " << cPar.v_enrich[i];
+ }
+ outfile << endl;
+
+ outfile << "## se(enrichment) = ";
+ for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) {
+ outfile << " " << cPar.v_se_enrich[i];
+ }
+ outfile << endl;
+ }
+ /*
+ outfile<<"## beta estimate in the null model = ";
+ for (size_t i=0; i<cPar.beta_remle_null.size(); i++) {
+ outfile<<" "<<cPar.beta_remle_null[i];
+ }
+ outfile<<endl;
+ outfile<<"## se(beta) = ";
+ for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) {
+ outfile<<" "<<cPar.se_beta_remle_null[i];
+ }
+ outfile<<endl;
+ */
+ }
+ }
+
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
+ cPar.a_mode == 12 || cPar.a_mode == 13) {
+ outfile << "## REMLE log-likelihood in the null model = "
+ << cPar.logl_remle_H0 << endl;
+ outfile << "## MLE log-likelihood in the null model = " << cPar.logl_mle_H0
+ << endl;
+ if (cPar.n_ph == 1) {
+ // outfile<<"## lambda REMLE estimate in the null (linear mixed) model =
+ // "<<cPar.l_remle_null<<endl;
+ // outfile<<"## lambda MLE estimate in the null (linear mixed) model =
+ // "<<cPar.l_mle_null<<endl;
+ outfile << "## pve estimate in the null model = " << cPar.pve_null
+ << endl;
+ outfile << "## se(pve) in the null model = " << cPar.pve_se_null << endl;
+ outfile << "## vg estimate in the null model = " << cPar.vg_remle_null
+ << endl;
+ outfile << "## ve estimate in the null model = " << cPar.ve_remle_null
+ << endl;
+ outfile << "## beta estimate in the null model = ";
+ for (size_t i = 0; i < cPar.beta_remle_null.size(); i++) {
+ outfile << " " << cPar.beta_remle_null[i];
+ }
+ outfile << endl;
+ outfile << "## se(beta) = ";
+ for (size_t i = 0; i < cPar.se_beta_remle_null.size(); i++) {
+ outfile << " " << cPar.se_beta_remle_null[i];
+ }
+ outfile << endl;
+
+ } else {
+ size_t c;
+ outfile << "## REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Vg_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Vg): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVg_remle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Ve_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Ve): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVe_remle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+
+ outfile << "## MLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_ph; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Vg_mle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Vg): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVg_mle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## MLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_ph; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << cPar.Ve_mle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(Ve): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) -
+ min(i, j);
+ outfile << sqrt(cPar.VVe_mle_null[c]) << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## estimate for B (d by c) in the null model (columns "
+ "correspond to the covariates provided in the file): "
+ << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_cvt; j++) {
+ c = i * cPar.n_cvt + j;
+ outfile << cPar.beta_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ outfile << "## se(B): " << endl;
+ for (size_t i = 0; i < cPar.n_ph; i++) {
+ for (size_t j = 0; j < cPar.n_cvt; j++) {
+ c = i * cPar.n_cvt + j;
+ outfile << cPar.se_beta_remle_null[c] << "\t";
+ }
+ outfile << endl;
+ }
+ }
+ }
+
+ /*
+ if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 ||
+ cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) {
+ if (cPar.n_ph==1) {
+ outfile<<"## REMLE vg estimate in the null model =
+ "<<cPar.vg_remle_null<<endl;
+ outfile<<"## REMLE ve estimate in the null model =
+ "<<cPar.ve_remle_null<<endl;
+ } else {
+ size_t c;
+ outfile<<"## REMLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<cPar.n_ph; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
+ outfile<<cPar.Vg_remle_null[c]<<"\t";
+ }
+ outfile<<endl;
+ }
+ outfile<<"## REMLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<cPar.n_ph; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j);
+ outfile<<cPar.Ve_remle_null[c]<<"\t";
+ }
+ outfile<<endl;
+ }
+ }
+ }
+ */
+
+ if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13 ||
+ cPar.a_mode == 14 || cPar.a_mode == 16) {
+ outfile << "## estimated mean = " << cPar.pheno_mean << endl;
+ }
+
+ if (cPar.a_mode == 11 || cPar.a_mode == 13) {
+ outfile << "##" << endl;
+ outfile << "## MCMC related:" << endl;
+ outfile << "## initial value of h = " << cPar.cHyp_initial.h << endl;
+ outfile << "## initial value of rho = " << cPar.cHyp_initial.rho << endl;
+ outfile << "## initial value of pi = " << exp(cPar.cHyp_initial.logp)
+ << endl;
+ outfile << "## initial value of |gamma| = " << cPar.cHyp_initial.n_gamma
+ << endl;
+ outfile << "## random seed = " << cPar.randseed << endl;
+ outfile << "## acceptance ratio = "
+ << (double)cPar.n_accept /
+ (double)((cPar.w_step + cPar.s_step) * cPar.n_mh)
+ << endl;
+ }
+
+ outfile << "##" << endl;
+ outfile << "## Computation Time:" << endl;
+ outfile << "## total computation time = " << cPar.time_total << " min "
+ << endl;
+ outfile << "## computation time break down: " << endl;
+ if (cPar.a_mode == 21 || cPar.a_mode == 22 || cPar.a_mode == 11 ||
+ cPar.a_mode == 13 || cPar.a_mode == 14 || cPar.a_mode == 16) {
+ outfile << "## time on calculating relatedness matrix = "
+ << cPar.time_G << " min " << endl;
+ }
+ if (cPar.a_mode == 31) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ }
+ if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
+ cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 ||
+ cPar.a_mode == 12 || cPar.a_mode == 13 || cPar.a_mode == 14 ||
+ cPar.a_mode == 16) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ outfile << "## time on calculating UtX = " << cPar.time_UtX << " min "
+ << endl;
+ }
+ if ((cPar.a_mode >= 1 && cPar.a_mode <= 4) ||
+ (cPar.a_mode >= 51 && cPar.a_mode <= 54)) {
+ outfile << "## time on optimization = " << cPar.time_opt << " min "
+ << endl;
+ }
+ if (cPar.a_mode == 11 || cPar.a_mode == 13) {
+ outfile << "## time on proposal = " << cPar.time_Proposal << " min "
+ << endl;
+ outfile << "## time on mcmc = " << cPar.time_opt << " min " << endl;
+ outfile << "## time on Omega = " << cPar.time_Omega << " min " << endl;
+ }
+ if (cPar.a_mode == 41 || cPar.a_mode == 42) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ }
+ if (cPar.a_mode == 43) {
+ outfile << "## time on eigen-decomposition = " << cPar.time_eigen
+ << " min " << endl;
+ outfile << "## time on predicting phenotypes = " << cPar.time_opt
+ << " min " << endl;
+ }
+ outfile << "##" << endl;
+
+ outfile.close();
+ outfile.clear();
+ return;
+}
diff --git a/src/gemma.h b/src/gemma.h
index 78828ef..cd1683a 100644
--- a/src/gemma.h
+++ b/src/gemma.h
@@ -26,22 +26,21 @@ using namespace std;
class GEMMA {
public:
- // Parameters.
- string version;
- string date;
- string year;
-
- // Constructor.
- GEMMA(void);
-
- // Functions.
- void PrintHeader (void);
- void PrintHelp (size_t option);
- void PrintLicense (void);
- void Assign (int argc, char **argv, PARAM &cPar);
- void BatchRun (PARAM &cPar);
- void WriteLog (int argc, char **argv, PARAM &cPar);
+ // Parameters.
+ string version;
+ string date;
+ string year;
+
+ // Constructor.
+ GEMMA(void);
+
+ // Functions.
+ void PrintHeader(void);
+ void PrintHelp(size_t option);
+ void PrintLicense(void);
+ void Assign(int argc, char **argv, PARAM &cPar);
+ void BatchRun(PARAM &cPar);
+ void WriteLog(int argc, char **argv, PARAM &cPar);
};
#endif
-
diff --git a/src/gzstream.cpp b/src/gzstream.cpp
index 688b625..a7014d6 100644
--- a/src/gzstream.cpp
+++ b/src/gzstream.cpp
@@ -28,7 +28,7 @@
#include "gzstream.h"
#include <iostream>
-#include <string.h> // for memcpy
+#include <string.h> // for memcpy
#ifdef GZSTREAM_NAMESPACE
namespace GZSTREAM_NAMESPACE {
@@ -42,119 +42,117 @@ namespace GZSTREAM_NAMESPACE {
// class gzstreambuf:
// --------------------------------------
-gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
- if ( is_open())
- return (gzstreambuf*)0;
- mode = open_mode;
- // no append nor read/write mode
- if ((mode & std::ios::ate) || (mode & std::ios::app)
- || ((mode & std::ios::in) && (mode & std::ios::out)))
- return (gzstreambuf*)0;
- char fmode[10];
- char* fmodeptr = fmode;
- if ( mode & std::ios::in)
- *fmodeptr++ = 'r';
- else if ( mode & std::ios::out)
- *fmodeptr++ = 'w';
- *fmodeptr++ = 'b';
- *fmodeptr = '\0';
- file = gzopen( name, fmode);
- if (file == 0)
- return (gzstreambuf*)0;
- opened = 1;
- return this;
+gzstreambuf *gzstreambuf::open(const char *name, int open_mode) {
+ if (is_open())
+ return (gzstreambuf *)0;
+ mode = open_mode;
+ // no append nor read/write mode
+ if ((mode & std::ios::ate) || (mode & std::ios::app) ||
+ ((mode & std::ios::in) && (mode & std::ios::out)))
+ return (gzstreambuf *)0;
+ char fmode[10];
+ char *fmodeptr = fmode;
+ if (mode & std::ios::in)
+ *fmodeptr++ = 'r';
+ else if (mode & std::ios::out)
+ *fmodeptr++ = 'w';
+ *fmodeptr++ = 'b';
+ *fmodeptr = '\0';
+ file = gzopen(name, fmode);
+ if (file == 0)
+ return (gzstreambuf *)0;
+ opened = 1;
+ return this;
}
-gzstreambuf * gzstreambuf::close() {
- if ( is_open()) {
- sync();
- opened = 0;
- if ( gzclose( file) == Z_OK)
- return this;
- }
- return (gzstreambuf*)0;
+gzstreambuf *gzstreambuf::close() {
+ if (is_open()) {
+ sync();
+ opened = 0;
+ if (gzclose(file) == Z_OK)
+ return this;
+ }
+ return (gzstreambuf *)0;
}
int gzstreambuf::underflow() { // used for input buffer only
- if ( gptr() && ( gptr() < egptr()))
- return * reinterpret_cast<unsigned char *>( gptr());
-
- if ( ! (mode & std::ios::in) || ! opened)
- return EOF;
- // Josuttis' implementation of inbuf
- int n_putback = gptr() - eback();
- if ( n_putback > 4)
- n_putback = 4;
- memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
-
- int num = gzread( file, buffer+4, bufferSize-4);
- if (num <= 0) // ERROR or EOF
- return EOF;
-
- // reset buffer pointers
- setg( buffer + (4 - n_putback), // beginning of putback area
- buffer + 4, // read position
- buffer + 4 + num); // end of buffer
-
- // return next character
- return * reinterpret_cast<unsigned char *>( gptr());
+ if (gptr() && (gptr() < egptr()))
+ return *reinterpret_cast<unsigned char *>(gptr());
+
+ if (!(mode & std::ios::in) || !opened)
+ return EOF;
+ // Josuttis' implementation of inbuf
+ int n_putback = gptr() - eback();
+ if (n_putback > 4)
+ n_putback = 4;
+ memcpy(buffer + (4 - n_putback), gptr() - n_putback, n_putback);
+
+ int num = gzread(file, buffer + 4, bufferSize - 4);
+ if (num <= 0) // ERROR or EOF
+ return EOF;
+
+ // reset buffer pointers
+ setg(buffer + (4 - n_putback), // beginning of putback area
+ buffer + 4, // read position
+ buffer + 4 + num); // end of buffer
+
+ // return next character
+ return *reinterpret_cast<unsigned char *>(gptr());
}
int gzstreambuf::flush_buffer() {
- // Separate the writing of the buffer from overflow() and
- // sync() operation.
- int w = pptr() - pbase();
- if ( gzwrite( file, pbase(), w) != w)
- return EOF;
- pbump( -w);
- return w;
+ // Separate the writing of the buffer from overflow() and
+ // sync() operation.
+ int w = pptr() - pbase();
+ if (gzwrite(file, pbase(), w) != w)
+ return EOF;
+ pbump(-w);
+ return w;
}
-int gzstreambuf::overflow( int c) { // used for output buffer only
- if ( ! ( mode & std::ios::out) || ! opened)
- return EOF;
- if (c != EOF) {
- *pptr() = c;
- pbump(1);
- }
- if ( flush_buffer() == EOF)
- return EOF;
- return c;
+int gzstreambuf::overflow(int c) { // used for output buffer only
+ if (!(mode & std::ios::out) || !opened)
+ return EOF;
+ if (c != EOF) {
+ *pptr() = c;
+ pbump(1);
+ }
+ if (flush_buffer() == EOF)
+ return EOF;
+ return c;
}
int gzstreambuf::sync() {
- // Changed to use flush_buffer() instead of overflow( EOF)
- // which caused improper behavior with std::endl and flush(),
- // bug reported by Vincent Ricard.
- if ( pptr() && pptr() > pbase()) {
- if ( flush_buffer() == EOF)
- return -1;
- }
- return 0;
+ // Changed to use flush_buffer() instead of overflow( EOF)
+ // which caused improper behavior with std::endl and flush(),
+ // bug reported by Vincent Ricard.
+ if (pptr() && pptr() > pbase()) {
+ if (flush_buffer() == EOF)
+ return -1;
+ }
+ return 0;
}
// --------------------------------------
// class gzstreambase:
// --------------------------------------
-gzstreambase::gzstreambase( const char* name, int mode) {
- init( &buf);
- open( name, mode);
+gzstreambase::gzstreambase(const char *name, int mode) {
+ init(&buf);
+ open(name, mode);
}
-gzstreambase::~gzstreambase() {
- buf.close();
-}
+gzstreambase::~gzstreambase() { buf.close(); }
-void gzstreambase::open( const char* name, int open_mode) {
- if ( ! buf.open( name, open_mode))
- clear( rdstate() | std::ios::badbit);
+void gzstreambase::open(const char *name, int open_mode) {
+ if (!buf.open(name, open_mode))
+ clear(rdstate() | std::ios::badbit);
}
void gzstreambase::close() {
- if ( buf.is_open())
- if ( ! buf.close())
- clear( rdstate() | std::ios::badbit);
+ if (buf.is_open())
+ if (!buf.close())
+ clear(rdstate() | std::ios::badbit);
}
#ifdef GZSTREAM_NAMESPACE
diff --git a/src/gzstream.h b/src/gzstream.h
index 241ff76..f760138 100644
--- a/src/gzstream.h
+++ b/src/gzstream.h
@@ -30,8 +30,8 @@
#define GZSTREAM_H 1
// Standard C++ with new header file names and std::namespace.
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <zlib.h>
#ifdef GZSTREAM_NAMESPACE
@@ -44,43 +44,45 @@ namespace GZSTREAM_NAMESPACE {
class gzstreambuf : public std::streambuf {
private:
- static const int bufferSize = 47+256; // size of data buff
- // totals 512 bytes under g++ for igzstream at the end.
+ static const int bufferSize = 47 + 256; // size of data buff
+ // totals 512 bytes under g++ for igzstream at the end.
+
+ gzFile file; // file handle for compressed file
+ char buffer[bufferSize]; // data buffer
+ char opened; // open/close state of stream
+ int mode; // I/O mode
- gzFile file; // file handle for compressed file
- char buffer[bufferSize]; // data buffer
- char opened; // open/close state of stream
- int mode; // I/O mode
+ int flush_buffer();
- int flush_buffer();
public:
- gzstreambuf() : opened(0) {
- setp( buffer, buffer + (bufferSize-1));
- setg( buffer + 4, // beginning of putback area
- buffer + 4, // read position
- buffer + 4); // end position
- // ASSERT: both input & output capabilities will not be used together
- }
- int is_open() { return opened; }
- gzstreambuf* open( const char* name, int open_mode);
- gzstreambuf* close();
- ~gzstreambuf() { close(); }
-
- virtual int overflow( int c = EOF);
- virtual int underflow();
- virtual int sync();
+ gzstreambuf() : opened(0) {
+ setp(buffer, buffer + (bufferSize - 1));
+ setg(buffer + 4, // beginning of putback area
+ buffer + 4, // read position
+ buffer + 4); // end position
+ // ASSERT: both input & output capabilities will not be used together
+ }
+ int is_open() { return opened; }
+ gzstreambuf *open(const char *name, int open_mode);
+ gzstreambuf *close();
+ ~gzstreambuf() { close(); }
+
+ virtual int overflow(int c = EOF);
+ virtual int underflow();
+ virtual int sync();
};
class gzstreambase : virtual public std::ios {
protected:
- gzstreambuf buf;
+ gzstreambuf buf;
+
public:
- gzstreambase() { init(&buf); }
- gzstreambase( const char* name, int open_mode);
- ~gzstreambase();
- void open( const char* name, int open_mode);
- void close();
- gzstreambuf* rdbuf() { return &buf; }
+ gzstreambase() { init(&buf); }
+ gzstreambase(const char *name, int open_mode);
+ ~gzstreambase();
+ void open(const char *name, int open_mode);
+ void close();
+ gzstreambuf *rdbuf() { return &buf; }
};
// ----------------------------------------------------------------------------
@@ -91,24 +93,24 @@ public:
class igzstream : public gzstreambase, public std::istream {
public:
- igzstream() : std::istream( &buf) {}
- igzstream( const char* name, int open_mode = std::ios::in)
- : gzstreambase( name, open_mode), std::istream( &buf) {}
- gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
- void open( const char* name, int open_mode = std::ios::in) {
- gzstreambase::open( name, open_mode);
- }
+ igzstream() : std::istream(&buf) {}
+ igzstream(const char *name, int open_mode = std::ios::in)
+ : gzstreambase(name, open_mode), std::istream(&buf) {}
+ gzstreambuf *rdbuf() { return gzstreambase::rdbuf(); }
+ void open(const char *name, int open_mode = std::ios::in) {
+ gzstreambase::open(name, open_mode);
+ }
};
class ogzstream : public gzstreambase, public std::ostream {
public:
- ogzstream() : std::ostream( &buf) {}
- ogzstream( const char* name, int mode = std::ios::out)
- : gzstreambase( name, mode), std::ostream( &buf) {}
- gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
- void open( const char* name, int open_mode = std::ios::out) {
- gzstreambase::open( name, open_mode);
- }
+ ogzstream() : std::ostream(&buf) {}
+ ogzstream(const char *name, int mode = std::ios::out)
+ : gzstreambase(name, mode), std::ostream(&buf) {}
+ gzstreambuf *rdbuf() { return gzstreambase::rdbuf(); }
+ void open(const char *name, int open_mode = std::ios::out) {
+ gzstreambase::open(name, open_mode);
+ }
};
#ifdef GZSTREAM_NAMESPACE
diff --git a/src/io.cpp b/src/io.cpp
index 3bf6a9e..44251aa 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -16,3007 +16,3191 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include <assert.h>
+#include <bitset>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
#include <fstream>
-#include <sstream>
-#include <string>
#include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
#include <map>
#include <set>
-#include <cstring>
-#include <cmath>
-#include <cstdint>
+#include <sstream>
#include <stdio.h>
#include <stdlib.h>
-#include <assert.h>
+#include <string>
+#include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
-#include "lapack.h"
-#include "gzstream.h"
-#include "mathfunc.h"
#include "eigenlib.h"
+#include "gzstream.h"
#include "io.h"
+#include "lapack.h"
+#include "mathfunc.h"
using namespace std;
// Print progress bar.
-void ProgressBar (string str, double p, double total) {
- double progress = (100.0 * p / total);
- int barsize = (int) (progress / 2.0);
- char bar[51];
-
- cout<<str;
- for (int i = 0; i <50; i++) {
- if (i<barsize) {bar[i] = '=';}
- else {bar[i]=' ';}
- cout<<bar[i];
- }
- cout<<setprecision(2)<<fixed<<progress<<"%\r"<<flush;
-
- return;
-}
+void ProgressBar(string str, double p, double total) {
+ double progress = (100.0 * p / total);
+ int barsize = (int)(progress / 2.0);
+ char bar[51];
+
+ cout << str;
+ for (int i = 0; i < 50; i++) {
+ if (i < barsize) {
+ bar[i] = '=';
+ } else {
+ bar[i] = ' ';
+ }
+ cout << bar[i];
+ }
+ cout << setprecision(2) << fixed << progress << "%\r" << flush;
-// Print progress bar with acceptance ratio.
-void ProgressBar (string str, double p, double total, double ratio) {
- double progress = (100.0 * p / total);
- int barsize = (int) (progress / 2.0);
- char bar[51];
-
- cout<<str;
- for (int i = 0; i <50; i++) {
- if (i<barsize) {bar[i] = '=';}
- else {bar[i]=' ';}
- cout<<bar[i];
- }
- cout<<setprecision(2)<<fixed<<progress<<"% "<<ratio<<"\r"<<flush;
- return;
+ return;
}
-bool isBlankLine(char const* line) {
- for ( char const* cp = line; *cp; ++cp ) {
- if ( !isspace(*cp) )
- return false;
+// Print progress bar with acceptance ratio.
+void ProgressBar(string str, double p, double total, double ratio) {
+ double progress = (100.0 * p / total);
+ int barsize = (int)(progress / 2.0);
+ char bar[51];
+
+ cout << str;
+ for (int i = 0; i < 50; i++) {
+ if (i < barsize) {
+ bar[i] = '=';
+ } else {
+ bar[i] = ' ';
}
- return true;
+ cout << bar[i];
+ }
+ cout << setprecision(2) << fixed << progress << "% " << ratio << "\r"
+ << flush;
+ return;
}
-bool isBlankLine(std::string const& line) {
- return isBlankLine(line.c_str());
+bool isBlankLine(char const *line) {
+ for (char const *cp = line; *cp; ++cp) {
+ if (!isspace(*cp))
+ return false;
+ }
+ return true;
}
+bool isBlankLine(std::string const &line) { return isBlankLine(line.c_str()); }
+
// In case files are ended with "\r" or "\r\n".
-std::istream& safeGetline(std::istream& is, std::string& t) {
- t.clear();
-
- // The characters in the stream are read one-by-one using a
- // std::streambuf. That is faster than reading them one-by-one
- // using the std::istream. Code that uses streambuf this way must
- // be guarded by a sentry object. The sentry object performs
- // various tasks, such as thread synchronization and updating the
- // stream state.
- std::istream::sentry se(is, true);
- std::streambuf* sb = is.rdbuf();
-
- for(;;) {
- int c = sb->sbumpc();
- switch (c) {
- case '\n':
- return is;
- case '\r':
- if(sb->sgetc() == '\n')
- sb->sbumpc();
- return is;
- case EOF:
-
- // Also handle the case when the last line has no line
- // ending.
- if(t.empty())
- is.setstate(std::ios::eofbit);
- return is;
- default:
- t += (char)c;
- }
+std::istream &safeGetline(std::istream &is, std::string &t) {
+ t.clear();
+
+ // The characters in the stream are read one-by-one using a
+ // std::streambuf. That is faster than reading them one-by-one
+ // using the std::istream. Code that uses streambuf this way must
+ // be guarded by a sentry object. The sentry object performs
+ // various tasks, such as thread synchronization and updating the
+ // stream state.
+ std::istream::sentry se(is, true);
+ std::streambuf *sb = is.rdbuf();
+
+ for (;;) {
+ int c = sb->sbumpc();
+ switch (c) {
+ case '\n':
+ return is;
+ case '\r':
+ if (sb->sgetc() == '\n')
+ sb->sbumpc();
+ return is;
+ case EOF:
+
+ // Also handle the case when the last line has no line
+ // ending.
+ if (t.empty())
+ is.setstate(std::ios::eofbit);
+ return is;
+ default:
+ t += (char)c;
}
+ }
}
// Read SNP file.
-bool ReadFile_snps (const string &file_snps, set<string> &setSnps) {
- setSnps.clear();
+bool ReadFile_snps(const string &file_snps, set<string> &setSnps) {
+ setSnps.clear();
- igzstream infile (file_snps.c_str(), igzstream::in);
- if (!infile) {
- cout << "error! fail to open snps file: " << file_snps << endl;
- return false;
- }
+ igzstream infile(file_snps.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open snps file: " << file_snps << endl;
+ return false;
+ }
- string line;
- char *ch_ptr;
+ string line;
+ char *ch_ptr;
- while (getline(infile, line)) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- setSnps.insert(ch_ptr);
- }
+ while (getline(infile, line)) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ setSnps.insert(ch_ptr);
+ }
- infile.close();
- infile.clear();
+ infile.close();
+ infile.clear();
- return true;
+ return true;
}
-bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps) {
- setSnps.clear();
+bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
+ setSnps.clear();
- igzstream infile (file_snps.c_str(), igzstream::in);
- if (!infile) {
- cout << "error! fail to open snps file: " << file_snps << endl;
- return false;
- }
+ igzstream infile(file_snps.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open snps file: " << file_snps << endl;
+ return false;
+ }
- string line, rs, chr, pos;
- char *ch_ptr;
+ string line, rs, chr, pos;
+ char *ch_ptr;
- // Read header.
- HEADER header;
- !safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ // Read header.
+ HEADER header;
+ !safeGetline(infile, line).eof();
+ ReadHeader_io(line, header);
- if (header.rs_col==0 && (header.chr_col==0 || header.pos_col==0) ) {
- cout<<"missing rs id in the hearder"<<endl;
- }
+ if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) {
+ cout << "missing rs id in the hearder" << endl;
+ }
- while (!safeGetline(infile, line).eof()) {
- if (isBlankLine(line)) {continue;}
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ while (!safeGetline(infile, line).eof()) {
+ if (isBlankLine(line)) {
+ continue;
+ }
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
- if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
- if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
+ for (size_t i = 0; i < header.coln; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ }
+ if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ }
+ if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ }
- ch_ptr=strtok (NULL, " , \t");
- }
+ ch_ptr = strtok(NULL, " , \t");
+ }
- if (header.rs_col==0) {
- rs=chr+":"+pos;
- }
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
+ }
- setSnps.insert(rs);
- }
+ setSnps.insert(rs);
+ }
- infile.close();
- infile.clear();
+ infile.close();
+ infile.clear();
- return true;
+ return true;
}
// Read log file.
-bool ReadFile_log (const string &file_log, double &pheno_mean) {
- ifstream infile (file_log.c_str(), ifstream::in);
- if (!infile) {
- cout << "error! fail to open log file: " << file_log << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
- size_t flag=0;
-
- while (getline(infile, line)) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- if (ch_ptr!=NULL && strcmp(ch_ptr, "estimated")==0) {
- ch_ptr=strtok (NULL, " , \t");
- if (ch_ptr!=NULL && strcmp(ch_ptr, "mean")==0) {
- ch_ptr=strtok (NULL, " , \t");
- if (ch_ptr!=NULL && strcmp(ch_ptr, "=")==0) {
- ch_ptr=strtok (NULL, " , \t");
- pheno_mean=atof(ch_ptr);
- flag=1;
- }
- }
- }
-
- if (flag==1) {break;}
- }
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_log(const string &file_log, double &pheno_mean) {
+ ifstream infile(file_log.c_str(), ifstream::in);
+ if (!infile) {
+ cout << "error! fail to open log file: " << file_log << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+ size_t flag = 0;
+
+ while (getline(infile, line)) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ if (ch_ptr != NULL && strcmp(ch_ptr, "estimated") == 0) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (ch_ptr != NULL && strcmp(ch_ptr, "mean") == 0) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (ch_ptr != NULL && strcmp(ch_ptr, "=") == 0) {
+ ch_ptr = strtok(NULL, " , \t");
+ pheno_mean = atof(ch_ptr);
+ flag = 1;
+ }
+ }
+ }
+
+ if (flag == 1) {
+ break;
+ }
+ }
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read bimbam annotation file.
-bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr,
- map<string, long int> &mapRS2bp,
- map<string, double> &mapRS2cM) {
- mapRS2chr.clear();
- mapRS2bp.clear();
-
- ifstream infile (file_anno.c_str(), ifstream::in);
- if (!infile) {
- cout << "error opening annotation file: " << file_anno << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string rs;
- long int b_pos;
- string chr;
- double cM;
-
- while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
- ch_ptr=strtok (NULL, " , \t");
- if (strcmp(ch_ptr, "NA")==0) {
- b_pos=-9;
- } else {
- b_pos=atol(ch_ptr);
- }
- ch_ptr=strtok (NULL, " , \t");
- if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {
- chr="-9";
- } else {
- chr=ch_ptr;
- }
- ch_ptr=strtok (NULL, " , \t");
- if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {
- cM=-9;
- } else {
- cM=atof(ch_ptr);
- }
-
- mapRS2chr[rs]=chr;
- mapRS2bp[rs]=b_pos;
- mapRS2cM[rs]=cM;
- }
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
+ map<string, long int> &mapRS2bp,
+ map<string, double> &mapRS2cM) {
+ mapRS2chr.clear();
+ mapRS2bp.clear();
+
+ ifstream infile(file_anno.c_str(), ifstream::in);
+ if (!infile) {
+ cout << "error opening annotation file: " << file_anno << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ string rs;
+ long int b_pos;
+ string chr;
+ double cM;
+
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ rs = ch_ptr;
+ ch_ptr = strtok(NULL, " , \t");
+ if (strcmp(ch_ptr, "NA") == 0) {
+ b_pos = -9;
+ } else {
+ b_pos = atol(ch_ptr);
+ }
+ ch_ptr = strtok(NULL, " , \t");
+ if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) {
+ chr = "-9";
+ } else {
+ chr = ch_ptr;
+ }
+ ch_ptr = strtok(NULL, " , \t");
+ if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) {
+ cM = -9;
+ } else {
+ cM = atof(ch_ptr);
+ }
+
+ mapRS2chr[rs] = chr;
+ mapRS2bp[rs] = b_pos;
+ mapRS2cM[rs] = cM;
+ }
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read 1 column of phenotype.
-bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv,
- vector<double> &pheno, const int &p_column) {
- indicator_idv.clear();
- pheno.clear();
-
- igzstream infile (file_pheno.c_str(), igzstream::in);
- if (!infile) {
- cout << "error! fail to open phenotype file: " << file_pheno << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string id;
- double p;
- while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- for (int i=0; i<(p_column-1); ++i) {
- ch_ptr=strtok (NULL, " , \t");
- }
- if (strcmp(ch_ptr, "NA")==0) {
- indicator_idv.push_back(0);
- pheno.push_back(-9);
- }
- else {
-
- // Pheno is different from pimass2.
- p=atof(ch_ptr);
- indicator_idv.push_back(1);
- pheno.push_back(p);
- }
- }
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
+ vector<double> &pheno, const int &p_column) {
+ indicator_idv.clear();
+ pheno.clear();
+
+ igzstream infile(file_pheno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open phenotype file: " << file_pheno << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ string id;
+ double p;
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ for (int i = 0; i < (p_column - 1); ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ }
+ if (strcmp(ch_ptr, "NA") == 0) {
+ indicator_idv.push_back(0);
+ pheno.push_back(-9);
+ } else {
+
+ // Pheno is different from pimass2.
+ p = atof(ch_ptr);
+ indicator_idv.push_back(1);
+ pheno.push_back(p);
+ }
+ }
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read bimbam phenotype file, p_column=1, 2,...
-bool ReadFile_pheno (const string &file_pheno,
- vector<vector<int> > &indicator_pheno,
- vector<vector<double> > &pheno,
- const vector<size_t> &p_column) {
- indicator_pheno.clear();
- pheno.clear();
-
- igzstream infile (file_pheno.c_str(), igzstream::in);
- if (!infile) {
- cout << "error! fail to open phenotype file: " << file_pheno << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string id;
- double p;
-
- vector<double> pheno_row;
- vector<int> ind_pheno_row;
-
- size_t p_max=*max_element(p_column.begin(), p_column.end() );
- map<size_t, size_t> mapP2c;
- for (size_t i=0; i<p_column.size(); i++) {
- mapP2c[p_column[i]]=i;
- pheno_row.push_back(-9);
- ind_pheno_row.push_back(0);
- }
-
- while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- size_t i=0;
- while (i<p_max ) {
- if (mapP2c.count(i+1)!=0) {
- if (strcmp(ch_ptr, "NA")==0) {
- ind_pheno_row[mapP2c[i+1]]=0;
- pheno_row[mapP2c[i+1]]=-9;
- }
- else {
- p=atof(ch_ptr);
- ind_pheno_row[mapP2c[i+1]]=1;
- pheno_row[mapP2c[i+1]]=p;
- }
- }
- i++;
- ch_ptr=strtok (NULL, " , \t");
- }
-
- indicator_pheno.push_back(ind_pheno_row);
- pheno.push_back(pheno_row);
- }
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_pheno(const string &file_pheno,
+ vector<vector<int>> &indicator_pheno,
+ vector<vector<double>> &pheno,
+ const vector<size_t> &p_column) {
+ indicator_pheno.clear();
+ pheno.clear();
+
+ igzstream infile(file_pheno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open phenotype file: " << file_pheno << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ string id;
+ double p;
+
+ vector<double> pheno_row;
+ vector<int> ind_pheno_row;
+
+ size_t p_max = *max_element(p_column.begin(), p_column.end());
+ map<size_t, size_t> mapP2c;
+ for (size_t i = 0; i < p_column.size(); i++) {
+ mapP2c[p_column[i]] = i;
+ pheno_row.push_back(-9);
+ ind_pheno_row.push_back(0);
+ }
+
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+ size_t i = 0;
+ while (i < p_max) {
+ if (mapP2c.count(i + 1) != 0) {
+ if (strcmp(ch_ptr, "NA") == 0) {
+ ind_pheno_row[mapP2c[i + 1]] = 0;
+ pheno_row[mapP2c[i + 1]] = -9;
+ } else {
+ p = atof(ch_ptr);
+ ind_pheno_row[mapP2c[i + 1]] = 1;
+ pheno_row[mapP2c[i + 1]] = p;
+ }
+ }
+ i++;
+ ch_ptr = strtok(NULL, " , \t");
+ }
+
+ indicator_pheno.push_back(ind_pheno_row);
+ pheno.push_back(pheno_row);
+ }
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
-bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt,
- vector<vector<double> > &cvt, size_t &n_cvt) {
- indicator_cvt.clear();
-
- ifstream infile (file_cvt.c_str(), ifstream::in);
- if (!infile) {
- cout << "error! fail to open covariates file: " << file_cvt << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
- double d;
-
- int flag_na=0;
-
- while (!safeGetline(infile, line).eof()) {
- vector<double> v_d; flag_na=0;
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- while (ch_ptr!=NULL) {
- if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
- else {d=atof(ch_ptr);}
-
- v_d.push_back(d);
- ch_ptr=strtok (NULL, " , \t");
- }
- if (flag_na==0) {
- indicator_cvt.push_back(1);
- } else {
- indicator_cvt.push_back(0);
- }
- cvt.push_back(v_d);
- }
-
- if (indicator_cvt.empty()) {n_cvt=0;}
- else {
- flag_na=0;
- for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
- if (indicator_cvt[i]==0) {
- continue;
- }
-
- if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
- if (flag_na!=0 && n_cvt!=cvt[i].size()) {
- cout << "error! number of covariates in row " <<
- i << " do not match other rows." << endl;
- return false;
- }
- }
- }
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
+ vector<vector<double>> &cvt, size_t &n_cvt) {
+ indicator_cvt.clear();
+
+ ifstream infile(file_cvt.c_str(), ifstream::in);
+ if (!infile) {
+ cout << "error! fail to open covariates file: " << file_cvt << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+ double d;
+
+ int flag_na = 0;
+
+ while (!safeGetline(infile, line).eof()) {
+ vector<double> v_d;
+ flag_na = 0;
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ while (ch_ptr != NULL) {
+ if (strcmp(ch_ptr, "NA") == 0) {
+ flag_na = 1;
+ d = -9;
+ } else {
+ d = atof(ch_ptr);
+ }
+
+ v_d.push_back(d);
+ ch_ptr = strtok(NULL, " , \t");
+ }
+ if (flag_na == 0) {
+ indicator_cvt.push_back(1);
+ } else {
+ indicator_cvt.push_back(0);
+ }
+ cvt.push_back(v_d);
+ }
+
+ if (indicator_cvt.empty()) {
+ n_cvt = 0;
+ } else {
+ flag_na = 0;
+ for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+
+ if (flag_na == 0) {
+ flag_na = 1;
+ n_cvt = cvt[i].size();
+ }
+ if (flag_na != 0 && n_cvt != cvt[i].size()) {
+ cout << "error! number of covariates in row " << i
+ << " do not match other rows." << endl;
+ return false;
+ }
+ }
+ }
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read .bim file.
-bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) {
- snpInfo.clear();
-
- ifstream infile (file_bim.c_str(), ifstream::in);
- if (!infile) {
- cout << "error opening .bim file: " << file_bim << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string rs;
- long int b_pos;
- string chr;
- double cM;
- string major;
- string minor;
-
- while (getline(infile, line)) {
- ch_ptr=strtok ((char *)line.c_str(), " \t");
- chr=ch_ptr;
- ch_ptr=strtok (NULL, " \t");
- rs=ch_ptr;
- ch_ptr=strtok (NULL, " \t");
- cM=atof(ch_ptr);
- ch_ptr=strtok (NULL, " \t");
- b_pos=atol(ch_ptr);
- ch_ptr=strtok (NULL, " \t");
- minor=ch_ptr;
- ch_ptr=strtok (NULL, " \t");
- major=ch_ptr;
-
- SNPINFO sInfo={chr, rs, cM, b_pos, minor, major,
- 0, -9, -9, 0, 0, 0};
- snpInfo.push_back(sInfo);
- }
-
- infile.close();
- infile.clear();
- return true;
+bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
+ snpInfo.clear();
+
+ ifstream infile(file_bim.c_str(), ifstream::in);
+ if (!infile) {
+ cout << "error opening .bim file: " << file_bim << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ string rs;
+ long int b_pos;
+ string chr;
+ double cM;
+ string major;
+ string minor;
+
+ while (getline(infile, line)) {
+ ch_ptr = strtok((char *)line.c_str(), " \t");
+ chr = ch_ptr;
+ ch_ptr = strtok(NULL, " \t");
+ rs = ch_ptr;
+ ch_ptr = strtok(NULL, " \t");
+ cM = atof(ch_ptr);
+ ch_ptr = strtok(NULL, " \t");
+ b_pos = atol(ch_ptr);
+ ch_ptr = strtok(NULL, " \t");
+ minor = ch_ptr;
+ ch_ptr = strtok(NULL, " \t");
+ major = ch_ptr;
+
+ SNPINFO sInfo = {chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
+ snpInfo.push_back(sInfo);
+ }
+
+ infile.close();
+ infile.clear();
+ return true;
}
// Read .fam file.
-bool ReadFile_fam (const string &file_fam,
- vector<vector<int> > &indicator_pheno,
- vector<vector<double> > &pheno,
- map<string, int> &mapID2num,
- const vector<size_t> &p_column) {
- indicator_pheno.clear();
- pheno.clear();
- mapID2num.clear();
-
- igzstream infile (file_fam.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error opening .fam file: "<<file_fam<<endl; return false;}
-
- string line;
- char *ch_ptr;
-
- string id;
- int c=0;
- double p;
-
- vector<double> pheno_row;
- vector<int> ind_pheno_row;
-
- size_t p_max=*max_element(p_column.begin(), p_column.end() );
- map<size_t, size_t> mapP2c;
- for (size_t i=0; i<p_column.size(); i++) {
- mapP2c[p_column[i]]=i;
- pheno_row.push_back(-9);
- ind_pheno_row.push_back(0);
- }
-
- while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " \t");
- ch_ptr=strtok (NULL, " \t");
- id=ch_ptr;
- ch_ptr=strtok (NULL, " \t");
- ch_ptr=strtok (NULL, " \t");
- ch_ptr=strtok (NULL, " \t");
- ch_ptr=strtok (NULL, " \t");
-
- size_t i=0;
- while (i<p_max ) {
- if (mapP2c.count(i+1)!=0 ) {
- if (strcmp(ch_ptr, "NA")==0) {
- ind_pheno_row[mapP2c[i+1]]=0;
- pheno_row[mapP2c[i+1]]=-9;
- } else {
- p=atof(ch_ptr);
-
- if (p==-9) {
- ind_pheno_row[mapP2c[i+1]]=0;
- pheno_row[mapP2c[i+1]]=-9;
- }
- else {
- ind_pheno_row[mapP2c[i+1]]=1;
- pheno_row[mapP2c[i+1]]=p;
- }
- }
- }
- i++;
- ch_ptr=strtok (NULL, " , \t");
- }
-
- indicator_pheno.push_back(ind_pheno_row);
- pheno.push_back(pheno_row);
-
- mapID2num[id]=c; c++;
- }
-
- infile.close();
- infile.clear();
- return true;
+bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
+ vector<vector<double>> &pheno, map<string, int> &mapID2num,
+ const vector<size_t> &p_column) {
+ indicator_pheno.clear();
+ pheno.clear();
+ mapID2num.clear();
+
+ igzstream infile(file_fam.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error opening .fam file: " << file_fam << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ string id;
+ int c = 0;
+ double p;
+
+ vector<double> pheno_row;
+ vector<int> ind_pheno_row;
+
+ size_t p_max = *max_element(p_column.begin(), p_column.end());
+ map<size_t, size_t> mapP2c;
+ for (size_t i = 0; i < p_column.size(); i++) {
+ mapP2c[p_column[i]] = i;
+ pheno_row.push_back(-9);
+ ind_pheno_row.push_back(0);
+ }
+
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr = strtok((char *)line.c_str(), " \t");
+ ch_ptr = strtok(NULL, " \t");
+ id = ch_ptr;
+ ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok(NULL, " \t");
+
+ size_t i = 0;
+ while (i < p_max) {
+ if (mapP2c.count(i + 1) != 0) {
+ if (strcmp(ch_ptr, "NA") == 0) {
+ ind_pheno_row[mapP2c[i + 1]] = 0;
+ pheno_row[mapP2c[i + 1]] = -9;
+ } else {
+ p = atof(ch_ptr);
+
+ if (p == -9) {
+ ind_pheno_row[mapP2c[i + 1]] = 0;
+ pheno_row[mapP2c[i + 1]] = -9;
+ } else {
+ ind_pheno_row[mapP2c[i + 1]] = 1;
+ pheno_row[mapP2c[i + 1]] = p;
+ }
+ }
+ }
+ i++;
+ ch_ptr = strtok(NULL, " , \t");
+ }
+
+ indicator_pheno.push_back(ind_pheno_row);
+ pheno.push_back(pheno_row);
+
+ mapID2num[id] = c;
+ c++;
+ }
+
+ infile.close();
+ infile.clear();
+ return true;
}
// Read bimbam mean genotype file, the first time, to obtain #SNPs for
// analysis (ns_test) and total #SNP (ns_total).
-bool ReadFile_geno (const string &file_geno, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, const double &maf_level,
- const double &miss_level, const double &hwe_level,
- const double &r2_level,
- map<string, string> &mapRS2chr,
- map<string, long int> &mapRS2bp,
- map<string, double> &mapRS2cM,
- vector<SNPINFO> &snpInfo,
- size_t &ns_test) {
- indicator_snp.clear();
- snpInfo.clear();
-
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return false;
- }
-
- gsl_vector *genotype=gsl_vector_alloc (W->size1);
- gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- double v_x, v_w;
- int c_idv=0;
-
- string line;
- char *ch_ptr;
-
- string rs;
- long int b_pos;
- string chr;
- string major;
- string minor;
- double cM;
- size_t file_pos;
-
- double maf, geno, geno_old;
- size_t n_miss;
- size_t n_0, n_1, n_2;
- int flag_poly;
-
- int ni_total=indicator_idv.size();
- int ni_test=0;
- for (int i=0; i<ni_total; ++i) {
- ni_test+=indicator_idv[i];
- }
- ns_test=0;
-
- file_pos=0;
- while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
- ch_ptr=strtok (NULL, " , \t");
- minor=ch_ptr;
- ch_ptr=strtok (NULL, " , \t");
- major=ch_ptr;
-
- if (setSnps.size()!=0 && setSnps.count(rs)==0) {
- SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9,
- 0, 0, file_pos};
- snpInfo.push_back(sInfo);
- indicator_snp.push_back(0);
-
- file_pos++;
- continue;
- }
-
- if (mapRS2bp.count(rs)==0) {chr="-9"; b_pos=-9;cM=-9;}
- else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];}
-
- maf=0; n_miss=0; flag_poly=0; geno_old=-9;
- n_0=0; n_1=0; n_2=0;
- c_idv=0; gsl_vector_set_zero (genotype_miss);
- for (int i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set (genotype_miss, c_idv, 1);
- n_miss++;
- c_idv++;
- continue;
- }
-
- geno=atof(ch_ptr);
- if (geno>=0 && geno<=0.5) {n_0++;}
- if (geno>0.5 && geno<1.5) {n_1++;}
- if (geno>=1.5 && geno<=2.0) {n_2++;}
-
- gsl_vector_set (genotype, c_idv, geno);
-
- if (flag_poly==0) {geno_old=geno; flag_poly=2;}
- if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
-
- maf+=geno;
-
- c_idv++;
- }
- maf/=2.0*(double)(ni_test-n_miss);
-
- SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss,
- (double)n_miss/(double)ni_test, maf,
- ni_test-n_miss, 0, file_pos};
- snpInfo.push_back(sInfo);
- file_pos++;
-
- if ( (double)n_miss/(double)ni_test > miss_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
-
- if (hwe_level!=0 && maf_level!=-1) {
- if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
- indicator_snp.push_back(0);
- continue;
- }
- }
-
- // Filter SNP if it is correlated with W unless W has
- // only one column, of 1s.
- for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {
- geno=maf*2.0;
- gsl_vector_set (genotype, i, geno);
- }
- }
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_ddot (genotype, genotype, &v_x);
- gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
- if (W->size2!=1 && v_w/v_x >= r2_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- indicator_snp.push_back(1);
- ns_test++;
- }
-
- gsl_vector_free (genotype);
- gsl_vector_free (genotype_miss);
- gsl_matrix_free (WtW);
- gsl_matrix_free (WtWi);
- gsl_vector_free (Wtx);
- gsl_vector_free (WtWiWtx);
- gsl_permutation_free (pmt);
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, const double &maf_level,
+ const double &miss_level, const double &hwe_level,
+ const double &r2_level, map<string, string> &mapRS2chr,
+ map<string, long int> &mapRS2bp,
+ map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
+ size_t &ns_test) {
+ indicator_snp.clear();
+ snpInfo.clear();
+
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return false;
+ }
+
+ gsl_vector *genotype = gsl_vector_alloc(W->size1);
+ gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+ gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp(WtW, pmt, &sig);
+ LUInvert(WtW, pmt, WtWi);
+
+ double v_x, v_w;
+ int c_idv = 0;
+
+ string line;
+ char *ch_ptr;
+
+ string rs;
+ long int b_pos;
+ string chr;
+ string major;
+ string minor;
+ double cM;
+ size_t file_pos;
+
+ double maf, geno, geno_old;
+ size_t n_miss;
+ size_t n_0, n_1, n_2;
+ int flag_poly;
+
+ int ni_total = indicator_idv.size();
+ int ni_test = 0;
+ for (int i = 0; i < ni_total; ++i) {
+ ni_test += indicator_idv[i];
+ }
+ ns_test = 0;
+
+ file_pos = 0;
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ rs = ch_ptr;
+ ch_ptr = strtok(NULL, " , \t");
+ minor = ch_ptr;
+ ch_ptr = strtok(NULL, " , \t");
+ major = ch_ptr;
+
+ if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
+ SNPINFO sInfo = {"-9", rs, -9, -9, minor, major,
+ 0, -9, -9, 0, 0, file_pos};
+ snpInfo.push_back(sInfo);
+ indicator_snp.push_back(0);
+
+ file_pos++;
+ continue;
+ }
+
+ if (mapRS2bp.count(rs) == 0) {
+ chr = "-9";
+ b_pos = -9;
+ cM = -9;
+ } else {
+ b_pos = mapRS2bp[rs];
+ chr = mapRS2chr[rs];
+ cM = mapRS2cM[rs];
+ }
+
+ maf = 0;
+ n_miss = 0;
+ flag_poly = 0;
+ geno_old = -9;
+ n_0 = 0;
+ n_1 = 0;
+ n_2 = 0;
+ c_idv = 0;
+ gsl_vector_set_zero(genotype_miss);
+ for (int i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(genotype_miss, c_idv, 1);
+ n_miss++;
+ c_idv++;
+ continue;
+ }
+
+ geno = atof(ch_ptr);
+ if (geno >= 0 && geno <= 0.5) {
+ n_0++;
+ }
+ if (geno > 0.5 && geno < 1.5) {
+ n_1++;
+ }
+ if (geno >= 1.5 && geno <= 2.0) {
+ n_2++;
+ }
+
+ gsl_vector_set(genotype, c_idv, geno);
+
+ if (flag_poly == 0) {
+ geno_old = geno;
+ flag_poly = 2;
+ }
+ if (flag_poly == 2 && geno != geno_old) {
+ flag_poly = 1;
+ }
+
+ maf += geno;
+
+ c_idv++;
+ }
+ maf /= 2.0 * (double)(ni_test - n_miss);
+
+ SNPINFO sInfo = {chr, rs,
+ cM, b_pos,
+ minor, major,
+ n_miss, (double)n_miss / (double)ni_test,
+ maf, ni_test - n_miss,
+ 0, file_pos};
+ snpInfo.push_back(sInfo);
+ file_pos++;
+
+ if ((double)n_miss / (double)ni_test > miss_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if (flag_poly != 1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if (hwe_level != 0 && maf_level != -1) {
+ if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+ }
+
+ // Filter SNP if it is correlated with W unless W has
+ // only one column, of 1s.
+ for (size_t i = 0; i < genotype->size; ++i) {
+ if (gsl_vector_get(genotype_miss, i) == 1) {
+ geno = maf * 2.0;
+ gsl_vector_set(genotype, i, geno);
+ }
+ }
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+ gsl_blas_ddot(genotype, genotype, &v_x);
+ gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
+
+ if (W->size2 != 1 && v_w / v_x >= r2_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ indicator_snp.push_back(1);
+ ns_test++;
+ }
+
+ gsl_vector_free(genotype);
+ gsl_vector_free(genotype_miss);
+ gsl_matrix_free(WtW);
+ gsl_matrix_free(WtWi);
+ gsl_vector_free(Wtx);
+ gsl_vector_free(WtWiWtx);
+ gsl_permutation_free(pmt);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read bed file, the first time.
-bool ReadFile_bed (const string &file_bed, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
- const double &maf_level, const double &miss_level,
- const double &hwe_level, const double &r2_level,
- size_t &ns_test) {
- indicator_snp.clear();
- size_t ns_total=snpInfo.size();
-
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return false;
- }
-
- gsl_vector *genotype=gsl_vector_alloc (W->size1);
- gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- double v_x, v_w, geno;
- size_t c_idv=0;
-
- char ch[1];
- bitset<8> b;
-
- size_t ni_total=indicator_idv.size();
- size_t ni_test=0;
- for (size_t i=0; i<ni_total; ++i) {
- ni_test+=indicator_idv[i];
- }
- ns_test=0;
-
- // Calculate n_bit and c, the number of bit for each snp.
- size_t n_bit;
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1;}
-
- // Ignore the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- double maf;
- size_t n_miss;
- size_t n_0, n_1, n_2, c;
-
- // Start reading snps and doing association test.
- for (size_t t=0; t<ns_total; ++t) {
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- if (setSnps.size()!=0 &&
- setSnps.count(snpInfo[t].rs_number) == 0) {
- snpInfo[t].n_miss=-9;
- snpInfo[t].missingness=-9;
- snpInfo[t].maf=-9;
- snpInfo[t].file_position=t;
- indicator_snp.push_back(0);
- continue;
- }
-
- // Read genotypes.
- c=0; maf=0.0; n_miss=0; n_0=0; n_1=0; n_2=0;
- c_idv=0; gsl_vector_set_zero (genotype_miss);
- for (size_t i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0;
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && c==ni_total) {break;}
- if (indicator_idv[c]==0) {c++; continue;}
- c++;
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(genotype, c_idv, 2.0);
- maf+=2.0;
- n_2++;
- }
- else {
- gsl_vector_set(genotype, c_idv, 1.0);
- maf+=1.0;
- n_1++;
- }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(genotype, c_idv, 0.0);
- maf+=0.0;
- n_0++;
- }
- else {
- gsl_vector_set(genotype_miss, c_idv, 1);
- n_miss++;
- }
- }
- c_idv++;
- }
- }
- maf/=2.0*(double)(ni_test-n_miss);
-
- snpInfo[t].n_miss=n_miss;
- snpInfo[t].missingness=(double)n_miss/(double)ni_test;
- snpInfo[t].maf=maf;
- snpInfo[t].n_idv=ni_test-n_miss;
- snpInfo[t].n_nb=0;
- snpInfo[t].file_position=t;
-
- if ( (double)n_miss/(double)ni_test > miss_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (hwe_level!=0 && maf_level!=-1) {
- if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
- indicator_snp.push_back(0);
- continue;
- }
- }
-
- // Filter SNP if it is correlated with W unless W has
- // only one column, of 1s.
- for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {
- geno=maf*2.0;
- gsl_vector_set (genotype, i, geno);
- }
- }
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_ddot (genotype, genotype, &v_x);
- gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
- if (W->size2!=1 && v_w/v_x > r2_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- indicator_snp.push_back(1);
- ns_test++;
- }
-
- gsl_vector_free (genotype);
- gsl_vector_free (genotype_miss);
- gsl_matrix_free (WtW);
- gsl_matrix_free (WtWi);
- gsl_vector_free (Wtx);
- gsl_vector_free (WtWiWtx);
- gsl_permutation_free (pmt);
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_bed(const string &file_bed, const set<string> &setSnps, // First pass over the bed file: per-SNP statistics plus the keep/drop decision for each SNP.
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, // indicator_snp is rebuilt with one 0/1 entry per SNP; snpInfo[t] is updated in place.
+ const double &maf_level, const double &miss_level,
+ const double &hwe_level, const double &r2_level,
+ size_t &ns_test) { // ns_test receives the number of SNPs kept; returns false only if the file cannot be opened.
+ indicator_snp.clear();
+ size_t ns_total = snpInfo.size(); // the loop below treats every snpInfo entry as one SNP record in the file
+
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return false;
+ }
+
+ gsl_vector *genotype = gsl_vector_alloc(W->size1);
+ gsl_vector *genotype_miss = gsl_vector_alloc(W->size1); // 1 marks a missing genotype, 0 an observed one
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+ gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); // WtW = W^T W
+ int sig;
+ LUDecomp(WtW, pmt, &sig); // LUDecomp/LUInvert are defined elsewhere; presumably WtWi ends up as the inverse of W^T W
+ LUInvert(WtW, pmt, WtWi);
+
+ double v_x, v_w, geno;
+ size_t c_idv = 0;
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t ni_total = indicator_idv.size();
+ size_t ni_test = 0;
+ for (size_t i = 0; i < ni_total; ++i) {
+ ni_test += indicator_idv[i]; // sum of the indicators, i.e. the number of included individuals when entries are 0/1
+ }
+ ns_test = 0;
+
+ // Calculate n_bit, the number of bytes that store one SNP (four genotypes per byte).
+ size_t n_bit;
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Ignore the first three magic numbers.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0]; // value is not used; the three bytes are simply consumed
+ }
+
+ double maf;
+ size_t n_miss;
+ size_t n_0, n_1, n_2, c;
+
+ // Start reading snps and doing association test.
+ for (size_t t = 0; t < ns_total; ++t) {
+
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t * n_bit + 3); // absolute offset of SNP t, so skipped SNPs need no reads
+
+ if (setSnps.size() != 0 && setSnps.count(snpInfo[t].rs_number) == 0) { // a non-empty setSnps restricts the analysis to the listed rs numbers
+ snpInfo[t].n_miss = -9; // -9 is written as a placeholder for SNPs that were not read
+ snpInfo[t].missingness = -9;
+ snpInfo[t].maf = -9;
+ snpInfo[t].file_position = t;
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ // Read genotypes.
+ c = 0; // index over all individuals in the file
+ maf = 0.0;
+ n_miss = 0;
+ n_0 = 0;
+ n_1 = 0;
+ n_2 = 0;
+ c_idv = 0; // index over included individuals only
+ gsl_vector_set_zero(genotype_miss);
+ for (size_t i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0;
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && c == ni_total) { // remaining bit pairs of the last byte are padding
+ break;
+ }
+ if (indicator_idv[c] == 0) { // excluded individual: advance the file index only
+ c++;
+ continue;
+ }
+ c++;
+
+ if (b[2 * j] == 0) { // bit pair (b[2j], b[2j+1]): (0,0) -> 2, (0,1) -> 1, (1,1) -> 0, (1,0) -> missing
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(genotype, c_idv, 2.0);
+ maf += 2.0;
+ n_2++;
+ } else {
+ gsl_vector_set(genotype, c_idv, 1.0);
+ maf += 1.0;
+ n_1++;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(genotype, c_idv, 0.0);
+ maf += 0.0;
+ n_0++;
+ } else {
+ gsl_vector_set(genotype_miss, c_idv, 1);
+ n_miss++;
+ }
+ }
+ c_idv++;
+ }
+ }
+ maf /= 2.0 * (double)(ni_test - n_miss); // genotype sum over twice the non-missing count; NOTE(review): denominator is 0 when every tested individual is missing
+
+ snpInfo[t].n_miss = n_miss;
+ snpInfo[t].missingness = (double)n_miss / (double)ni_test;
+ snpInfo[t].maf = maf;
+ snpInfo[t].n_idv = ni_test - n_miss;
+ snpInfo[t].n_nb = 0;
+ snpInfo[t].file_position = t;
+
+ if ((double)n_miss / (double)ni_test > miss_level) { // filter 1: missingness above the threshold
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) { // filter 2: allele frequency outside [maf_level, 1 - maf_level]; maf_level == -1 disables it
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if ((n_0 + n_1) == 0 || (n_1 + n_2) == 0 || (n_2 + n_0) == 0) { // filter 3: at most one genotype class observed, so the SNP does not vary
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if (hwe_level != 0 && maf_level != -1) { // filter 4: skipped when hwe_level == 0 or maf_level == -1
+ if (CalcHWE(n_0, n_2, n_1) < hwe_level) { // CalcHWE is defined elsewhere; note the argument order (n_0, n_2, n_1)
+ indicator_snp.push_back(0);
+ continue;
+ }
+ }
+
+ // Filter SNP if it is correlated with W unless W has
+ // only one column, of 1s.
+ for (size_t i = 0; i < genotype->size; ++i) {
+ if (gsl_vector_get(genotype_miss, i) == 1) {
+ geno = maf * 2.0; // missing entries take the mean genotype of the observed ones
+ gsl_vector_set(genotype, i, geno);
+ }
+ }
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx); // Wtx = W^T x
+ gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); // WtWiWtx = WtWi * W^T x
+ gsl_blas_ddot(genotype, genotype, &v_x); // v_x = x^T x
+ gsl_blas_ddot(Wtx, WtWiWtx, &v_w); // v_w = (W^T x)^T WtWi (W^T x)
+
+ if (W->size2 != 1 && v_w / v_x > r2_level) { // filter 5: ratio v_w / v_x above r2_level, applied only when W has more than one column
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ indicator_snp.push_back(1);
+ ns_test++;
+ }
+
+ gsl_vector_free(genotype);
+ gsl_vector_free(genotype_miss);
+ gsl_matrix_free(WtW);
+ gsl_matrix_free(WtWi);
+ gsl_vector_free(Wtx);
+ gsl_vector_free(WtWiWtx);
+ gsl_permutation_free(pmt);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read the genotype for one SNP; remember to read empty lines.
// Geno stores original genotypes without centering.
// Missing values are replaced by mean.
-bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv,
- igzstream &infile, gsl_vector *geno,
- double &geno_mean) {
- size_t ni_total=indicator_idv.size();
+bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv, // inc = number of lines to pass over before the line that is parsed
+ igzstream &infile, gsl_vector *geno, double &geno_mean) { // returns true when a line was parsed, false when the read reports eof
+ size_t ni_total = indicator_idv.size();
string line;
char *ch_ptr;
- bool flag=false;
+ bool flag = false;
- for (size_t i=0; i<inc; i++) {
+ for (size_t i = 0; i < inc; i++) {
!safeGetline(infile, line).eof(); // expression value is discarded; the call is made only for its effect on infile
}
if (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok((char *)line.c_str(), " , \t"); // the first three fields of the line are consumed without being used
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
- geno_mean=0.0;
+ geno_mean = 0.0;
double d;
- size_t c_idv=0;
+ size_t c_idv = 0; // index over included individuals only
vector<size_t> geno_miss; // positions (in geno) of missing genotypes
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t"); // token is consumed even for excluded individuals so columns stay aligned
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
- if (strcmp(ch_ptr, "NA")==0) {
- geno_miss.push_back(c_idv);
+ if (strcmp(ch_ptr, "NA") == 0) { // NOTE(review): ch_ptr is NULL here if the line has fewer fields than expected
+ geno_miss.push_back(c_idv);
} else {
- d=atof(ch_ptr);
- gsl_vector_set (geno, c_idv, d);
- geno_mean+=d;
+ d = atof(ch_ptr);
+ gsl_vector_set(geno, c_idv, d);
+ geno_mean += d;
}
c_idv++;
}
- geno_mean/=(double)(c_idv-geno_miss.size() );
+ geno_mean /= (double)(c_idv - geno_miss.size()); // mean over observed genotypes; NOTE(review): denominator is 0 when all are missing
- for (size_t i=0; i<geno_miss.size(); ++i) {
+ for (size_t i = 0; i < geno_miss.size(); ++i) {
gsl_vector_set(geno, geno_miss[i], geno_mean);
}
- flag=true;
+ flag = true;
}
return flag;
}
// For PLINK, store SNPs as double too.
-void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv,
- ifstream &infile, gsl_vector *geno, double &geno_mean) {
- size_t ni_total=indicator_idv.size(), n_bit;
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1;}
+void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv, // pos = zero-based SNP index used to compute the seek offset
+ ifstream &infile, gsl_vector *geno, double &geno_mean) { // geno receives included individuals only; geno_mean is their observed mean
+ size_t ni_total = indicator_idv.size(), n_bit;
+ if (ni_total % 4 == 0) { // n_bit = number of bytes per SNP (four genotypes per byte, rounded up)
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
// n_bit, and 3 is the number of magic numbers.
- infile.seekg(pos*n_bit+3);
+ infile.seekg(pos * n_bit + 3);
// Read genotypes.
char ch[1];
bitset<8> b;
- geno_mean=0.0;
- size_t c=0, c_idv=0;
+ geno_mean = 0.0;
+ size_t c = 0, c_idv = 0; // c indexes all individuals in the file, c_idv only the included ones
vector<size_t> geno_miss; // positions (in geno) of missing genotypes
- for (size_t i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
+ for (size_t i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && c==ni_total) {break;}
- if (indicator_idv[c]==0) {c++; continue;}
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && c == ni_total) { // remaining bit pairs of the last byte are padding
+ break;
+ }
+ if (indicator_idv[c] == 0) {
+ c++;
+ continue;
+ }
c++;
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set (geno, c_idv, 2);
- geno_mean+=2.0;
- } else {
- gsl_vector_set (geno, c_idv, 1);
- geno_mean+=1.0;
- }
+ if (b[2 * j] == 0) { // bit pair (b[2j], b[2j+1]): (0,0) -> 2, (0,1) -> 1, (1,1) -> 0, (1,0) -> missing
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(geno, c_idv, 2);
+ geno_mean += 2.0;
+ } else {
+ gsl_vector_set(geno, c_idv, 1);
+ geno_mean += 1.0;
+ }
} else {
- if (b[2*j+1]==1) {
- gsl_vector_set (geno, c_idv, 0);
- geno_mean+=0.0;
- } else {
- geno_miss.push_back(c_idv);
- }
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(geno, c_idv, 0);
+ geno_mean += 0.0;
+ } else {
+ geno_miss.push_back(c_idv);
+ }
}
c_idv++;
}
}
- geno_mean/=(double)(c_idv-geno_miss.size());
+ geno_mean /= (double)(c_idv - geno_miss.size()); // mean over observed genotypes; NOTE(review): denominator is 0 when all are missing
- for (size_t i=0; i<geno_miss.size(); ++i) {
+ for (size_t i = 0; i < geno_miss.size(); ++i) {
gsl_vector_set(geno, geno_miss[i], geno_mean); // missing entries are replaced by the observed mean
}
return;
}
-void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv,
- map<string, int> &mapID2num, const size_t k_mode,
- bool &error, gsl_matrix *G) {
- igzstream infile (file_kin.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error! fail to open kinship file: "<<file_kin<<endl;
- error=true; return;
- }
-
- size_t ni_total=indicator_idv.size();
-
- gsl_matrix_set_zero (G);
-
- string line;
- char *ch_ptr;
- double d;
-
- if (k_mode==1) {
- size_t i_test=0, i_total=0, j_test=0, j_total=0;
- while (getline(infile, line)) {
- if (i_total==ni_total) {
- cout<<"error! number of rows in the kinship "<<
- "file is larger than the number of phentypes."<<
- endl;
- error=true;
- }
-
- if (indicator_idv[i_total]==0) {i_total++; continue;}
-
- j_total=0; j_test=0;
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- while (ch_ptr!=NULL) {
- if (j_total==ni_total) {
- cout<<"error! number of columns in the "<<
- "kinship file is larger than the number"<<
- " of phentypes for row = "<<i_total<<endl;
- error=true;
- }
-
- d=atof(ch_ptr);
- if (indicator_idv[j_total]==1) {
- gsl_matrix_set (G, i_test, j_test, d);
- j_test++;
- }
- j_total++;
-
- ch_ptr=strtok (NULL, " , \t");
- }
- if (j_total!=ni_total) {
- cout<<"error! number of columns in the kinship "<<
- "file do not match the number of phentypes for "<<
- "row = "<<i_total<<endl;
- error=true;
- }
- i_total++; i_test++;
- }
- if (i_total!=ni_total) {
- cout<<"error! number of rows in the kinship file do "<<
- "not match the number of phentypes."<<endl;
- error=true;
- }
- }
- else {
- map<size_t, size_t> mapID2ID;
- size_t c=0;
- for (size_t i=0; i<indicator_idv.size(); i++) {
- if (indicator_idv[i]==1) {mapID2ID[i]=c; c++;}
- }
-
- string id1, id2;
- double Cov_d;
- size_t n_id1, n_id2;
-
- while (getline(infile, line)) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- id1=ch_ptr;
- ch_ptr=strtok (NULL, " , \t");
- id2=ch_ptr;
- ch_ptr=strtok (NULL, " , \t");
- d=atof(ch_ptr);
- if (mapID2num.count(id1)==0 ||
- mapID2num.count(id2)==0) {
- continue;
- }
- if (indicator_idv[mapID2num[id1]]==0 ||
- indicator_idv[mapID2num[id2]]==0) {
- continue;
- }
-
- n_id1=mapID2ID[mapID2num[id1]];
- n_id2=mapID2ID[mapID2num[id2]];
-
- Cov_d=gsl_matrix_get(G, n_id1, n_id2);
- if (Cov_d!=0 && Cov_d!=d) {
- cout<<"error! redundant and unequal terms in the "<<
- "kinship file, for id1 = "<<id1<<" and id2 = "<<
- id2<<endl;
- }
- else {
- gsl_matrix_set(G, n_id1, n_id2, d);
- gsl_matrix_set(G, n_id2, n_id1, d);
- }
- }
- }
-
- infile.close();
- infile.clear();
-
- return;
+void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, // Loads a kinship matrix from file_kin into G, keeping only individuals whose indicator is 1.
+ map<string, int> &mapID2num, const size_t k_mode, bool &error, // k_mode == 1: full matrix, one row per line; any other value: "id1 id2 value" records.
+ gsl_matrix *G) { // error is set to true on failure and is never reset to false here.
+ igzstream infile(file_kin.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open kinship file: " << file_kin << endl;
+ error = true;
+ return;
+ }
+
+ size_t ni_total = indicator_idv.size();
+
+ gsl_matrix_set_zero(G);
+
+ string line;
+ char *ch_ptr;
+ double d;
+
+ if (k_mode == 1) {
+ size_t i_test = 0, i_total = 0, j_test = 0, j_total = 0; // *_total index all individuals, *_test only the included ones
+ while (getline(infile, line)) {
+ if (i_total == ni_total) { // NOTE(review): error is flagged but the loop continues, so indicator_idv[i_total] below is indexed out of range
+ cout << "error! number of rows in the kinship "
+ << "file is larger than the number of phentypes." << endl;
+ error = true;
+ }
+
+ if (indicator_idv[i_total] == 0) { // row of an excluded individual: skip the whole line
+ i_total++;
+ continue;
+ }
+
+ j_total = 0;
+ j_test = 0;
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ while (ch_ptr != NULL) {
+ if (j_total == ni_total) { // NOTE(review): same pattern as the row check; parsing continues past the expected column count
+ cout << "error! number of columns in the "
+ << "kinship file is larger than the number"
+ << " of phentypes for row = " << i_total << endl;
+ error = true;
+ }
+
+ d = atof(ch_ptr);
+ if (indicator_idv[j_total] == 1) { // only columns of included individuals are stored
+ gsl_matrix_set(G, i_test, j_test, d);
+ j_test++;
+ }
+ j_total++;
+
+ ch_ptr = strtok(NULL, " , \t");
+ }
+ if (j_total != ni_total) {
+ cout << "error! number of columns in the kinship "
+ << "file do not match the number of phentypes for "
+ << "row = " << i_total << endl;
+ error = true;
+ }
+ i_total++;
+ i_test++;
+ }
+ if (i_total != ni_total) {
+ cout << "error! number of rows in the kinship file do "
+ << "not match the number of phentypes." << endl;
+ error = true;
+ }
+ } else {
+ map<size_t, size_t> mapID2ID; // index among all individuals -> index among included individuals
+ size_t c = 0;
+ for (size_t i = 0; i < indicator_idv.size(); i++) {
+ if (indicator_idv[i] == 1) {
+ mapID2ID[i] = c;
+ c++;
+ }
+ }
+
+ string id1, id2;
+ double Cov_d;
+ size_t n_id1, n_id2;
+
+ while (getline(infile, line)) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ id1 = ch_ptr; // NOTE(review): none of the three strtok results is checked for NULL; a line with fewer than three fields is not handled
+ ch_ptr = strtok(NULL, " , \t");
+ id2 = ch_ptr;
+ ch_ptr = strtok(NULL, " , \t");
+ d = atof(ch_ptr);
+ if (mapID2num.count(id1) == 0 || mapID2num.count(id2) == 0) { // records naming an unknown id are ignored
+ continue;
+ }
+ if (indicator_idv[mapID2num[id1]] == 0 ||
+ indicator_idv[mapID2num[id2]] == 0) { // records involving an excluded individual are ignored
+ continue;
+ }
+
+ n_id1 = mapID2ID[mapID2num[id1]];
+ n_id2 = mapID2ID[mapID2num[id2]];
+
+ Cov_d = gsl_matrix_get(G, n_id1, n_id2);
+ if (Cov_d != 0 && Cov_d != d) { // a conflicting duplicate is reported, but error is not set and G keeps the earlier value
+ cout << "error! redundant and unequal terms in the "
+ << "kinship file, for id1 = " << id1 << " and id2 = " << id2
+ << endl;
+ } else {
+ gsl_matrix_set(G, n_id1, n_id2, d); // both (i, j) and (j, i) are written, keeping G symmetric
+ gsl_matrix_set(G, n_id2, n_id1, d);
+ }
+ }
+ }
+
+ infile.close();
+ infile.clear();
+
+ return;
}
-void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv,
- map<string, int> &mapID2num, const size_t k_mode,
- bool &error, gsl_matrix *G) {
- igzstream infile (file_mk.c_str(), igzstream::in);
- if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl;
- error=true;
- return;
- }
-
- string file_kin, line;
-
- size_t i=0;
- while (getline(infile, line)) {
- file_kin=line.c_str();
- gsl_matrix_view G_sub=gsl_matrix_submatrix(G, 0, i*G->size1,
- G->size1, G->size1);
- ReadFile_kin (file_kin, indicator_idv, mapID2num, k_mode,
- error, &G_sub.matrix);
- i++;
- }
-
- infile.close();
- infile.clear();
- return;
+void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv, // Reads a list of kinship file names (one per line of file_mk) and loads each one into G.
+ map<string, int> &mapID2num, const size_t k_mode, bool &error, // indicator_idv, mapID2num, k_mode and error are passed through to ReadFile_kin.
+ gsl_matrix *G) { // the i-th listed file fills the size1 x size1 block of G that starts at column i * size1
+ igzstream infile(file_mk.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open file: " << file_mk << endl;
+ error = true;
+ return;
+ }
+
+ string file_kin, line;
+
+ size_t i = 0;
+ while (getline(infile, line)) {
+ file_kin = line.c_str(); // the whole line is used as the file name
+ gsl_matrix_view G_sub =
+ gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1); // NOTE(review): no check that G has enough columns for the i-th block
+ ReadFile_kin(file_kin, indicator_idv, mapID2num, k_mode, error,
+ &G_sub.matrix); // reading continues with the next file even after error has been set
+ i++;
+ }
+
+ infile.close();
+ infile.clear();
+ return;
}
-void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U) {
- igzstream infile (file_ku.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error! fail to open the U file: "<<file_ku<<endl;
- error=true;
- return;
- }
-
- size_t n_row=U->size1, n_col=U->size2, i_row=0, i_col=0;
-
- gsl_matrix_set_zero (U);
-
- string line;
- char *ch_ptr;
- double d;
-
- while (getline(infile, line)) {
- if (i_row==n_row) {
- cout<<"error! number of rows in the U file is larger "<<
- "than expected."<<endl;
- error=true;
- }
-
- i_col=0;
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- while (ch_ptr!=NULL) {
- if (i_col==n_col) {
- cout<<"error! number of columns in the U file "<<
- "is larger than expected, for row = "<<
- i_row<<endl;
- error=true;
- }
-
- d=atof(ch_ptr);
- gsl_matrix_set (U, i_row, i_col, d);
- i_col++;
-
- ch_ptr=strtok (NULL, " , \t");
- }
-
- i_row++;
- }
-
- infile.close();
- infile.clear();
-
- return;
+void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) { // Fills the pre-sized matrix U from a delimited text file, one matrix row per line; error is set to true on failure.
+ igzstream infile(file_ku.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open the U file: " << file_ku << endl;
+ error = true;
+ return;
+ }
+
+ size_t n_row = U->size1, n_col = U->size2, i_row = 0, i_col = 0;
+
+ gsl_matrix_set_zero(U); // entries not present in the file stay 0; too few rows or columns is not reported
+
+ string line;
+ char *ch_ptr;
+ double d;
+
+ while (getline(infile, line)) {
+ if (i_row == n_row) { // NOTE(review): error is flagged but the loop continues, so gsl_matrix_set below receives an out-of-range row
+ cout << "error! number of rows in the U file is larger "
+ << "than expected." << endl;
+ error = true;
+ }
+
+ i_col = 0;
+ ch_ptr = strtok((char *)line.c_str(), " , \t"); // fields are separated by spaces, commas or tabs
+ while (ch_ptr != NULL) {
+ if (i_col == n_col) { // NOTE(review): same pattern as the row check; the out-of-range column is still written
+ cout << "error! number of columns in the U file "
+ << "is larger than expected, for row = " << i_row << endl;
+ error = true;
+ }
+
+ d = atof(ch_ptr);
+ gsl_matrix_set(U, i_row, i_col, d);
+ i_col++;
+
+ ch_ptr = strtok(NULL, " , \t");
+ }
+
+ i_row++;
+ }
+
+ infile.close();
+ infile.clear();
+
+ return;
}
-void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval) {
- igzstream infile (file_kd.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error! fail to open the D file: "<<file_kd<<endl;
- error=true;
- return;
- }
+void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { // Fills the pre-sized vector eval from a text file holding one value per line; error is set to true on failure.
+ igzstream infile(file_kd.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open the D file: " << file_kd << endl;
+ error = true;
+ return;
+ }
- size_t n_row=eval->size, i_row=0;
+ size_t n_row = eval->size, i_row = 0;
- gsl_vector_set_zero (eval);
+ gsl_vector_set_zero(eval); // entries not present in the file stay 0; too few rows is not reported
- string line;
- char *ch_ptr;
- double d;
+ string line;
+ char *ch_ptr;
+ double d;
- while (getline(infile, line)) {
- if (i_row==n_row) {
- cout<<"error! number of rows in the D file is larger "<<
- "than expected."<<endl;
- error=true;
- }
+ while (getline(infile, line)) {
+ if (i_row == n_row) { // NOTE(review): error is flagged but the loop continues, so gsl_vector_set below receives an out-of-range index
+ cout << "error! number of rows in the D file is larger "
+ << "than expected." << endl;
+ error = true;
+ }
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- d=atof(ch_ptr);
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ d = atof(ch_ptr); // NOTE(review): ch_ptr is NULL for a line with no fields (e.g. a blank line)
- ch_ptr=strtok (NULL, " , \t");
- if (ch_ptr!=NULL) {
- cout<<"error! number of columns in the D file is larger "<<
- "than expected, for row = "<<i_row<<endl;
- error=true;
- }
+ ch_ptr = strtok(NULL, " , \t");
+ if (ch_ptr != NULL) { // a second field on the line is reported as an error; the first value is still stored
+ cout << "error! number of columns in the D file is larger "
+ << "than expected, for row = " << i_row << endl;
+ error = true;
+ }
- gsl_vector_set (eval, i_row, d);
+ gsl_vector_set(eval, i_row, d);
- i_row++;
- }
+ i_row++;
+ }
- infile.close();
- infile.clear();
+ infile.close();
+ infile.clear();
- return;
+ return;
}
// Read bimbam mean genotype file and calculate kinship matrix.
-bool BimbamKin (const string &file_geno, vector<int> &indicator_snp,
- const int k_mode, const int display_pace,
- gsl_matrix *matrix_kin) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- size_t n_miss;
- double d, geno_mean, geno_var;
-
- size_t ni_total=matrix_kin->size1;
- gsl_vector *geno=gsl_vector_alloc (ni_total);
- gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
- gsl_matrix_set_zero(Xlarge);
-
- size_t ns_test=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- geno_mean=0.0; n_miss=0; geno_var=0.0;
- gsl_vector_set_all(geno_miss, 0);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(geno_miss, i, 0); n_miss++;
- } else {
- d=atof(ch_ptr);
- gsl_vector_set (geno, i, d);
- gsl_vector_set (geno_miss, i, 1);
- geno_mean+=d;
- geno_var+=d*d;
- }
- }
-
- geno_mean/=(double)(ni_total-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_total;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_total; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {
- gsl_vector_set(geno, i, geno_mean);
- }
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- if (k_mode==2 && geno_var!=0) {
- gsl_vector_scale (geno, 1.0/sqrt(geno_var));
- }
- gsl_vector_view Xlarge_col=
- gsl_matrix_column (Xlarge, ns_test%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
- ns_test++;
-
- if (ns_test%msize==0) {
- eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0,
- matrix_kin);
- gsl_matrix_set_zero(Xlarge);
- }
- }
-
- if (ns_test%msize!=0) {
- eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
- }
- cout<<endl;
-
- gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-
- for (size_t i=0; i<ni_total; ++i) {
- for (size_t j=0; j<i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i);
- gsl_matrix_set (matrix_kin, i, j, d);
- }
- }
-
- gsl_vector_free (geno);
- gsl_vector_free (geno_miss);
- gsl_matrix_free (Xlarge);
-
- infile.close();
- infile.clear();
-
- return true;
+bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, // Accumulates a kinship matrix from the SNP lines of file_geno whose indicator_snp entry is nonzero.
+ const int k_mode, const int display_pace, // k_mode == 2 additionally scales each SNP by 1/sqrt(variance); display_pace sets how often the progress bar is refreshed.
+ gsl_matrix *matrix_kin) { // matrix_kin is accumulated into with beta = 1.0 (never zeroed here) and its size1 is the number of individuals; returns false only if the file cannot be opened.
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ size_t n_miss;
+ double d, geno_mean, geno_var;
+
+ size_t ni_total = matrix_kin->size1;
+ gsl_vector *geno = gsl_vector_alloc(ni_total);
+ gsl_vector *geno_miss = gsl_vector_alloc(ni_total); // despite the name: 0 = missing, 1 = observed
+
+ // Create a large matrix.
+ size_t msize = 10000; // SNP columns are buffered in batches of msize before each matrix product
+ gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+ gsl_matrix_set_zero(Xlarge);
+
+ size_t ns_test = 0; // number of SNPs used so far
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof(); // expression value is discarded; the call is made only for its effect on infile and line
+ if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t"); // the first three fields of the line are consumed without being used
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ geno_mean = 0.0;
+ n_miss = 0;
+ geno_var = 0.0;
+ gsl_vector_set_all(geno_miss, 0);
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (strcmp(ch_ptr, "NA") == 0) { // NOTE(review): ch_ptr is NULL here if the line has fewer fields than expected
+ gsl_vector_set(geno_miss, i, 0);
+ n_miss++;
+ } else {
+ d = atof(ch_ptr);
+ gsl_vector_set(geno, i, d);
+ gsl_vector_set(geno_miss, i, 1);
+ geno_mean += d; // running sum, turned into a mean after the loop
+ geno_var += d * d; // running sum of squares, turned into a variance after the loop
+ }
+ }
+
+ geno_mean /= (double)(ni_total - n_miss); // mean over observed genotypes; NOTE(review): denominator is 0 when all are missing
+ geno_var += geno_mean * geno_mean * (double)n_miss; // add the squared contribution of the mean-imputed entries
+ geno_var /= (double)ni_total;
+ geno_var -= geno_mean * geno_mean; // E[x^2] - mean^2 over all individuals after imputation
+
+ for (size_t i = 0; i < ni_total; ++i) {
+ if (gsl_vector_get(geno_miss, i) == 0) {
+ gsl_vector_set(geno, i, geno_mean); // missing entries are replaced by the observed mean
+ }
+ }
+
+ gsl_vector_add_constant(geno, -1.0 * geno_mean); // center the SNP
+
+ if (k_mode == 2 && geno_var != 0) {
+ gsl_vector_scale(geno, 1.0 / sqrt(geno_var));
+ }
+ gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % msize);
+ gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+ ns_test++;
+
+ if (ns_test % msize == 0) { // batch is full: fold it into matrix_kin and start a fresh one
+ eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); // eigenlib_dgemm is defined elsewhere; presumably matrix_kin += Xlarge * Xlarge^T
+ gsl_matrix_set_zero(Xlarge);
+ }
+ }
+
+ if (ns_test % msize != 0) { // fold in the last, partially filled batch (unused columns are zero)
+ eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ }
+ cout << endl;
+
+ gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test); // average over SNPs; NOTE(review): ns_test is 0 when no SNP is selected
+
+ for (size_t i = 0; i < ni_total; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ d = gsl_matrix_get(matrix_kin, j, i); // copy the upper triangle onto the lower triangle
+ gsl_matrix_set(matrix_kin, i, j, d);
+ }
+ }
+
+ gsl_vector_free(geno);
+ gsl_vector_free(geno_miss);
+ gsl_matrix_free(Xlarge);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
-bool PlinkKin (const string &file_bed, vector<int> &indicator_snp,
- const int k_mode, const int display_pace,
- gsl_matrix *matrix_kin) {
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return false;
- }
-
- char ch[1];
- bitset<8> b;
-
- size_t n_miss, ci_total;
- double d, geno_mean, geno_var;
-
- size_t ni_total=matrix_kin->size1;
- gsl_vector *geno=gsl_vector_alloc (ni_total);
-
- size_t ns_test=0;
- int n_bit;
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize);
- gsl_matrix_set_zero(Xlarge);
-
- // Calculate n_bit and c, the number of bit for each snp.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- //print the first three magic numbers
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==ni_total) {
- break;
- }
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(geno, ci_total, 2.0);
- geno_mean+=2.0;
- geno_var+=4.0;
- }
- else {
- gsl_vector_set(geno, ci_total, 1.0);
- geno_mean+=1.0;
- geno_var+=1.0;
- }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(geno,ci_total,0.0);
- }
- else {
- gsl_vector_set(geno,ci_total,-9.0);
- n_miss++;
- }
- }
-
- ci_total++;
- }
- }
-
- geno_mean/=(double)(ni_total-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_total;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_total; ++i) {
- d=gsl_vector_get(geno,i);
- if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- if (k_mode==2 && geno_var!=0) {
- gsl_vector_scale (geno, 1.0/sqrt(geno_var));
- }
- gsl_vector_view Xlarge_col=
- gsl_matrix_column (Xlarge, ns_test%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
- ns_test++;
-
- if (ns_test%msize==0) {
- eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
- gsl_matrix_set_zero(Xlarge);
- }
- }
-
- if (ns_test%msize!=0) {
- eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
- }
-
- cout<<endl;
-
- gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-
- for (size_t i=0; i<ni_total; ++i) {
- for (size_t j=0; j<i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i);
- gsl_matrix_set (matrix_kin, i, j, d);
- }
- }
-
- gsl_vector_free (geno);
- gsl_matrix_free (Xlarge);
-
- infile.close();
- infile.clear();
-
- return true;
+bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, // Accumulates a kinship matrix from the SNPs of a bed file whose indicator_snp entry is nonzero.
+ const int k_mode, const int display_pace, // k_mode == 2 additionally scales each SNP by 1/sqrt(variance); display_pace sets how often the progress bar is refreshed.
+ gsl_matrix *matrix_kin) { // matrix_kin is accumulated into with beta = 1.0 (never zeroed here) and its size1 is the number of individuals; returns false only if the file cannot be opened.
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return false;
+ }
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t n_miss, ci_total;
+ double d, geno_mean, geno_var;
+
+ size_t ni_total = matrix_kin->size1;
+ gsl_vector *geno = gsl_vector_alloc(ni_total);
+
+ size_t ns_test = 0; // number of SNPs used so far
+ int n_bit;
+
+ // Create a large matrix.
+ size_t msize = 10000; // SNP columns are buffered in batches of msize before each matrix product
+ gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+ gsl_matrix_set_zero(Xlarge);
+
+ // Calculate n_bit, the number of bytes that store one SNP (four genotypes per byte).
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Skip the first three magic-number bytes (they are read and discarded, not printed).
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t * n_bit + 3); // absolute offset of SNP t, so skipped SNPs need no reads
+
+ // Read genotypes.
+ geno_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0; // index of the individual being decoded
+ geno_var = 0.0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == ni_total) { // remaining bit pairs of the last byte are padding
+ break;
+ }
+
+ if (b[2 * j] == 0) { // bit pair (b[2j], b[2j+1]): (0,0) -> 2, (0,1) -> 1, (1,1) -> 0, (1,0) -> missing
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(geno, ci_total, 2.0);
+ geno_mean += 2.0;
+ geno_var += 4.0;
+ } else {
+ gsl_vector_set(geno, ci_total, 1.0);
+ geno_mean += 1.0;
+ geno_var += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(geno, ci_total, 0.0);
+ } else {
+ gsl_vector_set(geno, ci_total, -9.0); // -9.0 is the in-vector sentinel for a missing genotype
+ n_miss++;
+ }
+ }
+
+ ci_total++;
+ }
+ }
+
+ geno_mean /= (double)(ni_total - n_miss); // mean over observed genotypes; NOTE(review): denominator is 0 when all are missing
+ geno_var += geno_mean * geno_mean * (double)n_miss; // add the squared contribution of the mean-imputed entries
+ geno_var /= (double)ni_total;
+ geno_var -= geno_mean * geno_mean; // E[x^2] - mean^2 over all individuals after imputation
+
+ for (size_t i = 0; i < ni_total; ++i) {
+ d = gsl_vector_get(geno, i);
+ if (d == -9.0) {
+ gsl_vector_set(geno, i, geno_mean); // missing entries are replaced by the observed mean
+ }
+ }
+
+ gsl_vector_add_constant(geno, -1.0 * geno_mean); // center the SNP
+
+ if (k_mode == 2 && geno_var != 0) {
+ gsl_vector_scale(geno, 1.0 / sqrt(geno_var));
+ }
+ gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % msize);
+ gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+ ns_test++;
+
+ if (ns_test % msize == 0) { // batch is full: fold it into matrix_kin and start a fresh one
+ eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); // eigenlib_dgemm is defined elsewhere; presumably matrix_kin += Xlarge * Xlarge^T
+ gsl_matrix_set_zero(Xlarge);
+ }
+ }
+
+ if (ns_test % msize != 0) { // fold in the last, partially filled batch (unused columns are zero)
+ eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ }
+
+ cout << endl;
+
+ gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test); // average over SNPs; NOTE(review): ns_test is 0 when no SNP is selected
+
+ for (size_t i = 0; i < ni_total; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ d = gsl_matrix_get(matrix_kin, j, i); // copy the upper triangle onto the lower triangle
+ gsl_matrix_set(matrix_kin, i, j, d);
+ }
+ }
+
+ gsl_vector_free(geno);
+ gsl_matrix_free(Xlarge);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read bimbam mean genotype file, the second time, recode "mean"
// genotype and calculate K.
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
- vector<int> &indicator_snp, gsl_matrix *UtX,
- gsl_matrix *K, const bool calc_K) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- if (calc_K==true) {gsl_matrix_set_zero (K);}
-
- gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
- gsl_vector *genotype_miss=gsl_vector_alloc (UtX->size1);
- double geno, geno_mean;
- size_t n_miss;
-
- int ni_total=(int)indicator_idv.size();
- int ns_total=(int)indicator_snp.size();
- int ni_test=UtX->size1;
- int ns_test=UtX->size2;
-
- int c_idv=0, c_snp=0;
-
- for (int i=0; i<ns_total; ++i) {
- !safeGetline(infile, line).eof();
- if (indicator_snp[i]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- c_idv=0; geno_mean=0; n_miss=0;
- gsl_vector_set_zero (genotype_miss);
- for (int j=0; j<ni_total; ++j) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[j]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set (genotype_miss, c_idv, 1);
- n_miss++;
- } else {
- geno=atof(ch_ptr);
- gsl_vector_set (genotype, c_idv, geno);
- geno_mean+=geno;
- }
- c_idv++;
- }
-
- geno_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {
- geno=0;
- }
- else {
- geno=gsl_vector_get (genotype, i);
- geno-=geno_mean;
- }
-
- gsl_vector_set (genotype, i, geno);
- gsl_matrix_set (UtX, i, c_snp, geno);
- }
-
- if (calc_K==true) {
- gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
- }
-
- c_snp++;
- }
-
- if (calc_K==true) {
- gsl_matrix_scale (K, 1.0/(double)ns_test);
-
- for (size_t i=0; i<genotype->size; ++i) {
- for (size_t j=0; j<i; ++j) {
- geno=gsl_matrix_get (K, j, i);
- gsl_matrix_set (K, i, j, geno);
- }
- }
- }
-
- gsl_vector_free (genotype);
- gsl_vector_free (genotype_miss);
-
- infile.clear();
- infile.close();
-
- return true;
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+ const bool calc_K) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ if (calc_K == true) {
+ gsl_matrix_set_zero(K);
+ }
+
+ gsl_vector *genotype = gsl_vector_alloc(UtX->size1);
+ gsl_vector *genotype_miss = gsl_vector_alloc(UtX->size1);
+ double geno, geno_mean;
+ size_t n_miss;
+
+ int ni_total = (int)indicator_idv.size();
+ int ns_total = (int)indicator_snp.size();
+ int ni_test = UtX->size1;
+ int ns_test = UtX->size2;
+
+ int c_idv = 0, c_snp = 0;
+
+ for (int i = 0; i < ns_total; ++i) {
+ !safeGetline(infile, line).eof();
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ c_idv = 0;
+ geno_mean = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(genotype_miss);
+ for (int j = 0; j < ni_total; ++j) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[j] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(genotype_miss, c_idv, 1);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+ gsl_vector_set(genotype, c_idv, geno);
+ geno_mean += geno;
+ }
+ c_idv++;
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < genotype->size; ++i) {
+ if (gsl_vector_get(genotype_miss, i) == 1) {
+ geno = 0;
+ } else {
+ geno = gsl_vector_get(genotype, i);
+ geno -= geno_mean;
+ }
+
+ gsl_vector_set(genotype, i, geno);
+ gsl_matrix_set(UtX, i, c_snp, geno);
+ }
+
+ if (calc_K == true) {
+ gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
+ }
+
+ c_snp++;
+ }
+
+ if (calc_K == true) {
+ gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+ for (size_t i = 0; i < genotype->size; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ geno = gsl_matrix_get(K, j, i);
+ gsl_matrix_set(K, i, j, geno);
+ }
+ }
+ }
+
+ gsl_vector_free(genotype);
+ gsl_vector_free(genotype_miss);
+
+ infile.clear();
+ infile.close();
+
+ return true;
}
// Compact version of the above function, using uchar instead of
// gsl_matrix.
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
- vector<int> &indicator_snp,
- vector<vector<unsigned char> > &Xt,
- gsl_matrix *K, const bool calc_K, const size_t ni_test,
- const size_t ns_test) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return false;
- }
-
- Xt.clear();
- vector<unsigned char> Xt_row;
- for (size_t i=0; i<ni_test; i++) {
- Xt_row.push_back(0);
- }
-
- string line;
- char *ch_ptr;
-
- if (calc_K==true) {gsl_matrix_set_zero (K);}
-
- gsl_vector *genotype=gsl_vector_alloc (ni_test);
- gsl_vector *genotype_miss=gsl_vector_alloc (ni_test);
- double geno, geno_mean;
- size_t n_miss;
-
- size_t ni_total= indicator_idv.size();
- size_t ns_total= indicator_snp.size();
-
- size_t c_idv=0, c_snp=0;
-
- for (size_t i=0; i<ns_total; ++i) {
- !safeGetline(infile, line).eof();
- if (indicator_snp[i]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- c_idv=0; geno_mean=0; n_miss=0;
- gsl_vector_set_zero (genotype_miss);
- for (uint j=0; j<ni_total; ++j) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[j]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set (genotype_miss, c_idv, 1);
- n_miss++;
- } else {
- geno=atof(ch_ptr);
- gsl_vector_set (genotype, c_idv, geno);
- geno_mean+=geno;
- }
- c_idv++;
- }
-
- geno_mean/=(double)(ni_test-n_miss);
-
- for (size_t j=0; j<genotype->size; ++j) {
- if (gsl_vector_get (genotype_miss, j)==1) {
- geno=geno_mean;
- } else {
- geno=gsl_vector_get (genotype, j);
- }
-
- Xt_row[j]=Double02ToUchar(geno);
- gsl_vector_set (genotype, j, (geno-geno_mean));
- }
- Xt.push_back(Xt_row);
-
- if (calc_K==true) {
- gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
- }
-
- c_snp++;
- }
-
- if (calc_K==true) {
- gsl_matrix_scale (K, 1.0/(double)ns_test);
-
- for (size_t i=0; i<genotype->size; ++i) {
- for (size_t j=0; j<i; ++j) {
- geno=gsl_matrix_get (K, j, i);
- gsl_matrix_set (K, i, j, geno);
- }
- }
- }
-
- gsl_vector_free (genotype);
- gsl_vector_free (genotype_miss);
-
- infile.clear();
- infile.close();
-
- return true;
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+ vector<int> &indicator_snp,
+ vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+ const bool calc_K, const size_t ni_test,
+ const size_t ns_test) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return false;
+ }
+
+ Xt.clear();
+ vector<unsigned char> Xt_row;
+ for (size_t i = 0; i < ni_test; i++) {
+ Xt_row.push_back(0);
+ }
+
+ string line;
+ char *ch_ptr;
+
+ if (calc_K == true) {
+ gsl_matrix_set_zero(K);
+ }
+
+ gsl_vector *genotype = gsl_vector_alloc(ni_test);
+ gsl_vector *genotype_miss = gsl_vector_alloc(ni_test);
+ double geno, geno_mean;
+ size_t n_miss;
+
+ size_t ni_total = indicator_idv.size();
+ size_t ns_total = indicator_snp.size();
+
+ size_t c_idv = 0, c_snp = 0;
+
+ for (size_t i = 0; i < ns_total; ++i) {
+ !safeGetline(infile, line).eof();
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ c_idv = 0;
+ geno_mean = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(genotype_miss);
+ for (uint j = 0; j < ni_total; ++j) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[j] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(genotype_miss, c_idv, 1);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+ gsl_vector_set(genotype, c_idv, geno);
+ geno_mean += geno;
+ }
+ c_idv++;
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+
+ for (size_t j = 0; j < genotype->size; ++j) {
+ if (gsl_vector_get(genotype_miss, j) == 1) {
+ geno = geno_mean;
+ } else {
+ geno = gsl_vector_get(genotype, j);
+ }
+
+ Xt_row[j] = Double02ToUchar(geno);
+ gsl_vector_set(genotype, j, (geno - geno_mean));
+ }
+ Xt.push_back(Xt_row);
+
+ if (calc_K == true) {
+ gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
+ }
+
+ c_snp++;
+ }
+
+ if (calc_K == true) {
+ gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+ for (size_t i = 0; i < genotype->size; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ geno = gsl_matrix_get(K, j, i);
+ gsl_matrix_set(K, i, j, geno);
+ }
+ }
+ }
+
+ gsl_vector_free(genotype);
+ gsl_vector_free(genotype_miss);
+
+ infile.clear();
+ infile.close();
+
+ return true;
}
// Read bimbam mean genotype file, the second time, recode "mean"
// genotype and calculate K.
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
- vector<int> &indicator_snp, gsl_matrix *UtX,
- gsl_matrix *K, const bool calc_K) {
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return false;
- }
-
- char ch[1];
- bitset<8> b;
-
- size_t ni_total=indicator_idv.size();
- size_t ns_total=indicator_snp.size();
- size_t ni_test=UtX->size1;
- size_t ns_test=UtX->size2;
- int n_bit;
-
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1;}
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- if (calc_K==true) {gsl_matrix_set_zero (K);}
-
- gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
-
- double geno, geno_mean;
- size_t n_miss;
- size_t c_idv=0, c_snp=0, c=0;
-
- // Start reading snps and doing association test.
- for (size_t t=0; t<ns_total; ++t) {
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- c_idv=0; geno_mean=0.0; n_miss=0; c=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && c==ni_total) {break;}
- if (indicator_idv[c]==0) {c++; continue;}
- c++;
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(genotype, c_idv, 2.0);
- geno_mean+=2.0;
- }
- else {
- gsl_vector_set(genotype, c_idv, 1.0);
- geno_mean+=1.0;
- }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(genotype, c_idv, 0.0);
- geno_mean+=0.0;
- }
- else {
- gsl_vector_set(genotype, c_idv, -9.0);
- n_miss++;
- }
- }
- c_idv++;
- }
- }
-
- geno_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<genotype->size; ++i) {
- geno=gsl_vector_get (genotype, i);
- if (geno==-9) {geno=0;}
- else {geno-=geno_mean;}
-
- gsl_vector_set (genotype, i, geno);
- gsl_matrix_set (UtX, i, c_snp, geno);
- }
-
- if (calc_K==true) {
- gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
- }
-
- c_snp++;
- }
-
- if (calc_K==true) {
- gsl_matrix_scale (K, 1.0/(double)ns_test);
-
- for (size_t i=0; i<genotype->size; ++i) {
- for (size_t j=0; j<i; ++j) {
- geno=gsl_matrix_get (K, j, i);
- gsl_matrix_set (K, i, j, geno);
- }
- }
- }
-
- gsl_vector_free (genotype);
- infile.clear();
- infile.close();
-
- return true;
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+ const bool calc_K) {
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return false;
+ }
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t ni_total = indicator_idv.size();
+ size_t ns_total = indicator_snp.size();
+ size_t ni_test = UtX->size1;
+ size_t ns_test = UtX->size2;
+ int n_bit;
+
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Read and discard the first three magic-number bytes.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ if (calc_K == true) {
+ gsl_matrix_set_zero(K);
+ }
+
+ gsl_vector *genotype = gsl_vector_alloc(UtX->size1);
+
+ double geno, geno_mean;
+ size_t n_miss;
+ size_t c_idv = 0, c_snp = 0, c = 0;
+
+ // Read each selected SNP, center it, store it in UtX, optionally add to K.
+ for (size_t t = 0; t < ns_total; ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // Seek to SNP t: n_bit bytes per SNP, after the 3 magic-number bytes.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ c_idv = 0;
+ geno_mean = 0.0;
+ n_miss = 0;
+ c = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && c == ni_total) {
+ break;
+ }
+ if (indicator_idv[c] == 0) {
+ c++;
+ continue;
+ }
+ c++;
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(genotype, c_idv, 2.0);
+ geno_mean += 2.0;
+ } else {
+ gsl_vector_set(genotype, c_idv, 1.0);
+ geno_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(genotype, c_idv, 0.0);
+ geno_mean += 0.0;
+ } else {
+ gsl_vector_set(genotype, c_idv, -9.0);
+ n_miss++;
+ }
+ }
+ c_idv++;
+ }
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < genotype->size; ++i) {
+ geno = gsl_vector_get(genotype, i);
+ if (geno == -9) {
+ geno = 0;
+ } else {
+ geno -= geno_mean;
+ }
+
+ gsl_vector_set(genotype, i, geno);
+ gsl_matrix_set(UtX, i, c_snp, geno);
+ }
+
+ if (calc_K == true) {
+ gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
+ }
+
+ c_snp++;
+ }
+
+ if (calc_K == true) {
+ gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+ for (size_t i = 0; i < genotype->size; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ geno = gsl_matrix_get(K, j, i);
+ gsl_matrix_set(K, i, j, geno);
+ }
+ }
+ }
+
+ gsl_vector_free(genotype);
+ infile.clear();
+ infile.close();
+
+ return true;
}
// Compact version of the above function, using uchar instead of gsl_matrix.
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
- vector<int> &indicator_snp,
- vector<vector<unsigned char> > &Xt, gsl_matrix *K,
- const bool calc_K, const size_t ni_test,
- const size_t ns_test) {
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return false;
- }
-
- Xt.clear();
- vector<unsigned char> Xt_row;
- for (size_t i=0; i<ni_test; i++) {
- Xt_row.push_back(0);
- }
-
- char ch[1];
- bitset<8> b;
-
- size_t ni_total=indicator_idv.size();
- size_t ns_total=indicator_snp.size();
- int n_bit;
-
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1;}
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- if (calc_K==true) {gsl_matrix_set_zero (K);}
-
- gsl_vector *genotype=gsl_vector_alloc (ni_test);
-
- double geno, geno_mean;
- size_t n_miss;
- size_t c_idv=0, c_snp=0, c=0;
-
- // Start reading SNPs and doing association test.
- for (size_t t=0; t<ns_total; ++t) {
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- c_idv=0; geno_mean=0.0; n_miss=0; c=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && c==ni_total) {break;}
- if (indicator_idv[c]==0) {c++; continue;}
- c++;
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(genotype, c_idv, 2.0);
- geno_mean+=2.0;
- }
- else {
- gsl_vector_set(genotype, c_idv, 1.0);
- geno_mean+=1.0;
- }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(genotype, c_idv, 0.0);
- geno_mean+=0.0;
- }
- else {
- gsl_vector_set(genotype, c_idv, -9.0);
- n_miss++;
- }
- }
- c_idv++;
- }
- }
-
- geno_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<genotype->size; ++i) {
- geno=gsl_vector_get (genotype, i);
- if (geno==-9) {geno=geno_mean;}
-
- Xt_row[i]=Double02ToUchar(geno);
-
- geno-=geno_mean;
-
- gsl_vector_set (genotype, i, geno);
- }
- Xt.push_back(Xt_row);
-
- if (calc_K==true) {
- gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);
- }
-
- c_snp++;
- }
-
- if (calc_K==true) {
- gsl_matrix_scale (K, 1.0/(double)ns_test);
-
- for (size_t i=0; i<genotype->size; ++i) {
- for (size_t j=0; j<i; ++j) {
- geno=gsl_matrix_get (K, j, i);
- gsl_matrix_set (K, i, j, geno);
- }
- }
- }
-
- gsl_vector_free (genotype);
- infile.clear();
- infile.close();
-
- return true;
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
+ gsl_matrix *K, const bool calc_K, const size_t ni_test,
+ const size_t ns_test) {
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return false;
+ }
+
+ Xt.clear();
+ vector<unsigned char> Xt_row;
+ for (size_t i = 0; i < ni_test; i++) {
+ Xt_row.push_back(0);
+ }
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t ni_total = indicator_idv.size();
+ size_t ns_total = indicator_snp.size();
+ int n_bit;
+
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Read and discard the first three magic-number bytes.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ if (calc_K == true) {
+ gsl_matrix_set_zero(K);
+ }
+
+ gsl_vector *genotype = gsl_vector_alloc(ni_test);
+
+ double geno, geno_mean;
+ size_t n_miss;
+ size_t c_idv = 0, c_snp = 0, c = 0;
+
+ // Read each selected SNP, store it in Xt, and optionally add it to K.
+ for (size_t t = 0; t < ns_total; ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // Seek to SNP t: n_bit bytes per SNP, after the 3 magic-number bytes.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ c_idv = 0;
+ geno_mean = 0.0;
+ n_miss = 0;
+ c = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && c == ni_total) {
+ break;
+ }
+ if (indicator_idv[c] == 0) {
+ c++;
+ continue;
+ }
+ c++;
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(genotype, c_idv, 2.0);
+ geno_mean += 2.0;
+ } else {
+ gsl_vector_set(genotype, c_idv, 1.0);
+ geno_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(genotype, c_idv, 0.0);
+ geno_mean += 0.0;
+ } else {
+ gsl_vector_set(genotype, c_idv, -9.0);
+ n_miss++;
+ }
+ }
+ c_idv++;
+ }
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < genotype->size; ++i) {
+ geno = gsl_vector_get(genotype, i);
+ if (geno == -9) {
+ geno = geno_mean;
+ }
+
+ Xt_row[i] = Double02ToUchar(geno);
+
+ geno -= geno_mean;
+
+ gsl_vector_set(genotype, i, geno);
+ }
+ Xt.push_back(Xt_row);
+
+ if (calc_K == true) {
+ gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
+ }
+
+ c_snp++;
+ }
+
+ if (calc_K == true) {
+ gsl_matrix_scale(K, 1.0 / (double)ns_test);
+
+ for (size_t i = 0; i < genotype->size; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ geno = gsl_matrix_get(K, j, i);
+ gsl_matrix_set(K, i, j, geno);
+ }
+ }
+ }
+
+ gsl_vector_free(genotype);
+ infile.clear();
+ infile.close();
+
+ return true;
}
-bool ReadFile_est (const string &file_est, const vector<size_t> &est_column,
- map<string, double> &mapRS2est) {
- mapRS2est.clear();
-
- ifstream infile (file_est.c_str(), ifstream::in);
- if (!infile) {
- cout<<"error opening estimated parameter file: "<<file_est<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string rs;
- double alpha, beta, gamma, d;
-
- // Header.
- getline(infile, line);
-
- size_t n=*max_element(est_column.begin(), est_column.end());
-
- while (getline(infile, line)) {
- ch_ptr=strtok ((char *)line.c_str(), " \t");
-
- alpha=0.0; beta=0.0; gamma=1.0;
- for (size_t i=0; i<n+1; ++i) {
- if (i==est_column[0]-1) {rs=ch_ptr;}
- if (i==est_column[1]-1) {alpha=atof(ch_ptr);}
- if (i==est_column[2]-1) {beta=atof(ch_ptr);}
- if (i==est_column[3]-1) {gamma=atof(ch_ptr);}
- if (i<n) {ch_ptr=strtok (NULL, " \t");}
- }
-
- d=alpha+beta*gamma;
-
- if (mapRS2est.count(rs)==0) {
- mapRS2est[rs]=d;
- }
- else {
- cout << "the same SNP occurs more than once in estimated "<<
- "parameter file: "<<rs<<endl;
- return false;
- }
- }
-
- infile.clear();
- infile.close();
- return true;
+bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
+ map<string, double> &mapRS2est) {
+ mapRS2est.clear();
+
+ ifstream infile(file_est.c_str(), ifstream::in);
+ if (!infile) {
+ cout << "error opening estimated parameter file: " << file_est << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ string rs;
+ double alpha, beta, gamma, d;
+
+ // Header.
+ getline(infile, line);
+
+ size_t n = *max_element(est_column.begin(), est_column.end());
+
+ while (getline(infile, line)) {
+ ch_ptr = strtok((char *)line.c_str(), " \t");
+
+ alpha = 0.0;
+ beta = 0.0;
+ gamma = 1.0;
+ for (size_t i = 0; i < n + 1; ++i) {
+ if (i == est_column[0] - 1) {
+ rs = ch_ptr;
+ }
+ if (i == est_column[1] - 1) {
+ alpha = atof(ch_ptr);
+ }
+ if (i == est_column[2] - 1) {
+ beta = atof(ch_ptr);
+ }
+ if (i == est_column[3] - 1) {
+ gamma = atof(ch_ptr);
+ }
+ if (i < n) {
+ ch_ptr = strtok(NULL, " \t");
+ }
+ }
+
+ d = alpha + beta * gamma;
+
+ if (mapRS2est.count(rs) == 0) {
+ mapRS2est[rs] = d;
+ } else {
+ cout << "the same SNP occurs more than once in estimated "
+ << "parameter file: " << rs << endl;
+ return false;
+ }
+ }
+
+ infile.clear();
+ infile.close();
+ return true;
}
-bool CountFileLines (const string &file_input, size_t &n_lines) {
- igzstream infile (file_input.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error! fail to open file: "<<file_input<<endl;
- return false;
- }
+bool CountFileLines(const string &file_input, size_t &n_lines) {
+ igzstream infile(file_input.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open file: " << file_input << endl;
+ return false;
+ }
- n_lines=count(istreambuf_iterator<char>(infile), istreambuf_iterator<char>(), '\n');
- infile.seekg (0, ios::beg);
+ n_lines = count(istreambuf_iterator<char>(infile),
+ istreambuf_iterator<char>(), '\n');
+ infile.seekg(0, ios::beg);
- return true;
+ return true;
}
// Read gene expression file.
-bool ReadFile_gene (const string &file_gene, vector<double> &vec_read,
- vector<SNPINFO> &snpInfo, size_t &ng_total) {
- vec_read.clear();
- ng_total=0;
-
- igzstream infile (file_gene.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error! fail to open gene expression file: "<<file_gene<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
- string rs;
-
- size_t n_idv=0, t=0;
-
- // Header.
- getline(infile, line);
-
- while (getline(infile, line)) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
-
- ch_ptr=strtok (NULL, " , \t");
-
- t=0;
- while (ch_ptr!=NULL) {
- if (ng_total==0) {
- vec_read.push_back(0);
- t++;
- n_idv++;
- } else {
- vec_read[t]+=atof(ch_ptr);
- t++;
- }
-
- ch_ptr=strtok (NULL, " , \t");
- }
-
- if (t!=n_idv) {
- cout<<"error! number of columns doesn't match in row: "<<
- ng_total<<endl;
- return false;
- }
-
- SNPINFO sInfo={"-9",rs,-9,-9,"-9","-9",0,-9,-9,0,0,0};
- snpInfo.push_back(sInfo);
-
- ng_total++;
- }
-
- infile.close();
- infile.clear();
-
- return true;
+bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
+ vector<SNPINFO> &snpInfo, size_t &ng_total) {
+ vec_read.clear();
+ ng_total = 0;
+
+ igzstream infile(file_gene.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open gene expression file: " << file_gene << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+ string rs;
+
+ size_t n_idv = 0, t = 0;
+
+ // Header.
+ getline(infile, line);
+
+ while (getline(infile, line)) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ rs = ch_ptr;
+
+ ch_ptr = strtok(NULL, " , \t");
+
+ t = 0;
+ while (ch_ptr != NULL) {
+ if (ng_total == 0) {
+ vec_read.push_back(0);
+ t++;
+ n_idv++;
+ } else {
+ vec_read[t] += atof(ch_ptr);
+ t++;
+ }
+
+ ch_ptr = strtok(NULL, " , \t");
+ }
+
+ if (t != n_idv) {
+ cout << "error! number of columns doesn't match in row: " << ng_total
+ << endl;
+ return false;
+ }
+
+ SNPINFO sInfo = {"-9", rs, -9, -9, "-9", "-9", 0, -9, -9, 0, 0, 0};
+ snpInfo.push_back(sInfo);
+
+ ng_total++;
+ }
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// WJA Added
// Read Oxford sample file.
-bool ReadFile_sample (const string &file_sample,
- vector<vector<int> > &indicator_pheno,
- vector<vector<double> > &pheno,
- const vector<size_t> &p_column,
- vector<int> &indicator_cvt,
- vector<vector<double> > &cvt, size_t &n_cvt) {
- indicator_pheno.clear();
- pheno.clear();
- indicator_cvt.clear();
-
- igzstream infile (file_sample.c_str(), igzstream::in);
-
- if (!infile) {
- cout<<"error! fail to open sample file: "<<file_sample<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string id;
- double p,d;
-
- vector<double> pheno_row;
- vector<int> ind_pheno_row;
- int flag_na=0;
-
- size_t num_cols=0;
- size_t num_p_in_file=0;
- size_t num_cvt_in_file=0;
-
- map<size_t, size_t> mapP2c;
- for (size_t i=0; i<p_column.size(); i++) {
- mapP2c[p_column[i]]=i;
- pheno_row.push_back(-9);
- ind_pheno_row.push_back(0);
- }
-
- // Read header line1.
- if(!safeGetline(infile, line).eof()) {
- ch_ptr=strtok((char *)line.c_str(), " \t");
- if(strcmp(ch_ptr, "ID_1")!=0) {return false;}
- ch_ptr=strtok(NULL, " \t");
- if(strcmp(ch_ptr, "ID_2")!=0) {return false;}
- ch_ptr=strtok(NULL, " \t");
- if(strcmp(ch_ptr, "missing")!=0) {return false;}
- while (ch_ptr!=NULL) {
- num_cols++;
- ch_ptr=strtok (NULL, " \t");
-
- }
- num_cols--;
- }
-
- vector<map<uint32_t, size_t> > cvt_factor_levels;
-
- char col_type[num_cols];
-
- // Read header line2.
- if(!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " \t");
- if(strcmp(ch_ptr, "0")!=0) {return false;}
- ch_ptr=strtok(NULL, " \t");
- if(strcmp(ch_ptr, "0")!=0) {return false;}
- ch_ptr=strtok(NULL, " \t");
- if(strcmp(ch_ptr, "0")!=0) {return false;}
- size_t it=0;
- ch_ptr=strtok (NULL, " \t");
- if(ch_ptr!=NULL)
- while(ch_ptr!=NULL){
- col_type[it++]=ch_ptr[0];
- if(ch_ptr[0]=='D') {
- cvt_factor_levels.push_back(map<uint32_t,size_t>());
- num_cvt_in_file++;
- }
- if(ch_ptr[0]=='C') {num_cvt_in_file++;}
- if((ch_ptr[0]=='P')||(ch_ptr[0]=='B')) {
- num_p_in_file++;}
- ch_ptr=strtok(NULL, " \t");
- }
-
- }
-
- while (!safeGetline(infile, line).eof()) {
-
- ch_ptr=strtok ((char *)line.c_str(), " \t");
-
- for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}
-
- size_t i=0;
- size_t p_i=0;
- size_t fac_cvt_i=0;
-
- while (i<num_cols) {
-
- if((col_type[i]=='P')||(col_type[i]=='B'))
- {
- if (mapP2c.count(p_i+1)!=0) {
- if (strcmp(ch_ptr, "NA")==0) {
- ind_pheno_row[mapP2c[p_i+1]]=0;
- pheno_row[mapP2c[p_i+1]]=-9;
- }
- else {
- p=atof(ch_ptr);
- ind_pheno_row[mapP2c[p_i+1]]=1;
- pheno_row[mapP2c[p_i+1]]=p;
- }
- }
- p_i++;
- }
- if(col_type[i]=='D')
- {
-
- // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
- // IS INTEGRAL i.e for atoi error.
- if (strcmp(ch_ptr, "NA")!=0) {
- uint32_t level=atoi(ch_ptr);
- if (cvt_factor_levels[fac_cvt_i].count(level)==0) {
- cvt_factor_levels[fac_cvt_i][level]=
- cvt_factor_levels[fac_cvt_i].size();
- }
- }
- fac_cvt_i++;
- }
-
- ch_ptr=strtok (NULL, " \t");
- i++;
- }
-
- indicator_pheno.push_back(ind_pheno_row);
- pheno.push_back(pheno_row);
-
- }
-
- // Close and reopen the file.
- infile.close();
- infile.clear();
-
- if(num_cvt_in_file>0) {
- igzstream infile2 (file_sample.c_str(), igzstream::in);
-
- if (!infile2) {
- cout<<"error! fail to open sample file: "<<
- file_sample<<endl;
- return false;
- }
-
- // Skip header.
- safeGetline(infile2, line);
- safeGetline(infile2, line);
-
- // Pull in the covariates now we now the number of
- // factor levels.
- while (!safeGetline(infile2, line).eof()) {
-
- vector<double> v_d; flag_na=0;
- ch_ptr=strtok ((char *)line.c_str(), " \t");
-
- for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");}
-
- size_t i=0;
- size_t fac_cvt_i=0;
- size_t num_fac_levels;
- while (i<num_cols) {
-
- if(col_type[i]=='C') {
- if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
- else {d=atof(ch_ptr);}
-
- v_d.push_back(d);
- }
-
- if(col_type[i]=='D') {
-
- // NOTE THIS DOES NOT CHECK TO BE SURE
- // LEVEL IS INTEGRAL i.e for atoi error.
- num_fac_levels=cvt_factor_levels[fac_cvt_i].size();
- if(num_fac_levels>1) {
- if (strcmp(ch_ptr, "NA")==0) {
- flag_na=1;
- for(size_t it=0;it<num_fac_levels-1; it++) {
- v_d.push_back(-9);
- }
- }
- else {
- uint32_t level=atoi(ch_ptr);
- for(size_t it=0;it<num_fac_levels-1;it++) {
- cvt_factor_levels[fac_cvt_i][level]==it+1 ?
- v_d.push_back(1.0) :
- v_d.push_back(0.0);
- }
- }
- }
- fac_cvt_i++;
- }
-
- ch_ptr=strtok (NULL, " \t");
- i++;
- }
-
- if (flag_na==0) {
- indicator_cvt.push_back(1);
- } else {
- indicator_cvt.push_back(0);
- }
- cvt.push_back(v_d);
-
-
- }
-
- if (indicator_cvt.empty()) {n_cvt=0;}
- else {
- flag_na=0;
- for (vector<int>::size_type i=0;
- i<indicator_cvt.size();
- ++i) {
- if (indicator_cvt[i]==0) {continue;}
-
- if (flag_na==0) {
- flag_na=1;
- n_cvt=cvt[i].size();
- }
- if (flag_na!=0 && n_cvt!=cvt[i].size()) {
- cout<<"error! number of covariates in row "<<
- i<<" do not match other rows."<<endl;
- return false;
- }
- }
- }
-
- infile2.close();
- infile2.clear();
- }
- return true;
+bool ReadFile_sample(const string &file_sample,
+ vector<vector<int>> &indicator_pheno,
+ vector<vector<double>> &pheno,
+ const vector<size_t> &p_column, vector<int> &indicator_cvt,
+ vector<vector<double>> &cvt, size_t &n_cvt) {
+ indicator_pheno.clear();
+ pheno.clear();
+ indicator_cvt.clear();
+
+ igzstream infile(file_sample.c_str(), igzstream::in);
+
+ if (!infile) {
+ cout << "error! fail to open sample file: " << file_sample << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ string id;
+ double p, d;
+
+ vector<double> pheno_row;
+ vector<int> ind_pheno_row;
+ int flag_na = 0;
+
+ size_t num_cols = 0;
+ size_t num_p_in_file = 0;
+ size_t num_cvt_in_file = 0;
+
+ map<size_t, size_t> mapP2c;
+ for (size_t i = 0; i < p_column.size(); i++) {
+ mapP2c[p_column[i]] = i;
+ pheno_row.push_back(-9);
+ ind_pheno_row.push_back(0);
+ }
+
+ // Read header line1.
+ if (!safeGetline(infile, line).eof()) {
+ ch_ptr = strtok((char *)line.c_str(), " \t");
+ if (strcmp(ch_ptr, "ID_1") != 0) {
+ return false;
+ }
+ ch_ptr = strtok(NULL, " \t");
+ if (strcmp(ch_ptr, "ID_2") != 0) {
+ return false;
+ }
+ ch_ptr = strtok(NULL, " \t");
+ if (strcmp(ch_ptr, "missing") != 0) {
+ return false;
+ }
+ while (ch_ptr != NULL) {
+ num_cols++;
+ ch_ptr = strtok(NULL, " \t");
+ }
+ num_cols--;
+ }
+
+ vector<map<uint32_t, size_t>> cvt_factor_levels;
+
+ char col_type[num_cols];
+
+ // Read header line2.
+ if (!safeGetline(infile, line).eof()) {
+ ch_ptr = strtok((char *)line.c_str(), " \t");
+ if (strcmp(ch_ptr, "0") != 0) {
+ return false;
+ }
+ ch_ptr = strtok(NULL, " \t");
+ if (strcmp(ch_ptr, "0") != 0) {
+ return false;
+ }
+ ch_ptr = strtok(NULL, " \t");
+ if (strcmp(ch_ptr, "0") != 0) {
+ return false;
+ }
+ size_t it = 0;
+ ch_ptr = strtok(NULL, " \t");
+ if (ch_ptr != NULL)
+ while (ch_ptr != NULL) {
+ col_type[it++] = ch_ptr[0];
+ if (ch_ptr[0] == 'D') {
+ cvt_factor_levels.push_back(map<uint32_t, size_t>());
+ num_cvt_in_file++;
+ }
+ if (ch_ptr[0] == 'C') {
+ num_cvt_in_file++;
+ }
+ if ((ch_ptr[0] == 'P') || (ch_ptr[0] == 'B')) {
+ num_p_in_file++;
+ }
+ ch_ptr = strtok(NULL, " \t");
+ }
+ }
+
+ while (!safeGetline(infile, line).eof()) {
+
+ ch_ptr = strtok((char *)line.c_str(), " \t");
+
+ for (int it = 0; it < 3; it++) {
+ ch_ptr = strtok(NULL, " \t");
+ }
+
+ size_t i = 0;
+ size_t p_i = 0;
+ size_t fac_cvt_i = 0;
+
+ while (i < num_cols) {
+
+ if ((col_type[i] == 'P') || (col_type[i] == 'B')) {
+ if (mapP2c.count(p_i + 1) != 0) {
+ if (strcmp(ch_ptr, "NA") == 0) {
+ ind_pheno_row[mapP2c[p_i + 1]] = 0;
+ pheno_row[mapP2c[p_i + 1]] = -9;
+ } else {
+ p = atof(ch_ptr);
+ ind_pheno_row[mapP2c[p_i + 1]] = 1;
+ pheno_row[mapP2c[p_i + 1]] = p;
+ }
+ }
+ p_i++;
+ }
+ if (col_type[i] == 'D') {
+
+ // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
+ // IS INTEGRAL i.e for atoi error.
+ if (strcmp(ch_ptr, "NA") != 0) {
+ uint32_t level = atoi(ch_ptr);
+ if (cvt_factor_levels[fac_cvt_i].count(level) == 0) {
+ cvt_factor_levels[fac_cvt_i][level] =
+ cvt_factor_levels[fac_cvt_i].size();
+ }
+ }
+ fac_cvt_i++;
+ }
+
+ ch_ptr = strtok(NULL, " \t");
+ i++;
+ }
+
+ indicator_pheno.push_back(ind_pheno_row);
+ pheno.push_back(pheno_row);
+ }
+
+ // Close and reopen the file.
+ infile.close();
+ infile.clear();
+
+ if (num_cvt_in_file > 0) {
+ igzstream infile2(file_sample.c_str(), igzstream::in);
+
+ if (!infile2) {
+ cout << "error! fail to open sample file: " << file_sample << endl;
+ return false;
+ }
+
+ // Skip header.
+ safeGetline(infile2, line);
+ safeGetline(infile2, line);
+
+ // Pull in the covariates now that we know the number of
+ // factor levels.
+ while (!safeGetline(infile2, line).eof()) {
+
+ vector<double> v_d;
+ flag_na = 0;
+ ch_ptr = strtok((char *)line.c_str(), " \t");
+
+ for (int it = 0; it < 3; it++) {
+ ch_ptr = strtok(NULL, " \t");
+ }
+
+ size_t i = 0;
+ size_t fac_cvt_i = 0;
+ size_t num_fac_levels;
+ while (i < num_cols) {
+
+ if (col_type[i] == 'C') {
+ if (strcmp(ch_ptr, "NA") == 0) {
+ flag_na = 1;
+ d = -9;
+ } else {
+ d = atof(ch_ptr);
+ }
+
+ v_d.push_back(d);
+ }
+
+ if (col_type[i] == 'D') {
+
+ // NOTE THIS DOES NOT CHECK TO BE SURE
+ // LEVEL IS INTEGRAL i.e for atoi error.
+ num_fac_levels = cvt_factor_levels[fac_cvt_i].size();
+ if (num_fac_levels > 1) {
+ if (strcmp(ch_ptr, "NA") == 0) {
+ flag_na = 1;
+ for (size_t it = 0; it < num_fac_levels - 1; it++) {
+ v_d.push_back(-9);
+ }
+ } else {
+ uint32_t level = atoi(ch_ptr);
+ for (size_t it = 0; it < num_fac_levels - 1; it++) {
+ cvt_factor_levels[fac_cvt_i][level] == it + 1
+ ? v_d.push_back(1.0)
+ : v_d.push_back(0.0);
+ }
+ }
+ }
+ fac_cvt_i++;
+ }
+
+ ch_ptr = strtok(NULL, " \t");
+ i++;
+ }
+
+ if (flag_na == 0) {
+ indicator_cvt.push_back(1);
+ } else {
+ indicator_cvt.push_back(0);
+ }
+ cvt.push_back(v_d);
+ }
+
+ if (indicator_cvt.empty()) {
+ n_cvt = 0;
+ } else {
+ flag_na = 0;
+ for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+
+ if (flag_na == 0) {
+ flag_na = 1;
+ n_cvt = cvt[i].size();
+ }
+ if (flag_na != 0 && n_cvt != cvt[i].size()) {
+ cout << "error! number of covariates in row " << i
+ << " do not match other rows." << endl;
+ return false;
+ }
+ }
+ }
+
+ infile2.close();
+ infile2.clear();
+ }
+ return true;
}
// WJA Added.
// Read bgen file, the first time.
bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
- const double &maf_level, const double &miss_level,
- const double &hwe_level, const double &r2_level,
- size_t &ns_test) {
-
- indicator_snp.clear();
-
- ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bgen file:"<<file_bgen<<endl;
- return false;
- }
-
- gsl_vector *genotype=gsl_vector_alloc (W->size1);
- gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
- infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
- bgen_snp_block_offset-=4;
- infile.ignore(4+bgen_header_length-20);
- bgen_snp_block_offset-=4+bgen_header_length-20;
- infile.read(reinterpret_cast<char*>(&bgen_flags),4);
- bgen_snp_block_offset-=4;
- bool CompressedSNPBlocks=bgen_flags&0x1;
- bool LongIds=bgen_flags&0x4;
-
- if(!LongIds) {return false;}
-
- infile.ignore(bgen_snp_block_offset);
-
- ns_test=0;
-
- size_t ns_total=static_cast<size_t>(bgen_nsnps);
-
- snpInfo.clear();
- string rs;
- long int b_pos;
- string chr;
- string major;
- string minor;
- string id;
-
- double v_x, v_w;
- int c_idv=0;
-
- double maf, geno, geno_old;
- size_t n_miss;
- size_t n_0, n_1, n_2;
- int flag_poly;
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- // Total number of samples in phenotype file.
- size_t ni_total=indicator_idv.size();
-
- // Number of samples to use in test.
- size_t ni_test=0;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
-
- for (size_t i=0; i<ni_total; ++i) {
- ni_test+=indicator_idv[i];
- }
-
- for (size_t t=0; t<ns_total; ++t) {
-
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char*>(&bgen_N),4);
- infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char*>(&bgen_LR),2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char*>(&bgen_LC),2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
- infile.read(reinterpret_cast<char*>(&bgen_LA),4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
-
- infile.read(reinterpret_cast<char*>(&bgen_LB),4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- // Should we switch according to MAF?
- minor=bgen_B_allele;
- major=bgen_A_allele;
- b_pos=static_cast<long int>(bgen_SNP_pos);
-
- uint16_t unzipped_data[3*bgen_N];
-
- if (setSnps.size()!=0 && setSnps.count(rs)==0) {
- SNPINFO sInfo={"-9", rs, -9, -9, minor, major,
- static_cast<size_t>(-9), -9, (long int) -9};
-
- snpInfo.push_back(sInfo);
- indicator_snp.push_back(0);
- if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- else
- bgen_P=6*bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if(CompressedSNPBlocks)
- {
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size=6*bgen_N;
-
- infile.read(reinterpret_cast<char*>(zipped_data),
- bgen_P);
- int result=
- uncompress(reinterpret_cast<Bytef*>(unzipped_data),
- reinterpret_cast<uLongf*>(&unzipped_data_size),
- reinterpret_cast<Bytef*>(zipped_data),
- static_cast<uLong> (bgen_P));
- assert(result == Z_OK);
-
- }
- else
- {
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
-
- }
-
- maf=0; n_miss=0; flag_poly=0; geno_old=-9;
- n_0=0; n_1=0; n_2=0;
- c_idv=0;
- gsl_vector_set_zero (genotype_miss);
- for (size_t i=0; i<bgen_N; ++i) {
-
- // CHECK this set correctly!
- if (indicator_idv[i]==0) {continue;}
-
- bgen_geno_prob_AA=
- static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=
- static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=
- static_cast<double>(unzipped_data[i*3+2])/32768.0;
- bgen_geno_prob_non_miss=
- bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
-
- //CHECK 0.1 OK.
- if (bgen_geno_prob_non_miss<0.9) {
- gsl_vector_set (genotype_miss, c_idv, 1);
- n_miss++;
- c_idv++;
- continue;
- }
-
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
- geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
- if (geno>=0 && geno<=0.5) {n_0++;}
- if (geno>0.5 && geno<1.5) {n_1++;}
- if (geno>=1.5 && geno<=2.0) {n_2++;}
-
- gsl_vector_set (genotype, c_idv, geno);
-
- // CHECK WHAT THIS DOES.
- if (flag_poly==0) {geno_old=geno; flag_poly=2;}
- if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
-
- maf+=geno;
-
- c_idv++;
- }
-
- maf/=2.0*static_cast<double>(ni_test-n_miss);
-
- SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss,
- (double)n_miss/(double)ni_test, maf};
- snpInfo.push_back(sInfo);
-
- if ( (double)n_miss/(double)ni_test > miss_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (flag_poly!=1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (hwe_level!=0 && maf_level!=-1) {
- if (CalcHWE(n_0, n_2, n_1)<hwe_level) {
- indicator_snp.push_back(0);
- continue;
- }
- }
-
- // Filter SNP if it is correlated with W
- // unless W has only one column, of 1s.
- for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {
- geno=maf*2.0;
- gsl_vector_set (genotype, i, geno);
- }
- }
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_ddot (genotype, genotype, &v_x);
- gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
- if (W->size2!=1 && v_w/v_x >= r2_level) {
- indicator_snp.push_back(0); continue;}
-
- indicator_snp.push_back(1);
- ns_test++;
-
- }
-
- return true;
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+ const double &maf_level, const double &miss_level,
+ const double &hwe_level, const double &r2_level,
+ size_t &ns_test) {
+
+ indicator_snp.clear();
+
+ ifstream infile(file_bgen.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bgen file:" << file_bgen << endl;
+ return false;
+ }
+
+ gsl_vector *genotype = gsl_vector_alloc(W->size1);
+ gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+ gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp(WtW, pmt, &sig);
+ LUInvert(WtW, pmt, WtWi);
+
+ // Read in header.
+ uint32_t bgen_snp_block_offset;
+ uint32_t bgen_header_length;
+ uint32_t bgen_nsamples;
+ uint32_t bgen_nsnps;
+ uint32_t bgen_flags;
+ infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+ bgen_snp_block_offset -= 4;
+ infile.ignore(4 + bgen_header_length - 20);
+ bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+ infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+ bgen_snp_block_offset -= 4;
+ bool CompressedSNPBlocks = bgen_flags & 0x1;
+ bool LongIds = bgen_flags & 0x4;
+
+ if (!LongIds) {
+ return false;
+ }
+
+ infile.ignore(bgen_snp_block_offset);
+
+ ns_test = 0;
+
+ size_t ns_total = static_cast<size_t>(bgen_nsnps);
+
+ snpInfo.clear();
+ string rs;
+ long int b_pos;
+ string chr;
+ string major;
+ string minor;
+ string id;
+
+ double v_x, v_w;
+ int c_idv = 0;
+
+ double maf, geno, geno_old;
+ size_t n_miss;
+ size_t n_0, n_1, n_2;
+ int flag_poly;
+
+ double bgen_geno_prob_AA, bgen_geno_prob_AB;
+ double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+ // Total number of samples in phenotype file.
+ size_t ni_total = indicator_idv.size();
+
+ // Number of samples to use in test.
+ size_t ni_test = 0;
+
+ uint32_t bgen_N;
+ uint16_t bgen_LS;
+ uint16_t bgen_LR;
+ uint16_t bgen_LC;
+ uint32_t bgen_SNP_pos;
+ uint32_t bgen_LA;
+ std::string bgen_A_allele;
+ uint32_t bgen_LB;
+ std::string bgen_B_allele;
+ uint32_t bgen_P;
+ size_t unzipped_data_size;
+
+ for (size_t i = 0; i < ni_total; ++i) {
+ ni_test += indicator_idv[i];
+ }
+
+ for (size_t t = 0; t < ns_total; ++t) {
+
+ id.clear();
+ rs.clear();
+ chr.clear();
+ bgen_A_allele.clear();
+ bgen_B_allele.clear();
+
+ infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+ id.resize(bgen_LS);
+ infile.read(&id[0], bgen_LS);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+ rs.resize(bgen_LR);
+ infile.read(&rs[0], bgen_LR);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+ chr.resize(bgen_LC);
+ infile.read(&chr[0], bgen_LC);
+
+ infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+ bgen_A_allele.resize(bgen_LA);
+ infile.read(&bgen_A_allele[0], bgen_LA);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+ bgen_B_allele.resize(bgen_LB);
+ infile.read(&bgen_B_allele[0], bgen_LB);
+
+ // Should we switch according to MAF?
+ minor = bgen_B_allele;
+ major = bgen_A_allele;
+ b_pos = static_cast<long int>(bgen_SNP_pos);
+
+ uint16_t unzipped_data[3 * bgen_N];
+
+ if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
+ SNPINFO sInfo = {
+ "-9", rs, -9, -9, minor, major, static_cast<size_t>(-9),
+ -9, (long int)-9};
+
+ snpInfo.push_back(sInfo);
+ indicator_snp.push_back(0);
+ if (CompressedSNPBlocks)
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ else
+ bgen_P = 6 * bgen_N;
+
+ infile.ignore(static_cast<size_t>(bgen_P));
+
+ continue;
+ }
+
+ if (CompressedSNPBlocks) {
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ uint8_t zipped_data[bgen_P];
+
+ unzipped_data_size = 6 * bgen_N;
+
+ infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+ int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+ reinterpret_cast<uLongf *>(&unzipped_data_size),
+ reinterpret_cast<Bytef *>(zipped_data),
+ static_cast<uLong>(bgen_P));
+ assert(result == Z_OK);
+
+ } else {
+ bgen_P = 6 * bgen_N;
+ infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+ }
+
+ maf = 0;
+ n_miss = 0;
+ flag_poly = 0;
+ geno_old = -9;
+ n_0 = 0;
+ n_1 = 0;
+ n_2 = 0;
+ c_idv = 0;
+ gsl_vector_set_zero(genotype_miss);
+ for (size_t i = 0; i < bgen_N; ++i) {
+
+ // TODO: check that indicator_idv is set correctly for sample i.
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+ bgen_geno_prob_AB =
+ static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+ bgen_geno_prob_BB =
+ static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+ bgen_geno_prob_non_miss =
+ bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+
+ // TODO: check that a cutoff of 0.1 missing probability is OK.
+ if (bgen_geno_prob_non_miss < 0.9) {
+ gsl_vector_set(genotype_miss, c_idv, 1);
+ n_miss++;
+ c_idv++;
+ continue;
+ }
+
+ bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+ geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+ if (geno >= 0 && geno <= 0.5) {
+ n_0++;
+ }
+ if (geno > 0.5 && geno < 1.5) {
+ n_1++;
+ }
+ if (geno >= 1.5 && geno <= 2.0) {
+ n_2++;
+ }
+
+ gsl_vector_set(genotype, c_idv, geno);
+
+ // flag_poly becomes 1 once a genotype differs from the first one seen.
+ if (flag_poly == 0) {
+ geno_old = geno;
+ flag_poly = 2;
+ }
+ if (flag_poly == 2 && geno != geno_old) {
+ flag_poly = 1;
+ }
+
+ maf += geno;
+
+ c_idv++;
+ }
+
+ maf /= 2.0 * static_cast<double>(ni_test - n_miss);
+
+ SNPINFO sInfo = {chr, rs, -9, b_pos,
+ minor, major, n_miss, (double)n_miss / (double)ni_test,
+ maf};
+ snpInfo.push_back(sInfo);
+
+ if ((double)n_miss / (double)ni_test > miss_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if (flag_poly != 1) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ if (hwe_level != 0 && maf_level != -1) {
+ if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+ }
+
+ // Filter SNP if it is correlated with W
+ // unless W has only one column, of 1s.
+ for (size_t i = 0; i < genotype->size; ++i) {
+ if (gsl_vector_get(genotype_miss, i) == 1) {
+ geno = maf * 2.0;
+ gsl_vector_set(genotype, i, geno);
+ }
+ }
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+ gsl_blas_ddot(genotype, genotype, &v_x);
+ gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
+
+ if (W->size2 != 1 && v_w / v_x >= r2_level) {
+ indicator_snp.push_back(0);
+ continue;
+ }
+
+ indicator_snp.push_back(1);
+ ns_test++;
+ }
+
+ return true;
}
// Read oxford genotype file and calculate kinship matrix.
-bool bgenKin (const string &file_oxford, vector<int> &indicator_snp,
- const int k_mode, const int display_pace,
- gsl_matrix *matrix_kin) {
- string file_bgen=file_oxford;
- ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bgen file:"<<file_bgen<<endl;
- return false;
- }
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
- infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
- bgen_snp_block_offset-=4;
- infile.ignore(4+bgen_header_length-20);
- bgen_snp_block_offset-=4+bgen_header_length-20;
- infile.read(reinterpret_cast<char*>(&bgen_flags),4);
- bgen_snp_block_offset-=4;
- bool CompressedSNPBlocks=bgen_flags&0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- double genotype;
-
- size_t n_miss;
- double d, geno_mean, geno_var;
-
- size_t ni_total=matrix_kin->size1;
- gsl_vector *geno=gsl_vector_alloc (ni_total);
- gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
-
- size_t ns_test=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
-
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
-
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char*>(&bgen_N),4);
- infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char*>(&bgen_LR),2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char*>(&bgen_LC),2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
- infile.read(reinterpret_cast<char*>(&bgen_LA),4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
-
- infile.read(reinterpret_cast<char*>(&bgen_LB),4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3*bgen_N];
-
- if (indicator_snp[t]==0) {
- if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- else
- bgen_P=6*bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if(CompressedSNPBlocks)
- {
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size=6*bgen_N;
-
- infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
-
- int result=
- uncompress(reinterpret_cast<Bytef*>(unzipped_data),
- reinterpret_cast<uLongf*>(&unzipped_data_size),
- reinterpret_cast<Bytef*>(zipped_data),
- static_cast<uLong> (bgen_P));
- assert(result == Z_OK);
-
- }
- else
- {
-
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
- }
-
- geno_mean=0.0; n_miss=0; geno_var=0.0;
- gsl_vector_set_all(geno_miss, 0);
-
- for (size_t i=0; i<bgen_N; ++i) {
+bool bgenKin(const string &file_oxford, vector<int> &indicator_snp,
+ const int k_mode, const int display_pace, gsl_matrix *matrix_kin) {
+ string file_bgen = file_oxford;
+ ifstream infile(file_bgen.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bgen file:" << file_bgen << endl;
+ return false;
+ }
+
+ // Read in header.
+ uint32_t bgen_snp_block_offset;
+ uint32_t bgen_header_length;
+ uint32_t bgen_nsamples;
+ uint32_t bgen_nsnps;
+ uint32_t bgen_flags;
+ infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+ bgen_snp_block_offset -= 4;
+ infile.ignore(4 + bgen_header_length - 20);
+ bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+ infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+ bgen_snp_block_offset -= 4;
+ bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+ infile.ignore(bgen_snp_block_offset);
+
+ double bgen_geno_prob_AA, bgen_geno_prob_AB;
+ double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+ uint32_t bgen_N;
+ uint16_t bgen_LS;
+ uint16_t bgen_LR;
+ uint16_t bgen_LC;
+ uint32_t bgen_SNP_pos;
+ uint32_t bgen_LA;
+ std::string bgen_A_allele;
+ uint32_t bgen_LB;
+ std::string bgen_B_allele;
+ uint32_t bgen_P;
+ size_t unzipped_data_size;
+ string id;
+ string rs;
+ string chr;
+ double genotype;
+
+ size_t n_miss;
+ double d, geno_mean, geno_var;
+
+ size_t ni_total = matrix_kin->size1;
+ gsl_vector *geno = gsl_vector_alloc(ni_total);
+ gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
+
+ size_t ns_test = 0;
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+
+ if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+
+ id.clear();
+ rs.clear();
+ chr.clear();
+ bgen_A_allele.clear();
+ bgen_B_allele.clear();
+
+ infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+ id.resize(bgen_LS);
+ infile.read(&id[0], bgen_LS);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+ rs.resize(bgen_LR);
+ infile.read(&rs[0], bgen_LR);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+ chr.resize(bgen_LC);
+ infile.read(&chr[0], bgen_LC);
+
+ infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+ bgen_A_allele.resize(bgen_LA);
+ infile.read(&bgen_A_allele[0], bgen_LA);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+ bgen_B_allele.resize(bgen_LB);
+ infile.read(&bgen_B_allele[0], bgen_LB);
+
+ uint16_t unzipped_data[3 * bgen_N];
+
+ if (indicator_snp[t] == 0) {
+ if (CompressedSNPBlocks)
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ else
+ bgen_P = 6 * bgen_N;
+
+ infile.ignore(static_cast<size_t>(bgen_P));
+
+ continue;
+ }
+ if (CompressedSNPBlocks) {
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ uint8_t zipped_data[bgen_P];
- bgen_geno_prob_AA=
- static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=
- static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=
- static_cast<double>(unzipped_data[i*3+2])/32768.0;
- // WJA
- bgen_geno_prob_non_miss=bgen_geno_prob_AA +
- bgen_geno_prob_AB+bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss<0.9) {
- gsl_vector_set(geno_miss, i, 0.0);
- n_miss++;
- }
- else {
+ unzipped_data_size = 6 * bgen_N;
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+ infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
- genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+ int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+ reinterpret_cast<uLongf *>(&unzipped_data_size),
+ reinterpret_cast<Bytef *>(zipped_data),
+ static_cast<uLong>(bgen_P));
+ assert(result == Z_OK);
- gsl_vector_set(geno, i, genotype);
- gsl_vector_set(geno_miss, i, 1.0);
- geno_mean+=genotype;
- geno_var+=genotype*genotype;
- }
+ } else {
- }
+ bgen_P = 6 * bgen_N;
+ infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+ }
+ geno_mean = 0.0;
+ n_miss = 0;
+ geno_var = 0.0;
+ gsl_vector_set_all(geno_miss, 0);
+
+ for (size_t i = 0; i < bgen_N; ++i) {
+
+ bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+ bgen_geno_prob_AB =
+ static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+ bgen_geno_prob_BB =
+ static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+ // WJA
+ bgen_geno_prob_non_miss =
+ bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss < 0.9) {
+ gsl_vector_set(geno_miss, i, 0.0);
+ n_miss++;
+ } else {
- geno_mean/=(double)(ni_total-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_total;
- geno_var-=geno_mean*geno_mean;
+ bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
- for (size_t i=0; i<ni_total; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {
- gsl_vector_set(geno, i, geno_mean);
- }
- }
+ genotype = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
- gsl_vector_add_constant (geno, -1.0*geno_mean);
+ gsl_vector_set(geno, i, genotype);
+ gsl_vector_set(geno_miss, i, 1.0);
+ geno_mean += genotype;
+ geno_var += genotype * genotype;
+ }
+ }
+
+ geno_mean /= (double)(ni_total - n_miss);
+ geno_var += geno_mean * geno_mean * (double)n_miss;
+ geno_var /= (double)ni_total;
+ geno_var -= geno_mean * geno_mean;
+
+ for (size_t i = 0; i < ni_total; ++i) {
+ if (gsl_vector_get(geno_miss, i) == 0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
+ }
- if (geno_var!=0) {
- if (k_mode==1) {
- gsl_blas_dsyr(CblasUpper,1.0,geno,matrix_kin);
- } else if (k_mode==2) {
- gsl_blas_dsyr(CblasUpper,1.0/geno_var,geno,matrix_kin);
- }
- else {
- cout<<"Unknown kinship mode."<<endl;
- }
- }
+ gsl_vector_add_constant(geno, -1.0 * geno_mean);
- ns_test++;
+ if (geno_var != 0) {
+ if (k_mode == 1) {
+ gsl_blas_dsyr(CblasUpper, 1.0, geno, matrix_kin);
+ } else if (k_mode == 2) {
+ gsl_blas_dsyr(CblasUpper, 1.0 / geno_var, geno, matrix_kin);
+ } else {
+ cout << "Unknown kinship mode." << endl;
+ }
}
- cout<<endl;
- gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
+ ns_test++;
+ }
+ cout << endl;
- for (size_t i=0; i<ni_total; ++i) {
- for (size_t j=0; j<i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i);
- gsl_matrix_set (matrix_kin, i, j, d);
- }
- }
+ gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
- gsl_vector_free (geno);
- gsl_vector_free (geno_miss);
+ for (size_t i = 0; i < ni_total; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ d = gsl_matrix_get(matrix_kin, j, i);
+ gsl_matrix_set(matrix_kin, i, j, d);
+ }
+ }
- infile.close();
- infile.clear();
+ gsl_vector_free(geno);
+ gsl_vector_free(geno_miss);
- return true;
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read header to determine which column contains which item.
-bool ReadHeader_io (const string &line, HEADER &header)
-{
- string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID",
- "rsid","RSID","MarkerName"};
- set<string> rs_set(rs_ptr, rs_ptr+11);
- string chr_ptr[]={"chr","CHR"};
- set<string> chr_set(chr_ptr, chr_ptr+2);
- string pos_ptr[]={"ps","PS","pos","POS","base_position",
- "BASE_POSITION", "bp", "BP"};
- set<string> pos_set(pos_ptr, pos_ptr+8);
- string cm_ptr[]={"cm","CM"};
- set<string> cm_set(cm_ptr, cm_ptr+2);
- string a1_ptr[]={"a1","A1","allele1","ALLELE1","Allele1","INC_ALLELE"};
- set<string> a1_set(a1_ptr, a1_ptr+5);
- string a0_ptr[]={"a0","A0","allele0","ALLELE0","Allele0","a2","A2",
- "allele2","ALLELE2","Allele2","DEC_ALLELE"};
- set<string> a0_set(a0_ptr, a0_ptr+10);
-
- string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
- set<string> z_set(z_ptr, z_ptr+6);
- string beta_ptr[]={"beta","BETA","b","B"};
- set<string> beta_set(beta_ptr, beta_ptr+4);
- string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
- set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
- string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
- set<string> chisq_set(chisq_ptr, chisq_ptr+4);
- string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
- set<string> p_set(p_ptr, p_ptr+6);
-
- string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
- set<string> n_set(n_ptr, n_ptr+6);
- string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
- set<string> nmis_set(nmis_ptr, nmis_ptr+6);
- string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
- set<string> nobs_set(nobs_ptr, nobs_ptr+4);
- string ncase_ptr[]={"ncase","NCASE","n_case","N_CASE"};
- set<string> ncase_set(ncase_ptr, ncase_ptr+4);
- string ncontrol_ptr[]={"ncontrol","NCONTROL","n_control","N_CONTROL"};
- set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr+4);
-
- string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq",
- "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY",
- "Freq.Allele1.HapMapCEU","FreqAllele1HapMapCEU",
- "Freq1.Hapmap"};
- set<string> af_set(af_ptr, af_ptr+13);
- string var_ptr[]={"var","VAR"};
- set<string> var_set(var_ptr, var_ptr+2);
-
- string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
- set<string> ws_set(ws_ptr, ws_ptr+4);
- string cor_ptr[]={"cor","COR","r","R"};
- set<string> cor_set(cor_ptr, cor_ptr+4);
-
- header.rs_col=0; header.chr_col=0; header.pos_col=0;
- header.cm_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0;
- header.beta_col=0; header.sebeta_col=0; header.chisq_col=0;
- header.p_col=0; header.n_col=0; header.nmis_col=0;
- header.nobs_col=0; header.ncase_col=0; header.ncontrol_col=0;
- header.af_col=0; header.var_col=0; header.ws_col=0;
- header.cor_col=0; header.coln=0;
+bool ReadHeader_io(const string &line, HEADER &header) {
+ string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", "SNPS",
+ "snpid", "SNPID", "rsid", "RSID", "MarkerName"};
+ set<string> rs_set(rs_ptr, rs_ptr + 11);
+ string chr_ptr[] = {"chr", "CHR"};
+ set<string> chr_set(chr_ptr, chr_ptr + 2);
+ string pos_ptr[] = {
+ "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"};
+ set<string> pos_set(pos_ptr, pos_ptr + 8);
+ string cm_ptr[] = {"cm", "CM"};
+ set<string> cm_set(cm_ptr, cm_ptr + 2);
+ string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1", "Allele1", "INC_ALLELE"};
+ set<string> a1_set(a1_ptr, a1_ptr + 5);
+ string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0", "Allele0", "a2",
+ "A2", "allele2", "ALLELE2", "Allele2", "DEC_ALLELE"};
+ set<string> a0_set(a0_ptr, a0_ptr + 10);
+
+ string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"};
+ set<string> z_set(z_ptr, z_ptr + 6);
+ string beta_ptr[] = {"beta", "BETA", "b", "B"};
+ set<string> beta_set(beta_ptr, beta_ptr + 4);
+ string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"};
+ set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4);
+ string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"};
+ set<string> chisq_set(chisq_ptr, chisq_ptr + 4);
+ string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"};
+ set<string> p_set(p_ptr, p_ptr + 6);
+
+ string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"};
+ set<string> n_set(n_ptr, n_ptr + 6);
+ string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"};
+ set<string> nmis_set(nmis_ptr, nmis_ptr + 6);
+ string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"};
+ set<string> nobs_set(nobs_ptr, nobs_ptr + 4);
+ string ncase_ptr[] = {"ncase", "NCASE", "n_case", "N_CASE"};
+ set<string> ncase_set(ncase_ptr, ncase_ptr + 4);
+ string ncontrol_ptr[] = {"ncontrol", "NCONTROL", "n_control", "N_CONTROL"};
+ set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr + 4);
+
+ string af_ptr[] = {"af",
+ "AF",
+ "maf",
+ "MAF",
+ "f",
+ "F",
+ "allele_freq",
+ "ALLELE_FREQ",
+ "allele_frequency",
+ "ALLELE_FREQUENCY",
+ "Freq.Allele1.HapMapCEU",
+ "FreqAllele1HapMapCEU",
+ "Freq1.Hapmap"};
+ set<string> af_set(af_ptr, af_ptr + 13);
+ string var_ptr[] = {"var", "VAR"};
+ set<string> var_set(var_ptr, var_ptr + 2);
+
+ string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"};
+ set<string> ws_set(ws_ptr, ws_ptr + 4);
+ string cor_ptr[] = {"cor", "COR", "r", "R"};
+ set<string> cor_set(cor_ptr, cor_ptr + 4);
+
+ header.rs_col = 0;
+ header.chr_col = 0;
+ header.pos_col = 0;
+ header.cm_col = 0;
+ header.a1_col = 0;
+ header.a0_col = 0;
+ header.z_col = 0;
+ header.beta_col = 0;
+ header.sebeta_col = 0;
+ header.chisq_col = 0;
+ header.p_col = 0;
+ header.n_col = 0;
+ header.nmis_col = 0;
+ header.nobs_col = 0;
+ header.ncase_col = 0;
+ header.ncontrol_col = 0;
+ header.af_col = 0;
+ header.var_col = 0;
+ header.ws_col = 0;
+ header.cor_col = 0;
+ header.coln = 0;
char *ch_ptr;
string type;
- size_t n_error=0;
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- while (ch_ptr!=NULL) {
- type=ch_ptr;
- if (rs_set.count(type)!=0) {
- if (header.rs_col==0) {
- header.rs_col=header.coln+1;
+ size_t n_error = 0;
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ while (ch_ptr != NULL) {
+ type = ch_ptr;
+ if (rs_set.count(type) != 0) {
+ if (header.rs_col == 0) {
+ header.rs_col = header.coln + 1;
} else {
- cout<<"error! more than two rs columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two rs columns in the file." << endl;
+ n_error++;
}
- } else if (chr_set.count(type)!=0) {
- if (header.chr_col==0) {
- header.chr_col=header.coln+1;
+ } else if (chr_set.count(type) != 0) {
+ if (header.chr_col == 0) {
+ header.chr_col = header.coln + 1;
} else {
- cout<<"error! more than two chr columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two chr columns in the file." << endl;
+ n_error++;
}
- } else if (pos_set.count(type)!=0) {
- if (header.pos_col==0) {
- header.pos_col=header.coln+1;
+ } else if (pos_set.count(type) != 0) {
+ if (header.pos_col == 0) {
+ header.pos_col = header.coln + 1;
} else {
- cout<<"error! more than two pos columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two pos columns in the file." << endl;
+ n_error++;
}
- } else if (cm_set.count(type)!=0) {
- if (header.cm_col==0) {
- header.cm_col=header.coln+1;
+ } else if (cm_set.count(type) != 0) {
+ if (header.cm_col == 0) {
+ header.cm_col = header.coln + 1;
} else {
- cout<<"error! more than two cm columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two cm columns in the file." << endl;
+ n_error++;
}
- } else if (a1_set.count(type)!=0) {
- if (header.a1_col==0) {
- header.a1_col=header.coln+1;
+ } else if (a1_set.count(type) != 0) {
+ if (header.a1_col == 0) {
+ header.a1_col = header.coln + 1;
} else {
- cout<<"error! more than two allele1 columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two allele1 columns in the file." << endl;
+ n_error++;
}
- } else if (a0_set.count(type)!=0) {
- if (header.a0_col==0) {
- header.a0_col=header.coln+1;
+ } else if (a0_set.count(type) != 0) {
+ if (header.a0_col == 0) {
+ header.a0_col = header.coln + 1;
} else {
- cout<<"error! more than two allele0 columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two allele0 columns in the file." << endl;
+ n_error++;
}
- } else if (z_set.count(type)!=0) {
- if (header.z_col==0) {
- header.z_col=header.coln+1;
+ } else if (z_set.count(type) != 0) {
+ if (header.z_col == 0) {
+ header.z_col = header.coln + 1;
} else {
- cout<<"error! more than two z columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two z columns in the file." << endl;
+ n_error++;
}
- } else if (beta_set.count(type)!=0) {
- if (header.beta_col==0) {
- header.beta_col=header.coln+1;
+ } else if (beta_set.count(type) != 0) {
+ if (header.beta_col == 0) {
+ header.beta_col = header.coln + 1;
} else {
- cout<<"error! more than two beta columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two beta columns in the file." << endl;
+ n_error++;
}
- } else if (sebeta_set.count(type)!=0) {
- if (header.sebeta_col==0) {
- header.sebeta_col=header.coln+1;
+ } else if (sebeta_set.count(type) != 0) {
+ if (header.sebeta_col == 0) {
+ header.sebeta_col = header.coln + 1;
} else {
- cout<<"error! more than two se_beta columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two se_beta columns in the file." << endl;
+ n_error++;
}
- } else if (chisq_set.count(type)!=0) {
- if (header.chisq_col==0) {
- header.chisq_col=header.coln+1;
+ } else if (chisq_set.count(type) != 0) {
+ if (header.chisq_col == 0) {
+ header.chisq_col = header.coln + 1;
} else {
- cout<<"error! more than two z columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two z columns in the file." << endl;
+ n_error++;
}
- } else if (p_set.count(type)!=0) {
- if (header.p_col==0) {
- header.p_col=header.coln+1;
+ } else if (p_set.count(type) != 0) {
+ if (header.p_col == 0) {
+ header.p_col = header.coln + 1;
} else {
- cout<<"error! more than two p columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two p columns in the file." << endl;
+ n_error++;
}
- } else if (n_set.count(type)!=0) {
- if (header.n_col==0) {
- header.n_col=header.coln+1;
+ } else if (n_set.count(type) != 0) {
+ if (header.n_col == 0) {
+ header.n_col = header.coln + 1;
} else {
- cout<<"error! more than two n_total columns in the file."<<endl;
- n_error++;}
- } else if (nmis_set.count(type)!=0) {
- if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else {
- cout<<"error! more than two n_mis columns in the file."<<endl;
- n_error++;
- }
- } else if (nobs_set.count(type)!=0) {
- if (header.nobs_col==0) {
- header.nobs_col=header.coln+1;
+ cout << "error! more than two n_total columns in the file." << endl;
+ n_error++;
+ }
+ } else if (nmis_set.count(type) != 0) {
+ if (header.nmis_col == 0) {
+ header.nmis_col = header.coln + 1;
+ } else {
+ cout << "error! more than two n_mis columns in the file." << endl;
+ n_error++;
+ }
+ } else if (nobs_set.count(type) != 0) {
+ if (header.nobs_col == 0) {
+ header.nobs_col = header.coln + 1;
} else {
- cout<<"error! more than two n_obs columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two n_obs columns in the file." << endl;
+ n_error++;
}
- } else if (ncase_set.count(type)!=0) {
- if (header.ncase_col==0) {
- header.ncase_col=header.coln+1;
+ } else if (ncase_set.count(type) != 0) {
+ if (header.ncase_col == 0) {
+ header.ncase_col = header.coln + 1;
} else {
- cout<<"error! more than two n_case columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two n_case columns in the file." << endl;
+ n_error++;
}
- } else if (ncontrol_set.count(type)!=0) {
- if (header.ncontrol_col==0) {
- header.ncontrol_col=header.coln+1;
+ } else if (ncontrol_set.count(type) != 0) {
+ if (header.ncontrol_col == 0) {
+ header.ncontrol_col = header.coln + 1;
} else {
- cout<<"error! more than two n_control columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two n_control columns in the file." << endl;
+ n_error++;
}
- } else if (ws_set.count(type)!=0) {
- if (header.ws_col==0) {
- header.ws_col=header.coln+1;
+ } else if (ws_set.count(type) != 0) {
+ if (header.ws_col == 0) {
+ header.ws_col = header.coln + 1;
} else {
- cout<<"error! more than two window_size columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two window_size columns in the file." << endl;
+ n_error++;
}
- } else if (af_set.count(type)!=0) {
- if (header.af_col==0) {
- header.af_col=header.coln+1;
+ } else if (af_set.count(type) != 0) {
+ if (header.af_col == 0) {
+ header.af_col = header.coln + 1;
} else {
- cout<<"error! more than two af columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two af columns in the file." << endl;
+ n_error++;
}
- } else if (cor_set.count(type)!=0) {
- if (header.cor_col==0) {
- header.cor_col=header.coln+1;
+ } else if (cor_set.count(type) != 0) {
+ if (header.cor_col == 0) {
+ header.cor_col = header.coln + 1;
} else {
- cout<<"error! more than two cor columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two cor columns in the file." << endl;
+ n_error++;
}
} else {
string str = ch_ptr;
- string cat = str.substr(str.size()-2, 2);
+ string cat = str.substr(str.size() - 2, 2);
- if(cat == "_c" || cat =="_C"){
+ if (cat == "_c" || cat == "_C") {
// continuous
- header.catc_col.insert(header.coln+1);
+ header.catc_col.insert(header.coln + 1);
} else {
- // discrete
- header.catd_col.insert(header.coln+1);
+ // discrete
+ header.catd_col.insert(header.coln + 1);
}
}
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
header.coln++;
}
- if (header.cor_col!=0 && header.cor_col!=header.coln) {
- cout<<"error! the cor column should be the last column."<<endl;
+ if (header.cor_col != 0 && header.cor_col != header.coln) {
+ cout << "error! the cor column should be the last column." << endl;
n_error++;
}
- if (header.rs_col==0) {
- if (header.chr_col!=0 && header.pos_col!=0) {
- cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
+ if (header.rs_col == 0) {
+ if (header.chr_col != 0 && header.pos_col != 0) {
+ cout << "missing an rs column. rs id will be replaced by chr:pos" << endl;
} else {
- cout<<"error! missing an rs column."<<endl; n_error++;
+ cout << "error! missing an rs column." << endl;
+ n_error++;
}
}
- if (n_error==0) {
+ if (n_error == 0) {
return true;
} else {
return false;
@@ -3026,13 +3210,13 @@ bool ReadHeader_io (const string &line, HEADER &header)
// Read category file, record mapRS2 in the category file does not
// contain a null category so if a snp has 0 for all categories, then
// it is not included in the analysis.
-bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
- size_t &n_vc) {
+bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
+ size_t &n_vc) {
mapRS2cat.clear();
- igzstream infile (file_cat.c_str(), igzstream::in);
+ igzstream infile(file_cat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open category file: "<<file_cat<<endl;
+ cout << "error! fail to open category file: " << file_cat << endl;
return false;
}
@@ -3045,47 +3229,62 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
// Read header.
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader_io(line, header);
// Use the header to count the number of categories.
- n_vc=header.coln;
- if (header.rs_col!=0) {n_vc--;}
- if (header.chr_col!=0) {n_vc--;}
- if (header.pos_col!=0) {n_vc--;}
- if (header.cm_col!=0) {n_vc--;}
- if (header.a1_col!=0) {n_vc--;}
- if (header.a0_col!=0) {n_vc--;}
+ n_vc = header.coln;
+ if (header.rs_col != 0) {
+ n_vc--;
+ }
+ if (header.chr_col != 0) {
+ n_vc--;
+ }
+ if (header.pos_col != 0) {
+ n_vc--;
+ }
+ if (header.cm_col != 0) {
+ n_vc--;
+ }
+ if (header.a1_col != 0) {
+ n_vc--;
+ }
+ if (header.a0_col != 0) {
+ n_vc--;
+ }
// Read the following lines to record mapRS2cat.
while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- i_cat=0;
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {
- rs=ch_ptr;
- } else if (header.chr_col!=0 && header.chr_col==i+1) {
- chr=ch_ptr;
- } else if (header.pos_col!=0 && header.pos_col==i+1) {
- pos=ch_ptr;
- } else if (header.cm_col!=0 && header.cm_col==i+1) {
- cm=ch_ptr;
- } else if (header.a1_col!=0 && header.a1_col==i+1) {
- a1=ch_ptr;
- } else if (header.a0_col!=0 && header.a0_col==i+1) {
- a0=ch_ptr;
- } else if (atoi(ch_ptr)==1 || atoi(ch_ptr)==0) {
- if (i_cat==0) {
- if (header.rs_col==0) {
- rs=chr+":"+pos;
- }
- }
-
- if (atoi(ch_ptr)==1 && mapRS2cat.count(rs)==0) {mapRS2cat[rs]=i_cat;}
- i_cat++;
- } else {}
-
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+ i_cat = 0;
+ for (size_t i = 0; i < header.coln; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ } else if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ } else if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ } else if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ } else if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ } else if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
+ } else if (atoi(ch_ptr) == 1 || atoi(ch_ptr) == 0) {
+ if (i_cat == 0) {
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
+ }
+ }
+
+ if (atoi(ch_ptr) == 1 && mapRS2cat.count(rs) == 0) {
+ mapRS2cat[rs] = i_cat;
+ }
+ i_cat++;
+ } else {
+ }
+
+ ch_ptr = strtok(NULL, " , \t");
}
}
@@ -3095,25 +3294,29 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
return true;
}
-bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat,
- size_t &n_vc) {
+bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat,
+ size_t &n_vc) {
mapRS2cat.clear();
- igzstream infile (file_mcat.c_str(), igzstream::in);
+ igzstream infile(file_mcat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open mcategory file: "<<file_mcat<<endl;
+ cout << "error! fail to open mcategory file: " << file_mcat << endl;
return false;
}
string file_name;
map<string, size_t> mapRS2cat_tmp;
- size_t n_vc_tmp, t=0;
+ size_t n_vc_tmp, t = 0;
while (!safeGetline(infile, file_name).eof()) {
mapRS2cat_tmp.clear();
- ReadFile_cat (file_name, mapRS2cat_tmp, n_vc_tmp);
+ ReadFile_cat(file_name, mapRS2cat_tmp, n_vc_tmp);
mapRS2cat.insert(mapRS2cat_tmp.begin(), mapRS2cat_tmp.end());
- if (t==0) {n_vc=n_vc_tmp;} else {n_vc=max(n_vc, n_vc_tmp);}
+ if (t == 0) {
+ n_vc = n_vc_tmp;
+ } else {
+ n_vc = max(n_vc, n_vc_tmp);
+ }
t++;
}
@@ -3123,475 +3326,490 @@ bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat,
// Read bimbam mean genotype file and calculate kinship matrix; this
// time, the kinship matrix is not centered, and can contain multiple
// K matrix.
-bool BimbamKin (const string &file_geno, const int display_pace,
- const vector<int> &indicator_idv,
- const vector<int> &indicator_snp,
- const map<string, double> &mapRS2weight,
- const map<string, size_t> &mapRS2cat,
- const vector<SNPINFO> &snpInfo,
- const gsl_matrix *W, gsl_matrix *matrix_kin,
- gsl_vector *vector_ns) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- size_t n_miss;
- double d, geno_mean, geno_var;
-
- size_t ni_test=matrix_kin->size1;
- gsl_vector *geno=gsl_vector_alloc (ni_test);
- gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
-
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- size_t n_vc=matrix_kin->size2/ni_test, i_vc;
- string rs;
- vector<size_t> ns_vec;
- for (size_t i=0; i<n_vc; i++) {
- ns_vec.push_back(0);
- }
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
- gsl_matrix_set_zero(Xlarge);
-
- size_t ns_test=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- rs=snpInfo[t].rs_number; // This line is new.
-
- geno_mean=0.0; n_miss=0; geno_var=0.0;
- gsl_vector_set_all(geno_miss, 0);
-
- size_t j=0;
- for (size_t i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
- ch_ptr=strtok (NULL, " , \t");
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(geno_miss, i, 0); n_miss++;
- }
- else {
- d=atof(ch_ptr);
- gsl_vector_set (geno, j, d);
- gsl_vector_set (geno_miss, j, 1);
- geno_mean+=d;
- geno_var+=d*d;
- }
- j++;
- }
-
- geno_mean/=(double)(ni_test-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_test;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {
- gsl_vector_set(geno, i, geno_mean);
- }
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
- gsl_blas_ddot (geno, geno, &geno_var);
- geno_var/=(double)ni_test;
-
- if (geno_var!=0 && (mapRS2weight.size()==0 ||
- mapRS2weight.count(rs)!=0)) {
- if (mapRS2weight.size()==0) {
- d=1.0/geno_var;
- } else {
- d=mapRS2weight.at(rs)/geno_var;
- }
-
- gsl_vector_scale (geno, sqrt(d));
- if (n_vc==1 || mapRS2cat.size()==0 ) {
- gsl_vector_view Xlarge_col=
- gsl_matrix_column(Xlarge,ns_vec[0]%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, geno);
- ns_vec[0]++;
-
- if (ns_vec[0]%msize==0) {
- eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
- gsl_matrix_set_zero(Xlarge);
- }
- } else if (mapRS2cat.count(rs)!=0) {
- i_vc=mapRS2cat.at(rs);
-
- gsl_vector_view Xlarge_col=
- gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
- ns_vec[i_vc]++;
-
- if (ns_vec[i_vc]%msize==0) {
- gsl_matrix_view X_sub=
- gsl_matrix_submatrix(Xlarge,0,msize*i_vc,
- ni_test,msize);
- gsl_matrix_view kin_sub=
- gsl_matrix_submatrix(matrix_kin,0,ni_test*i_vc,
- ni_test,ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix,
- &X_sub.matrix, 1.0, &kin_sub.matrix);
-
- gsl_matrix_set_zero(&X_sub.matrix);
- }
- }
-
- }
- ns_test++;
-
- }
-
- for (size_t i_vc=0; i_vc<n_vc; i_vc++) {
- if (ns_vec[i_vc]%msize!=0) {
- gsl_matrix_view X_sub=
- gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
- gsl_matrix_view kin_sub=
- gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test,
- ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix,
- 1.0, &kin_sub.matrix);
- }
- }
-
- cout<<endl;
-
- for (size_t t=0; t<n_vc; t++) {
- gsl_vector_set(vector_ns, t, ns_vec[t]);
-
- for (size_t i=0; i<ni_test; ++i) {
- for (size_t j=0; j<=i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
- d/=(double)ns_vec[t];
- gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
- gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
- }
- }
- }
-
- gsl_vector_free (geno);
- gsl_vector_free (geno_miss);
-
- gsl_vector_free (Wtx);
- gsl_matrix_free (WtW);
- gsl_matrix_free (WtWi);
- gsl_vector_free (WtWiWtx);
- gsl_permutation_free (pmt);
-
- gsl_matrix_free (Xlarge);
-
- infile.close();
- infile.clear();
-
- return true;
+// Accumulate kinship matrices from a BIMBAM mean genotype file.
+//
+//   file_geno      genotype file, opened through igzstream.
+//   display_pace   the progress bar is refreshed every display_pace SNPs.
+//   indicator_idv  one flag per individual in the file; individuals whose
+//                  flag is 0 are left out of the kinship matrix.
+//   indicator_snp  one flag per SNP (line); SNPs whose flag is 0 are skipped.
+//   mapRS2weight   optional per-SNP weights keyed by rs id. When non-empty,
+//                  SNPs without an entry are ignored.
+//   mapRS2cat      optional rs id -> category index; only consulted when
+//                  matrix_kin holds more than one block.
+//   snpInfo        snpInfo[t].rs_number is the rs id of SNP t.
+//   W              covariate matrix; each genotype vector is replaced by its
+//                  residual after regression on W.
+//   matrix_kin     in/out, ni_test x (n_vc * ni_test). Block t occupies
+//                  columns [t * ni_test, (t + 1) * ni_test). Products are
+//                  added to the existing content (beta = 1.0), so the caller
+//                  is presumably expected to zero it first -- TODO confirm.
+//   vector_ns      out, number of SNPs accumulated into each block.
+//
+// Returns false if the genotype file cannot be opened, true otherwise.
+//
+// NOTE(review): a line with fewer tokens than individuals makes strtok
+// return NULL, which is then passed to strcmp; input is not validated here.
+bool BimbamKin(const string &file_geno, const int display_pace,
+               const vector<int> &indicator_idv,
+               const vector<int> &indicator_snp,
+               const map<string, double> &mapRS2weight,
+               const map<string, size_t> &mapRS2cat,
+               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+               gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  if (!infile) {
+    cout << "error reading genotype file:" << file_geno << endl;
+    return false;
+  }
+
+  string line;
+  char *ch_ptr;
+
+  size_t n_miss;
+  double d, geno_mean, geno_var;
+
+  size_t ni_test = matrix_kin->size1;
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+  gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  // WtW = W^T W; WtWi is meant to receive its inverse through the project's
+  // LU helpers. It is used below to regress W out of every genotype vector.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  // Number of side-by-side blocks in matrix_kin, and per-block SNP counters.
+  size_t n_vc = matrix_kin->size2 / ni_test, i_vc;
+  string rs;
+  vector<size_t> ns_vec(n_vc, 0);
+
+  // Create a large matrix: msize buffered SNP columns per block, flushed into
+  // matrix_kin every time a block's buffer fills up.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+  gsl_matrix_set_zero(Xlarge);
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    // One line is consumed per SNP even when the SNP is skipped, so that line
+    // t always matches indicator_snp[t]. The eof() result is discarded.
+    !safeGetline(infile, line).eof();
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // Consume the three leading tokens of the line; they are not used here.
+    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok(NULL, " , \t");
+
+    rs = snpInfo[t].rs_number; // This line is new.
+
+    geno_mean = 0.0;
+    n_miss = 0;
+    gsl_vector_set_all(geno_miss, 0);
+
+    // i walks over every individual in the file, j over the individuals that
+    // are kept. geno and geno_miss have ni_test entries and are indexed by j.
+    size_t j = 0;
+    for (size_t i = 0; i < indicator_idv.size(); ++i) {
+      // Always consume this individual's token, even when the individual is
+      // excluded; otherwise every later genotype shifts onto the wrong person.
+      ch_ptr = strtok(NULL, " , \t");
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+      if (strcmp(ch_ptr, "NA") == 0) {
+        gsl_vector_set(geno_miss, j, 0);
+        n_miss++;
+      } else {
+        d = atof(ch_ptr);
+        gsl_vector_set(geno, j, d);
+        gsl_vector_set(geno_miss, j, 1);
+        geno_mean += d;
+      }
+      j++;
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+
+    // Impute missing genotypes with the mean of the observed ones.
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(geno_miss, i) == 0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    // geno <- geno - W (W^T W)^{-1} W^T geno, then take the variance of the
+    // residual. (A variance computed from the raw genotypes would be
+    // overwritten here, so it is no longer computed.)
+    gsl_blas_dgemv(CblasTrans, 1.0, W, geno, 0.0, Wtx);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+    gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
+    gsl_blas_ddot(geno, geno, &geno_var);
+    geno_var /= (double)ni_test;
+
+    if (geno_var != 0 &&
+        (mapRS2weight.size() == 0 || mapRS2weight.count(rs) != 0)) {
+      if (mapRS2weight.size() == 0) {
+        d = 1.0 / geno_var;
+      } else {
+        d = mapRS2weight.at(rs) / geno_var;
+      }
+
+      gsl_vector_scale(geno, sqrt(d));
+      if (n_vc == 1 || mapRS2cat.size() == 0) {
+        // Single block (or no category map): everything goes to block 0.
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, ns_vec[0] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+        ns_vec[0]++;
+
+        if (ns_vec[0] % msize == 0) {
+          eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+          gsl_matrix_set_zero(Xlarge);
+        }
+      } else if (mapRS2cat.count(rs) != 0) {
+        i_vc = mapRS2cat.at(rs);
+
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, msize * i_vc + ns_vec[i_vc] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+        ns_vec[i_vc]++;
+
+        if (ns_vec[i_vc] % msize == 0) {
+          gsl_matrix_view X_sub =
+              gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+          gsl_matrix_view kin_sub = gsl_matrix_submatrix(
+              matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+          eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                         &kin_sub.matrix);
+
+          gsl_matrix_set_zero(&X_sub.matrix);
+        }
+      }
+    }
+  }
+
+  // Flush whatever is left in each block's buffer. Unused buffer columns are
+  // zero, so multiplying the whole buffer is harmless.
+  for (size_t i_vc = 0; i_vc < n_vc; i_vc++) {
+    if (ns_vec[i_vc] % msize != 0) {
+      gsl_matrix_view X_sub =
+          gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+      gsl_matrix_view kin_sub =
+          gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+      eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                     &kin_sub.matrix);
+    }
+  }
+
+  cout << endl;
+
+  // Report the SNP counts, divide each block by its count and mirror the
+  // upper triangle into the lower one.
+  for (size_t t = 0; t < n_vc; t++) {
+    gsl_vector_set(vector_ns, t, ns_vec[t]);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      for (size_t j = 0; j <= i; ++j) {
+        d = gsl_matrix_get(matrix_kin, j, i + ni_test * t);
+        // A block that received no SNP is left unscaled: 0/0 would be NaN,
+        // and MFILEKin multiplies the block by its (zero) SNP count again,
+        // which would keep the NaN and poison the combined matrix.
+        if (ns_vec[t] != 0) {
+          d /= (double)ns_vec[t];
+        }
+        gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+        gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
+      }
+    }
+  }
+
+  gsl_vector_free(geno);
+  gsl_vector_free(geno_miss);
+
+  gsl_vector_free(Wtx);
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(WtWiWtx);
+  gsl_permutation_free(pmt);
+
+  gsl_matrix_free(Xlarge);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
-bool PlinkKin (const string &file_bed, const int display_pace,
- const vector<int> &indicator_idv,
- const vector<int> &indicator_snp,
- const map<string, double> &mapRS2weight,
- const map<string, size_t> &mapRS2cat,
- const vector<SNPINFO> &snpInfo,
- const gsl_matrix *W, gsl_matrix *matrix_kin,
- gsl_vector *vector_ns) {
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return false;
- }
-
- char ch[1];
- bitset<8> b;
-
- size_t n_miss, ci_total, ci_test;
- double d, geno_mean, geno_var;
-
- size_t ni_test=matrix_kin->size1;
- size_t ni_total=indicator_idv.size();
- gsl_vector *geno=gsl_vector_alloc (ni_test);
-
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- size_t ns_test=0;
- int n_bit;
-
- size_t n_vc=matrix_kin->size2/ni_test, i_vc;
- string rs;
- vector<size_t> ns_vec;
- for (size_t i=0; i<n_vc; i++) {
- ns_vec.push_back(0);
- }
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc);
- gsl_matrix_set_zero(Xlarge);
-
- // Calculate n_bit and c, the number of bit for each SNP.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers
- infile.seekg(t*n_bit+3);
-
- rs=snpInfo[t].rs_number; // This line is new.
-
- // Read genotypes.
- geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0;
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(geno, ci_test, 2.0);
- geno_mean+=2.0; geno_var+=4.0;
- }
- else {
- gsl_vector_set(geno, ci_test, 1.0);
- geno_mean+=1.0;
- geno_var+=1.0;
- }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
- else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
- }
-
- ci_test++;
- ci_total++;
- }
- }
-
- geno_mean/=(double)(ni_test-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_test;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_test; ++i) {
- d=gsl_vector_get(geno,i);
- if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
- gsl_blas_ddot (geno, geno, &geno_var);
- geno_var/=(double)ni_test;
-
- if (geno_var!=0 && (mapRS2weight.size()==0 ||
- mapRS2weight.count(rs)!=0)) {
- if (mapRS2weight.size()==0) {
- d=1.0/geno_var;
- } else {
- d=mapRS2weight.at(rs)/geno_var;
- }
-
- gsl_vector_scale (geno, sqrt(d));
- if (n_vc==1 || mapRS2cat.size()==0 ) {
- gsl_vector_view Xlarge_col=
- gsl_matrix_column (Xlarge, ns_vec[0]%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, geno);
- ns_vec[0]++;
-
- if (ns_vec[0]%msize==0) {
- eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin);
- gsl_matrix_set_zero(Xlarge);
- }
- } else if (mapRS2cat.count(rs)!=0) {
- i_vc=mapRS2cat.at(rs);
-
- gsl_vector_view Xlarge_col=
- gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, geno);
-
- ns_vec[i_vc]++;
-
- if (ns_vec[i_vc]%msize==0) {
- gsl_matrix_view X_sub=
- gsl_matrix_submatrix(Xlarge,0,msize*i_vc,ni_test,
- msize);
- gsl_matrix_view kin_sub=
- gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc,
- ni_test, ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix,
- &X_sub.matrix, 1.0, &kin_sub.matrix);
-
- gsl_matrix_set_zero(&X_sub.matrix);
- }
- }
-
-
- }
- ns_test++;
- }
-
- for (size_t i_vc=0; i_vc<n_vc; i_vc++) {
- if (ns_vec[i_vc]%msize!=0) {
- gsl_matrix_view X_sub=
- gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize);
- gsl_matrix_view kin_sub=
- gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc,
- ni_test, ni_test);
- eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix,
- 1.0, &kin_sub.matrix);
- }
- }
-
- cout<<endl;
-
- for (size_t t=0; t<n_vc; t++) {
- gsl_vector_set(vector_ns, t, ns_vec[t]);
-
- for (size_t i=0; i<ni_test; ++i) {
- for (size_t j=0; j<=i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i+ni_test*t);
- d/=(double)ns_vec[t];
- gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
- gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
- }
- }
- }
-
- gsl_vector_free (geno);
-
- gsl_vector_free (Wtx);
- gsl_matrix_free (WtW);
- gsl_matrix_free (WtWi);
- gsl_vector_free (WtWiWtx);
- gsl_permutation_free (pmt);
-
- gsl_matrix_free (Xlarge);
-
- infile.close();
- infile.clear();
-
- return true;
+// Accumulate kinship matrices from a PLINK .bed genotype file.
+//
+//   file_bed       binary genotype file.
+//   display_pace   the progress bar is refreshed every display_pace SNPs.
+//   indicator_idv  one flag per individual in the file; individuals whose
+//                  flag is 0 are left out of the kinship matrix.
+//   indicator_snp  one flag per SNP; SNPs whose flag is 0 are skipped.
+//   mapRS2weight   optional per-SNP weights keyed by rs id. When non-empty,
+//                  SNPs without an entry are ignored.
+//   mapRS2cat      optional rs id -> category index; only consulted when
+//                  matrix_kin holds more than one block.
+//   snpInfo        snpInfo[t].rs_number is the rs id of SNP t.
+//   W              covariate matrix; each genotype vector is replaced by its
+//                  residual after regression on W.
+//   matrix_kin     in/out, ni_test x (n_vc * ni_test). Block t occupies
+//                  columns [t * ni_test, (t + 1) * ni_test). Products are
+//                  added to the existing content (beta = 1.0), so the caller
+//                  is presumably expected to zero it first -- TODO confirm.
+//   vector_ns      out, number of SNPs accumulated into each block.
+//
+// Returns false if the bed file cannot be opened, true otherwise.
+bool PlinkKin(const string &file_bed, const int display_pace,
+              const vector<int> &indicator_idv,
+              const vector<int> &indicator_snp,
+              const map<string, double> &mapRS2weight,
+              const map<string, size_t> &mapRS2cat,
+              const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+              gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
+  ifstream infile(file_bed.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bed file:" << file_bed << endl;
+    return false;
+  }
+
+  char ch[1];
+  bitset<8> b;
+
+  size_t n_miss, ci_total, ci_test;
+  double d, geno_mean, geno_var;
+
+  size_t ni_test = matrix_kin->size1;
+  size_t ni_total = indicator_idv.size();
+  gsl_vector *geno = gsl_vector_alloc(ni_test);
+
+  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+  // WtW = W^T W; WtWi is meant to receive its inverse through the project's
+  // LU helpers. It is used below to regress W out of every genotype vector.
+  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+  int sig;
+  LUDecomp(WtW, pmt, &sig);
+  LUInvert(WtW, pmt, WtWi);
+
+  int n_bit;
+
+  // Number of side-by-side blocks in matrix_kin, and per-block SNP counters.
+  size_t n_vc = matrix_kin->size2 / ni_test, i_vc;
+  string rs;
+  vector<size_t> ns_vec(n_vc, 0);
+
+  // Create a large matrix: msize buffered SNP columns per block, flushed into
+  // matrix_kin every time a block's buffer fills up.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+  gsl_matrix_set_zero(Xlarge);
+
+  // n_bit is the number of bytes stored per SNP: four individuals are packed
+  // into each byte, rounded up.
+  if (ni_total % 4 == 0) {
+    n_bit = ni_total / 4;
+  } else {
+    n_bit = ni_total / 4 + 1;
+  }
+
+  // Read past the three leading bytes of the file; their values are not
+  // checked. (Every SNP is located with an absolute seekg below anyway.)
+  for (int i = 0; i < 3; ++i) {
+    infile.read(ch, 1);
+  }
+
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+      ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+    }
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+
+    // n_bit, and 3 is the number of magic numbers
+    infile.seekg(t * n_bit + 3);
+
+    rs = snpInfo[t].rs_number; // This line is new.
+
+    // Read genotypes. ci_total counts every individual in the file, ci_test
+    // only those that are kept; geno is indexed by ci_test.
+    geno_mean = 0.0;
+    n_miss = 0;
+    ci_total = 0;
+    ci_test = 0;
+    for (int i = 0; i < n_bit; ++i) {
+      infile.read(ch, 1);
+      b = ch[0];
+
+      // Minor allele homozygous: 2.0; major: 0.0;
+      for (size_t j = 0; j < 4; ++j) {
+        // The last byte may be only partly used.
+        if ((i == (n_bit - 1)) && ci_total == ni_total) {
+          break;
+        }
+        // Excluded individual: its two bits are skipped, nothing is stored.
+        if (indicator_idv[ci_total] == 0) {
+          ci_total++;
+          continue;
+        }
+
+        // Bit pair (b[2j], b[2j+1]): 00 -> 2, 01 -> 1, 11 -> 0, 10 -> missing
+        // (stored as -9 and imputed below).
+        if (b[2 * j] == 0) {
+          if (b[2 * j + 1] == 0) {
+            gsl_vector_set(geno, ci_test, 2.0);
+            geno_mean += 2.0;
+          } else {
+            gsl_vector_set(geno, ci_test, 1.0);
+            geno_mean += 1.0;
+          }
+        } else {
+          if (b[2 * j + 1] == 1) {
+            gsl_vector_set(geno, ci_test, 0.0);
+          } else {
+            gsl_vector_set(geno, ci_test, -9.0);
+            n_miss++;
+          }
+        }
+
+        ci_test++;
+        ci_total++;
+      }
+    }
+
+    geno_mean /= (double)(ni_test - n_miss);
+
+    // Impute missing genotypes with the mean of the observed ones.
+    for (size_t i = 0; i < ni_test; ++i) {
+      d = gsl_vector_get(geno, i);
+      if (d == -9.0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    // geno <- geno - W (W^T W)^{-1} W^T geno, then take the variance of the
+    // residual. (A variance computed from the raw genotypes would be
+    // overwritten here, so it is no longer computed.)
+    gsl_blas_dgemv(CblasTrans, 1.0, W, geno, 0.0, Wtx);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+    gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno);
+    gsl_blas_ddot(geno, geno, &geno_var);
+    geno_var /= (double)ni_test;
+
+    if (geno_var != 0 &&
+        (mapRS2weight.size() == 0 || mapRS2weight.count(rs) != 0)) {
+      if (mapRS2weight.size() == 0) {
+        d = 1.0 / geno_var;
+      } else {
+        d = mapRS2weight.at(rs) / geno_var;
+      }
+
+      gsl_vector_scale(geno, sqrt(d));
+      if (n_vc == 1 || mapRS2cat.size() == 0) {
+        // Single block (or no category map): everything goes to block 0.
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, ns_vec[0] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+        ns_vec[0]++;
+
+        if (ns_vec[0] % msize == 0) {
+          eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+          gsl_matrix_set_zero(Xlarge);
+        }
+      } else if (mapRS2cat.count(rs) != 0) {
+        i_vc = mapRS2cat.at(rs);
+
+        gsl_vector_view Xlarge_col =
+            gsl_matrix_column(Xlarge, msize * i_vc + ns_vec[i_vc] % msize);
+        gsl_vector_memcpy(&Xlarge_col.vector, geno);
+
+        ns_vec[i_vc]++;
+
+        if (ns_vec[i_vc] % msize == 0) {
+          gsl_matrix_view X_sub =
+              gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+          gsl_matrix_view kin_sub = gsl_matrix_submatrix(
+              matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+          eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                         &kin_sub.matrix);
+
+          gsl_matrix_set_zero(&X_sub.matrix);
+        }
+      }
+    }
+  }
+
+  // Flush whatever is left in each block's buffer. Unused buffer columns are
+  // zero, so multiplying the whole buffer is harmless.
+  for (size_t i_vc = 0; i_vc < n_vc; i_vc++) {
+    if (ns_vec[i_vc] % msize != 0) {
+      gsl_matrix_view X_sub =
+          gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
+      gsl_matrix_view kin_sub =
+          gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
+      eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+                     &kin_sub.matrix);
+    }
+  }
+
+  cout << endl;
+
+  // Report the SNP counts, divide each block by its count and mirror the
+  // upper triangle into the lower one.
+  for (size_t t = 0; t < n_vc; t++) {
+    gsl_vector_set(vector_ns, t, ns_vec[t]);
+
+    for (size_t i = 0; i < ni_test; ++i) {
+      for (size_t j = 0; j <= i; ++j) {
+        d = gsl_matrix_get(matrix_kin, j, i + ni_test * t);
+        // A block that received no SNP is left unscaled: 0/0 would be NaN,
+        // and MFILEKin multiplies the block by its (zero) SNP count again,
+        // which would keep the NaN and poison the combined matrix.
+        if (ns_vec[t] != 0) {
+          d /= (double)ns_vec[t];
+        }
+        gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+        gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
+      }
+    }
+  }
+
+  gsl_vector_free(geno);
+
+  gsl_vector_free(Wtx);
+  gsl_matrix_free(WtW);
+  gsl_matrix_free(WtWi);
+  gsl_vector_free(WtWiWtx);
+  gsl_permutation_free(pmt);
+
+  gsl_matrix_free(Xlarge);
+
+  infile.close();
+  infile.clear();
+
+  return true;
 }
-bool MFILEKin (const size_t mfile_mode, const string &file_mfile,
- const int display_pace, const vector<int> &indicator_idv,
- const vector<vector<int> > &mindicator_snp,
- const map<string, double> &mapRS2weight,
- const map<string, size_t> &mapRS2cat,
- const vector<vector<SNPINFO> > &msnpInfo,
- const gsl_matrix *W, gsl_matrix *matrix_kin,
- gsl_vector *vector_ns) {
- size_t n_vc=vector_ns->size, ni_test=matrix_kin->size1;
+bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
+ const int display_pace, const vector<int> &indicator_idv,
+ const vector<vector<int>> &mindicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<vector<SNPINFO>> &msnpInfo, const gsl_matrix *W,
+ gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
+ size_t n_vc = vector_ns->size, ni_test = matrix_kin->size1;
gsl_matrix_set_zero(matrix_kin);
gsl_vector_set_zero(vector_ns);
- igzstream infile (file_mfile.c_str(), igzstream::in);
+ igzstream infile(file_mfile.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open mfile file: "<<file_mfile<<endl;
+ cout << "error! fail to open mfile file: " << file_mfile << endl;
return false;
}
string file_name;
- gsl_matrix *kin_tmp=gsl_matrix_alloc (matrix_kin->size1, matrix_kin->size2);
- gsl_vector *ns_tmp=gsl_vector_alloc (vector_ns->size);
+ gsl_matrix *kin_tmp = gsl_matrix_alloc(matrix_kin->size1, matrix_kin->size2);
+ gsl_vector *ns_tmp = gsl_vector_alloc(vector_ns->size);
- size_t l=0;
+ size_t l = 0;
double d;
while (!safeGetline(infile, file_name).eof()) {
gsl_matrix_set_zero(kin_tmp);
gsl_vector_set_zero(ns_tmp);
- if (mfile_mode==1) {
- file_name+=".bed";
- PlinkKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
+ if (mfile_mode == 1) {
+ file_name += ".bed";
+ PlinkKin(file_name, display_pace, indicator_idv, mindicator_snp[l],
+ mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
} else {
- BimbamKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
+ BimbamKin(file_name, display_pace, indicator_idv, mindicator_snp[l],
+ mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
}
// Add ns.
gsl_vector_add(vector_ns, ns_tmp);
// Add kin.
- for (size_t t=0; t<n_vc; t++) {
- for (size_t i=0; i<ni_test; ++i) {
- for (size_t j=0; j<=i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)+gsl_matrix_get (kin_tmp, j, i+ni_test*t)*gsl_vector_get(ns_tmp, t);
-
- gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
- gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
- }
+ for (size_t t = 0; t < n_vc; t++) {
+ for (size_t i = 0; i < ni_test; ++i) {
+ for (size_t j = 0; j <= i; ++j) {
+ d = gsl_matrix_get(matrix_kin, j, i + ni_test * t) +
+ gsl_matrix_get(kin_tmp, j, i + ni_test * t) *
+ gsl_vector_get(ns_tmp, t);
+
+ gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+ gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
+ }
}
}
l++;
}
// Renormalize kin.
- for (size_t t=0; t<n_vc; t++) {
- for (size_t i=0; i<ni_test; ++i) {
- for (size_t j=0; j<=i; ++j) {
- d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)/
- gsl_vector_get(vector_ns, t);
-
- gsl_matrix_set (matrix_kin, i, j+ni_test*t, d);
- gsl_matrix_set (matrix_kin, j, i+ni_test*t, d);
-
+ for (size_t t = 0; t < n_vc; t++) {
+ for (size_t i = 0; i < ni_test; ++i) {
+ for (size_t j = 0; j <= i; ++j) {
+ d = gsl_matrix_get(matrix_kin, j, i + ni_test * t) /
+ gsl_vector_get(vector_ns, t);
+
+ gsl_matrix_set(matrix_kin, i, j + ni_test * t, d);
+ gsl_matrix_set(matrix_kin, j, i + ni_test * t, d);
}
}
}
- cout<<endl;
+ cout << endl;
infile.close();
infile.clear();
@@ -3602,15 +3820,13 @@ bool MFILEKin (const size_t mfile_mode, const string &file_mfile,
return true;
}
-
// Read var file, store mapRS2wsnp.
-bool ReadFile_wsnp (const string &file_wsnp,
- map<string, double> &mapRS2weight) {
+bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2weight) {
mapRS2weight.clear();
- igzstream infile (file_wsnp.c_str(), igzstream::in);
+ igzstream infile(file_wsnp.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open snp weight file: "<<file_wsnp<<endl;
+ cout << "error! fail to open snp weight file: " << file_wsnp << endl;
return false;
}
@@ -3619,29 +3835,29 @@ bool ReadFile_wsnp (const string &file_wsnp,
double weight;
while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
- ch_ptr=strtok (NULL, " , \t");
- weight=atof(ch_ptr);
- mapRS2weight[rs]=weight;
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ rs = ch_ptr;
+ ch_ptr = strtok(NULL, " , \t");
+ weight = atof(ch_ptr);
+ mapRS2weight[rs] = weight;
}
return true;
}
-bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
- map<string, vector<double> > &mapRS2wvector) {
+bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc,
+ map<string, vector<double>> &mapRS2wvector) {
mapRS2wvector.clear();
- igzstream infile (file_wcat.c_str(), igzstream::in);
+ igzstream infile(file_wcat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open snp weight file: "<<file_wcat<<endl;
+ cout << "error! fail to open snp weight file: " << file_wcat << endl;
return false;
}
char *ch_ptr;
vector<double> weight;
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
weight.push_back(0.0);
}
@@ -3650,43 +3866,52 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
// Read header.
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader_io(line, header);
while (!safeGetline(infile, line).eof()) {
- if (isBlankLine(line)) {continue;}
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- size_t t=0;
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
- else if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr; }
- else if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr; }
- else if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr; }
- else if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr; }
- else if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr; }
- else {
- weight[t]=atof(ch_ptr); t++;
- if (t>n_vc) {
- cout<<"error! Number of columns in the wcat file does not "<<
- "match that of cat file.";
- return false;
- }
- }
-
- ch_ptr=strtok (NULL, " , \t");
- }
-
- if (t!=n_vc) {
- cout<<"error! Number of columns in the wcat file does not "<<
- "match that of cat file.";
+ if (isBlankLine(line)) {
+ continue;
+ }
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+ size_t t = 0;
+ for (size_t i = 0; i < header.coln; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ } else if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ } else if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ } else if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ } else if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ } else if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
+ } else {
+ weight[t] = atof(ch_ptr);
+ t++;
+ if (t > n_vc) {
+ cout << "error! Number of columns in the wcat file does not "
+ << "match that of cat file.";
+ return false;
+ }
+ }
+
+ ch_ptr = strtok(NULL, " , \t");
+ }
+
+ if (t != n_vc) {
+ cout << "error! Number of columns in the wcat file does not "
+ << "match that of cat file.";
return false;
}
- if (header.rs_col==0) {
- rs=chr+":"+pos;
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
}
- mapRS2wvector[rs]=weight;
+ mapRS2wvector[rs] = weight;
}
return true;
@@ -3700,18 +3925,23 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
// the beta file for the second time, compute q, and Vq based on block
// jacknife use the mapRS2var to select snps (and to ), calculate q do
// a block-wise jacknife, and compute Vq
-void ReadFile_beta (const string &file_beta,
- const map<string, size_t> &mapRS2cat,
- const map<string, double> &mapRS2wA,
- vector<size_t> &vec_cat, vector<size_t> &vec_ni,
- vector<double> &vec_weight, vector<double> &vec_z2,
- size_t &ni_total, size_t &ns_total, size_t &ns_test) {
- vec_cat.clear(); vec_ni.clear(); vec_weight.clear(); vec_z2.clear();
- ni_total=0; ns_total=0; ns_test=0;
-
- igzstream infile (file_beta.c_str(), igzstream::in);
+void ReadFile_beta(const string &file_beta,
+ const map<string, size_t> &mapRS2cat,
+ const map<string, double> &mapRS2wA, vector<size_t> &vec_cat,
+ vector<size_t> &vec_ni, vector<double> &vec_weight,
+ vector<double> &vec_z2, size_t &ni_total, size_t &ns_total,
+ size_t &ns_test) {
+ vec_cat.clear();
+ vec_ni.clear();
+ vec_weight.clear();
+ vec_z2.clear();
+ ni_total = 0;
+ ns_total = 0;
+ ns_test = 0;
+
+ igzstream infile(file_beta.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open beta file: "<<file_beta<<endl;
+ cout << "error! fail to open beta file: " << file_beta << endl;
return;
}
@@ -3720,110 +3950,158 @@ void ReadFile_beta (const string &file_beta,
string type;
string rs, chr, a1, a0, pos, cm;
- double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
- size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;
+ double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0,
+ af = 0, var_x = 0;
+ size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0;
// Read header.
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader_io(line, header);
- if (header.n_col==0 ) {
- if ( (header.nobs_col==0 && header.nmis_col==0) &&
- (header.ncase_col==0 && header.ncontrol_col==0) ) {
- cout<<"error! missing sample size in the beta file."<<endl;
+ if (header.n_col == 0) {
+ if ((header.nobs_col == 0 && header.nmis_col == 0) &&
+ (header.ncase_col == 0 && header.ncontrol_col == 0)) {
+ cout << "error! missing sample size in the beta file." << endl;
} else {
- cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+ cout << "total sample size will be replaced by obs/mis sample size."
+ << endl;
}
}
- if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) &&
- header.chisq_col==0 && header.p_col==0) {
- cout<<"error! missing z scores in the beta file."<<endl;
+ if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) &&
+ header.chisq_col == 0 && header.p_col == 0) {
+ cout << "error! missing z scores in the beta file." << endl;
}
while (!safeGetline(infile, line).eof()) {
- if (isBlankLine(line)) {continue;}
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
- n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; af=0; var_x=0;
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
- if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
- if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
- if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
- if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
- if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+ if (isBlankLine(line)) {
+ continue;
+ }
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+ z = 0;
+ beta = 0;
+ se_beta = 0;
+ chisq = 0;
+ pvalue = 0;
+ n_total = 0;
+ n_mis = 0;
+ n_obs = 0;
+ n_case = 0;
+ n_control = 0;
+ af = 0;
+ var_x = 0;
+ for (size_t i = 0; i < header.coln; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ }
+ if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ }
+ if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ }
+ if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ }
+ if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ }
+ if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
+ }
- if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
- if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
- if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
- se_beta=atof(ch_ptr);
+ if (header.z_col != 0 && header.z_col == i + 1) {
+ z = atof(ch_ptr);
+ }
+ if (header.beta_col != 0 && header.beta_col == i + 1) {
+ beta = atof(ch_ptr);
+ }
+ if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
+ se_beta = atof(ch_ptr);
+ }
+ if (header.chisq_col != 0 && header.chisq_col == i + 1) {
+ chisq = atof(ch_ptr);
+ }
+ if (header.p_col != 0 && header.p_col == i + 1) {
+ pvalue = atof(ch_ptr);
}
- if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
- if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
- if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
- if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
- if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
- if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);}
- if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {
- n_control=atoi(ch_ptr);
+ if (header.n_col != 0 && header.n_col == i + 1) {
+ n_total = atoi(ch_ptr);
+ }
+ if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+ n_mis = atoi(ch_ptr);
+ }
+ if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+ n_obs = atoi(ch_ptr);
+ }
+ if (header.ncase_col != 0 && header.ncase_col == i + 1) {
+ n_case = atoi(ch_ptr);
+ }
+ if (header.ncontrol_col != 0 && header.ncontrol_col == i + 1) {
+ n_control = atoi(ch_ptr);
+ }
+ if (header.af_col != 0 && header.af_col == i + 1) {
+ af = atof(ch_ptr);
+ }
+ if (header.var_col != 0 && header.var_col == i + 1) {
+ var_x = atof(ch_ptr);
}
- if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
- if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
- if (header.rs_col==0) {
- rs=chr+":"+pos;
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
}
- if (header.n_col==0) {
- if (header.nmis_col!=0 && header.nobs_col!=0) {
- n_total=n_mis+n_obs;
+ if (header.n_col == 0) {
+ if (header.nmis_col != 0 && header.nobs_col != 0) {
+ n_total = n_mis + n_obs;
} else {
- n_total=n_case+n_control;
+ n_total = n_case + n_control;
}
}
// Both z values and beta/se_beta have directions, while
// chisq/pvalue do not.
- if (header.z_col!=0) {
- zsquare=z*z;
- } else if (header.beta_col!=0 && header.sebeta_col!=0) {
- z=beta/se_beta;
- zsquare=z*z;
- } else if (header.chisq_col!=0) {
- zsquare=chisq;
- } else if (header.p_col!=0) {
- zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
- } else {zsquare=0;}
+ if (header.z_col != 0) {
+ zsquare = z * z;
+ } else if (header.beta_col != 0 && header.sebeta_col != 0) {
+ z = beta / se_beta;
+ zsquare = z * z;
+ } else if (header.chisq_col != 0) {
+ zsquare = chisq;
+ } else if (header.p_col != 0) {
+ zsquare = gsl_cdf_chisq_Qinv(pvalue, 1);
+ } else {
+ zsquare = 0;
+ }
// Obtain var_x.
- if (header.var_col==0 && header.af_col!=0) {
- var_x=2.0*af*(1.0-af);
+ if (header.var_col == 0 && header.af_col != 0) {
+ var_x = 2.0 * af * (1.0 - af);
}
// If the SNP is also present in cor file, then do calculations.
- if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) &&
- (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) && zsquare!=0) {
- if (mapRS2cat.size()!=0) {
- vec_cat.push_back(mapRS2cat.at(rs));
+ if ((mapRS2wA.size() == 0 || mapRS2wA.count(rs) != 0) &&
+ (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0) && zsquare != 0) {
+ if (mapRS2cat.size() != 0) {
+ vec_cat.push_back(mapRS2cat.at(rs));
} else {
- vec_cat.push_back(0);
+ vec_cat.push_back(0);
}
vec_ni.push_back(n_total);
- if (mapRS2wA.size()==0) {
- vec_weight.push_back(1);
+ if (mapRS2wA.size() == 0) {
+ vec_weight.push_back(1);
} else {
- vec_weight.push_back(mapRS2wA.at(rs));
+ vec_weight.push_back(mapRS2wA.at(rs));
}
vec_z2.push_back(zsquare);
- ni_total=max(ni_total, n_total);
+ ni_total = max(ni_total, n_total);
ns_test++;
}
@@ -3836,15 +4114,15 @@ void ReadFile_beta (const string &file_beta,
return;
}
-void ReadFile_beta (const string &file_beta,
- const map<string, double> &mapRS2wA,
- map<string, string> &mapRS2A1,
- map<string, double> &mapRS2z) {
- mapRS2A1.clear(); mapRS2z.clear();
+void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
+ map<string, string> &mapRS2A1,
+ map<string, double> &mapRS2z) {
+ mapRS2A1.clear();
+ mapRS2z.clear();
- igzstream infile (file_beta.c_str(), igzstream::in);
+ igzstream infile(file_beta.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open beta file: "<<file_beta<<endl;
+ cout << "error! fail to open beta file: " << file_beta << endl;
return;
}
@@ -3853,92 +4131,137 @@ void ReadFile_beta (const string &file_beta,
string type;
string rs, chr, a1, a0, pos, cm;
- double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, af=0, var_x=0;
- size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0;
- size_t ni_total=0, ns_total=0, ns_test=0;
+ double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, af = 0, var_x = 0;
+ size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0;
+ size_t ni_total = 0, ns_total = 0, ns_test = 0;
// Read header.
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_io (line, header);
+ ReadHeader_io(line, header);
- if (header.n_col==0 ) {
- if ((header.nobs_col==0 && header.nmis_col==0) &&
- (header.ncase_col==0 && header.ncontrol_col==0)) {
- cout<<"error! missing sample size in the beta file."<<endl;
+ if (header.n_col == 0) {
+ if ((header.nobs_col == 0 && header.nmis_col == 0) &&
+ (header.ncase_col == 0 && header.ncontrol_col == 0)) {
+ cout << "error! missing sample size in the beta file." << endl;
} else {
- cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+ cout << "total sample size will be replaced by obs/mis sample size."
+ << endl;
}
}
- if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0)) {
- cout<<"error! missing z scores in the beta file."<<endl;
+ if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0)) {
+ cout << "error! missing z scores in the beta file." << endl;
}
while (!safeGetline(infile, line).eof()) {
- if (isBlankLine(line)) {continue;}
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
- n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; af=0; var_x=0;
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
- if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
- if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
- if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
- if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
- if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+ if (isBlankLine(line)) {
+ continue;
+ }
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+ z = 0;
+ beta = 0;
+ se_beta = 0;
+ chisq = 0;
+ pvalue = 0;
+ n_total = 0;
+ n_mis = 0;
+ n_obs = 0;
+ n_case = 0;
+ n_control = 0;
+ af = 0;
+ var_x = 0;
+ for (size_t i = 0; i < header.coln; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ }
+ if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ }
+ if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ }
+ if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ }
+ if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ }
+ if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
+ }
- if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
- if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
- if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
- se_beta=atof(ch_ptr);
+ if (header.z_col != 0 && header.z_col == i + 1) {
+ z = atof(ch_ptr);
+ }
+ if (header.beta_col != 0 && header.beta_col == i + 1) {
+ beta = atof(ch_ptr);
+ }
+ if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
+ se_beta = atof(ch_ptr);
+ }
+ if (header.chisq_col != 0 && header.chisq_col == i + 1) {
+ chisq = atof(ch_ptr);
+ }
+ if (header.p_col != 0 && header.p_col == i + 1) {
+ pvalue = atof(ch_ptr);
}
- if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
- if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
- if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
- if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
- if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
- if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);}
- if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) {
- n_control=atoi(ch_ptr);
+ if (header.n_col != 0 && header.n_col == i + 1) {
+ n_total = atoi(ch_ptr);
+ }
+ if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+ n_mis = atoi(ch_ptr);
+ }
+ if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+ n_obs = atoi(ch_ptr);
+ }
+ if (header.ncase_col != 0 && header.ncase_col == i + 1) {
+ n_case = atoi(ch_ptr);
+ }
+ if (header.ncontrol_col != 0 && header.ncontrol_col == i + 1) {
+ n_control = atoi(ch_ptr);
}
- if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
- if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+ if (header.af_col != 0 && header.af_col == i + 1) {
+ af = atof(ch_ptr);
+ }
+ if (header.var_col != 0 && header.var_col == i + 1) {
+ var_x = atof(ch_ptr);
+ }
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
- if (header.rs_col==0) {
- rs=chr+":"+pos;
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
}
- if (header.n_col==0) {
- if (header.nmis_col!=0 && header.nobs_col!=0) {
- n_total=n_mis+n_obs;
+ if (header.n_col == 0) {
+ if (header.nmis_col != 0 && header.nobs_col != 0) {
+ n_total = n_mis + n_obs;
} else {
- n_total=n_case+n_control;
+ n_total = n_case + n_control;
}
}
// Both z values and beta/se_beta have directions, while
// chisq/pvalue do not.
- if (header.z_col!=0) {
- z=z;
- } else if (header.beta_col!=0 && header.sebeta_col!=0) {
- z=beta/se_beta;
+ if (header.z_col != 0) {
+ z = z;
+ } else if (header.beta_col != 0 && header.sebeta_col != 0) {
+ z = beta / se_beta;
} else {
- z=0;
+ z = 0;
}
// If the snp is also present in cor file, then do calculations.
- if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) ) {
- mapRS2z[rs]=z;
- mapRS2A1[rs]=a1;
+ if ((mapRS2wA.size() == 0 || mapRS2wA.count(rs) != 0)) {
+ mapRS2z[rs] = z;
+ mapRS2A1[rs] = a1;
- ni_total=max(ni_total, n_total);
+ ni_total = max(ni_total, n_total);
ns_test++;
}
@@ -3951,139 +4274,155 @@ void ReadFile_beta (const string &file_beta,
return;
}
-void Calcq (const size_t n_block, const vector<size_t> &vec_cat,
- const vector<size_t> &vec_ni, const vector<double> &vec_weight,
- const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q,
- gsl_vector *s) {
- gsl_matrix_set_zero (Vq);
- gsl_vector_set_zero (q);
- gsl_vector_set_zero (s);
+void Calcq(const size_t n_block, const vector<size_t> &vec_cat,
+ const vector<size_t> &vec_ni, const vector<double> &vec_weight,
+ const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q,
+ gsl_vector *s) {
+ gsl_matrix_set_zero(Vq);
+ gsl_vector_set_zero(q);
+ gsl_vector_set_zero(s);
size_t cat, n_total;
double w, zsquare;
vector<double> vec_q, vec_s, n_snps;
- for (size_t i=0; i<q->size; i++) {
+ for (size_t i = 0; i < q->size; i++) {
vec_q.push_back(0.0);
vec_s.push_back(0.0);
n_snps.push_back(0.0);
}
- vector<vector<double> > mat_q, mat_s;
- for (size_t i=0; i<n_block; i++) {
+ vector<vector<double>> mat_q, mat_s;
+ for (size_t i = 0; i < n_block; i++) {
mat_q.push_back(vec_q);
mat_s.push_back(vec_s);
}
// Compute q and s.
- for (size_t i=0; i<vec_cat.size(); i++) {
+ for (size_t i = 0; i < vec_cat.size(); i++) {
// Extract quantities.
- cat=vec_cat[i];
- n_total=vec_ni[i];
- w=vec_weight[i];
- zsquare=vec_z2[i];
+ cat = vec_cat[i];
+ n_total = vec_ni[i];
+ w = vec_weight[i];
+ zsquare = vec_z2[i];
// Compute q and s.
- vec_q[cat]+=(zsquare-1.0)*w/(double)n_total;
- vec_s[cat]+=w;
+ vec_q[cat] += (zsquare - 1.0) * w / (double)n_total;
+ vec_s[cat] += w;
n_snps[cat]++;
}
// Update q; vec_q is used again for computing Vq below.
- for (size_t i=0; i<q->size; i++) {
- if (vec_s[i]!=0) {
- gsl_vector_set(q, i, vec_q[i]/vec_s[i]);
+ for (size_t i = 0; i < q->size; i++) {
+ if (vec_s[i] != 0) {
+ gsl_vector_set(q, i, vec_q[i] / vec_s[i]);
}
gsl_vector_set(s, i, vec_s[i]);
}
// Compute Vq; divide SNPs in each category into evenly distributed
// blocks.
- size_t t=0, b=0, n_snp=0;
+ size_t t = 0, b = 0, n_snp = 0;
double d, m, n;
- for (size_t l=0; l<q->size; l++) {
- n_snp=floor(n_snps[l]/n_block); t=0; b=0;
- if (n_snp==0) {continue;}
+ for (size_t l = 0; l < q->size; l++) {
+ n_snp = floor(n_snps[l] / n_block);
+ t = 0;
+ b = 0;
+ if (n_snp == 0) {
+ continue;
+ }
// Initiate everything to zero.
- for (size_t i=0; i<n_block; i++) {
- for (size_t j=0; j<q->size; j++) {
- mat_q[i][j]=0;
- mat_s[i][j]=0;
+ for (size_t i = 0; i < n_block; i++) {
+ for (size_t j = 0; j < q->size; j++) {
+ mat_q[i][j] = 0;
+ mat_s[i][j] = 0;
}
}
// Record values.
- for (size_t i=0; i<vec_cat.size(); i++) {
+ for (size_t i = 0; i < vec_cat.size(); i++) {
// Extract quantities.
- cat=vec_cat[i];
- n_total=vec_ni[i];
- w=vec_weight[i];
- zsquare=vec_z2[i];
+ cat = vec_cat[i];
+ n_total = vec_ni[i];
+ w = vec_weight[i];
+ zsquare = vec_z2[i];
// Save quantities for computing Vq (which is not divided by
// n_total).
- mat_q[b][cat]+=(zsquare-1.0)*w;
- mat_s[b][cat]+=w;
-
- if (cat==l) {
- if (b<n_block-1) {
- if (t<n_snp-1) {t++;} else {b++; t=0;}
- } else {
- t++;
- }
+ mat_q[b][cat] += (zsquare - 1.0) * w;
+ mat_s[b][cat] += w;
+
+ if (cat == l) {
+ if (b < n_block - 1) {
+ if (t < n_snp - 1) {
+ t++;
+ } else {
+ b++;
+ t = 0;
+ }
+ } else {
+ t++;
+ }
}
}
// Center mat_q.
- for (size_t i=0; i<q->size; i++) {
- m=0; n=0;
- for (size_t k=0; k<n_block; k++) {
- if (mat_s[k][i]!=0 && vec_s[i]!=mat_s[k][i]) {
- d=(vec_q[i]-mat_q[k][i])/(vec_s[i]-mat_s[k][i]);
- mat_q[k][i]=d;
- m+=d;
- n++;
- }
+ for (size_t i = 0; i < q->size; i++) {
+ m = 0;
+ n = 0;
+ for (size_t k = 0; k < n_block; k++) {
+ if (mat_s[k][i] != 0 && vec_s[i] != mat_s[k][i]) {
+ d = (vec_q[i] - mat_q[k][i]) / (vec_s[i] - mat_s[k][i]);
+ mat_q[k][i] = d;
+ m += d;
+ n++;
+ }
+ }
+ if (n != 0) {
+ m /= n;
}
- if (n!=0) {m/=n;}
- for (size_t k=0; k<n_block; k++) {
- if (mat_q[k][i]!=0) {
- mat_q[k][i]-=m;
- }
+ for (size_t k = 0; k < n_block; k++) {
+ if (mat_q[k][i] != 0) {
+ mat_q[k][i] -= m;
+ }
}
}
// Compute Vq for l'th row and l'th column only.
- for (size_t i=0; i<q->size; i++) {
- d=0; n=0;
- for (size_t k=0; k<n_block; k++) {
- if (mat_q[k][l]!=0 && mat_q[k][i]!=0) {
- d+=mat_q[k][l]*mat_q[k][i];
- n++;
- }
- }
- if (n!=0) {
- d/=n;
- d*=n-1;
- }
- d+=gsl_matrix_get(Vq, i, l);
+ for (size_t i = 0; i < q->size; i++) {
+ d = 0;
+ n = 0;
+ for (size_t k = 0; k < n_block; k++) {
+ if (mat_q[k][l] != 0 && mat_q[k][i] != 0) {
+ d += mat_q[k][l] * mat_q[k][i];
+ n++;
+ }
+ }
+ if (n != 0) {
+ d /= n;
+ d *= n - 1;
+ }
+ d += gsl_matrix_get(Vq, i, l);
gsl_matrix_set(Vq, i, l, d);
- if (i!=l) {gsl_matrix_set(Vq, l, i, d);}
+ if (i != l) {
+ gsl_matrix_set(Vq, l, i, d);
+ }
}
-
}
- //divide the off diagonal elements of Vq by 2
- for (size_t i=0; i<q->size; i++) {
- for (size_t j=i; j<q->size; j++) {
- if (i==j) {continue;}
- d=gsl_matrix_get(Vq, i, j);
- gsl_matrix_set(Vq, i, j, d/2);
- gsl_matrix_set(Vq, j, i, d/2);
+ // divide the off diagonal elements of Vq by 2
+ for (size_t i = 0; i < q->size; i++) {
+ for (size_t j = i; j < q->size; j++) {
+ if (i == j) {
+ continue;
+ }
+ d = gsl_matrix_get(Vq, i, j);
+ gsl_matrix_set(Vq, i, j, d / 2);
+ gsl_matrix_set(Vq, j, i, d / 2);
}
}
@@ -4091,20 +4430,19 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat,
}
// Read vector file.
-void ReadFile_vector (const string &file_vec, gsl_vector *vec)
-{
- igzstream infile (file_vec.c_str(), igzstream::in);
+void ReadFile_vector(const string &file_vec, gsl_vector *vec) {
+ igzstream infile(file_vec.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open vector file: "<<file_vec<<endl;
+ cout << "error! fail to open vector file: " << file_vec << endl;
return;
}
string line;
char *ch_ptr;
- for (size_t i=0; i<vec->size; i++) {
+ for (size_t i = 0; i < vec->size; i++) {
!safeGetline(infile, line).eof();
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
gsl_vector_set(vec, i, atof(ch_ptr));
}
@@ -4114,22 +4452,22 @@ void ReadFile_vector (const string &file_vec, gsl_vector *vec)
return;
}
-void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) {
- igzstream infile (file_mat.c_str(), igzstream::in);
+void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) {
+ igzstream infile(file_mat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open matrix file: "<<file_mat<<endl;
+ cout << "error! fail to open matrix file: " << file_mat << endl;
return;
}
string line;
char *ch_ptr;
- for (size_t i=0; i<mat->size1; i++) {
+ for (size_t i = 0; i < mat->size1; i++) {
!safeGetline(infile, line).eof();
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- for (size_t j=0; j<mat->size2; j++) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ for (size_t j = 0; j < mat->size2; j++) {
gsl_matrix_set(mat, i, j, atof(ch_ptr));
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
}
@@ -4139,32 +4477,32 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) {
return;
}
-void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1,
- gsl_matrix *mat2) {
- igzstream infile (file_mat.c_str(), igzstream::in);
+void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1,
+ gsl_matrix *mat2) {
+ igzstream infile(file_mat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open matrix file: "<<file_mat<<endl;
+ cout << "error! fail to open matrix file: " << file_mat << endl;
return;
}
string line;
char *ch_ptr;
- for (size_t i=0; i<mat1->size1; i++) {
+ for (size_t i = 0; i < mat1->size1; i++) {
!safeGetline(infile, line).eof();
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- for (size_t j=0; j<mat1->size2; j++) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ for (size_t j = 0; j < mat1->size2; j++) {
gsl_matrix_set(mat1, i, j, atof(ch_ptr));
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
}
- for (size_t i=0; i<mat2->size1; i++) {
+ for (size_t i = 0; i < mat2->size1; i++) {
!safeGetline(infile, line).eof();
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- for (size_t j=0; j<mat2->size2; j++) {
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ for (size_t j = 0; j < mat2->size2; j++) {
gsl_matrix_set(mat2, i, j, atof(ch_ptr));
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
}
@@ -4175,24 +4513,24 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1,
}
// Read study file.
-void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat,
- gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
- string Vqfile=file_study+".Vq.txt";
- string sfile=file_study+".size.txt";
- string qfile=file_study+".q.txt";
+void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat,
+ gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
+ string Vqfile = file_study + ".Vq.txt";
+ string sfile = file_study + ".size.txt";
+ string qfile = file_study + ".q.txt";
- gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+ gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
ReadFile_matrix(Vqfile, Vq_mat);
ReadFile_vector(sfile, s);
ReadFile_vector(qfile, q_vec);
double d;
- for (size_t i=0; i<s_vec->size; i++) {
- d=gsl_vector_get (s, i);
- gsl_vector_set (s_vec, i, d);
+ for (size_t i = 0; i < s_vec->size; i++) {
+ d = gsl_vector_get(s, i);
+ gsl_vector_set(s_vec, i, d);
}
- ni=gsl_vector_get (s, s_vec->size);
+ ni = gsl_vector_get(s, s_vec->size);
gsl_vector_free(s);
@@ -4200,22 +4538,22 @@ void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat,
}
// Read reference file.
-void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat,
- gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
- string sfile=file_ref+".size.txt";
- string Sfile=file_ref+".S.txt";
+void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat,
+ gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
+ string sfile = file_ref + ".size.txt";
+ string Sfile = file_ref + ".S.txt";
- gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+ gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
ReadFile_vector(sfile, s);
ReadFile_matrix(Sfile, S_mat, Svar_mat);
double d;
- for (size_t i=0; i<s_vec->size; i++) {
- d=gsl_vector_get (s, i);
- gsl_vector_set (s_vec, i, d);
+ for (size_t i = 0; i < s_vec->size; i++) {
+ d = gsl_vector_get(s, i);
+ gsl_vector_set(s_vec, i, d);
}
- ni=gsl_vector_get (s, s_vec->size);
+ ni = gsl_vector_get(s, s_vec->size);
gsl_vector_free(s);
@@ -4223,20 +4561,20 @@ void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat,
}
// Read mstudy file.
-void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat,
- gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
+void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat,
+ gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) {
gsl_matrix_set_zero(Vq_mat);
gsl_vector_set_zero(q_vec);
gsl_vector_set_zero(s_vec);
- ni=0;
+ ni = 0;
- gsl_matrix *Vq_sub=gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2);
- gsl_vector *q_sub=gsl_vector_alloc(q_vec->size);
- gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+ gsl_matrix *Vq_sub = gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2);
+ gsl_vector *q_sub = gsl_vector_alloc(q_vec->size);
+ gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
- igzstream infile (file_mstudy.c_str(), igzstream::in);
+ igzstream infile(file_mstudy.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open mstudy file: "<<file_mstudy<<endl;
+ cout << "error! fail to open mstudy file: " << file_mstudy << endl;
return;
}
@@ -4244,51 +4582,64 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat,
double d1, d2, d;
while (!safeGetline(infile, file_name).eof()) {
- string Vqfile=file_name+".Vq.txt";
- string sfile=file_name+".size.txt";
- string qfile=file_name+".q.txt";
+ string Vqfile = file_name + ".Vq.txt";
+ string sfile = file_name + ".size.txt";
+ string qfile = file_name + ".q.txt";
ReadFile_matrix(Vqfile, Vq_sub);
ReadFile_vector(sfile, s);
ReadFile_vector(qfile, q_sub);
- ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size));
+ ni = max(ni, (size_t)gsl_vector_get(s, s_vec->size));
- for (size_t i=0; i<s_vec->size; i++) {
- d1=gsl_vector_get (s, i);
- if (d1==0) {continue;}
+ for (size_t i = 0; i < s_vec->size; i++) {
+ d1 = gsl_vector_get(s, i);
+ if (d1 == 0) {
+ continue;
+ }
- d=gsl_vector_get(q_vec, i)+gsl_vector_get(q_sub, i)*d1;
+ d = gsl_vector_get(q_vec, i) + gsl_vector_get(q_sub, i) * d1;
gsl_vector_set(q_vec, i, d);
- d=gsl_vector_get(s_vec, i)+d1;
+ d = gsl_vector_get(s_vec, i) + d1;
gsl_vector_set(s_vec, i, d);
- for (size_t j=i; j<s_vec->size; j++) {
- d2=gsl_vector_get (s, j);
- if (d2==0) {continue;}
+ for (size_t j = i; j < s_vec->size; j++) {
+ d2 = gsl_vector_get(s, j);
+ if (d2 == 0) {
+ continue;
+ }
- d=gsl_matrix_get(Vq_mat, i, j)+gsl_matrix_get(Vq_sub, i, j)*d1*d2;
- gsl_matrix_set(Vq_mat, i, j, d);
- if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);}
+ d = gsl_matrix_get(Vq_mat, i, j) +
+ gsl_matrix_get(Vq_sub, i, j) * d1 * d2;
+ gsl_matrix_set(Vq_mat, i, j, d);
+ if (i != j) {
+ gsl_matrix_set(Vq_mat, j, i, d);
+ }
}
}
}
- for (size_t i=0; i<s_vec->size; i++) {
- d1=gsl_vector_get (s_vec, i);
- if (d1==0) {continue;}
+ for (size_t i = 0; i < s_vec->size; i++) {
+ d1 = gsl_vector_get(s_vec, i);
+ if (d1 == 0) {
+ continue;
+ }
- d=gsl_vector_get (q_vec, i);
- gsl_vector_set (q_vec, i, d/d1);
+ d = gsl_vector_get(q_vec, i);
+ gsl_vector_set(q_vec, i, d / d1);
- for (size_t j=i; j<s_vec->size; j++) {
- d2=gsl_vector_get (s_vec, j);
- if (d2==0) {continue;}
+ for (size_t j = i; j < s_vec->size; j++) {
+ d2 = gsl_vector_get(s_vec, j);
+ if (d2 == 0) {
+ continue;
+ }
- d=gsl_matrix_get (Vq_mat, i, j)/(d1*d2);
- gsl_matrix_set (Vq_mat, i, j, d);
- if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);}
+ d = gsl_matrix_get(Vq_mat, i, j) / (d1 * d2);
+ gsl_matrix_set(Vq_mat, i, j, d);
+ if (i != j) {
+ gsl_matrix_set(Vq_mat, j, i, d);
+ }
}
}
@@ -4300,20 +4651,20 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat,
}
// Read reference file.
-void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat,
- gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
+void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
+ gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) {
gsl_matrix_set_zero(S_mat);
gsl_matrix_set_zero(Svar_mat);
gsl_vector_set_zero(s_vec);
- ni=0;
+ ni = 0;
- gsl_matrix *S_sub=gsl_matrix_alloc (S_mat->size1, S_mat->size2);
- gsl_matrix *Svar_sub=gsl_matrix_alloc (Svar_mat->size1, Svar_mat->size2);
- gsl_vector *s=gsl_vector_alloc (s_vec->size+1);
+ gsl_matrix *S_sub = gsl_matrix_alloc(S_mat->size1, S_mat->size2);
+ gsl_matrix *Svar_sub = gsl_matrix_alloc(Svar_mat->size1, Svar_mat->size2);
+ gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
- igzstream infile (file_mref.c_str(), igzstream::in);
+ igzstream infile(file_mref.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open mref file: "<<file_mref<<endl;
+ cout << "error! fail to open mref file: " << file_mref << endl;
return;
}
@@ -4321,51 +4672,59 @@ void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat,
double d1, d2, d;
while (!safeGetline(infile, file_name).eof()) {
- string sfile=file_name+".size.txt";
- string Sfile=file_name+".S.txt";
+ string sfile = file_name + ".size.txt";
+ string Sfile = file_name + ".S.txt";
ReadFile_vector(sfile, s);
ReadFile_matrix(Sfile, S_sub, Svar_sub);
// Update s_vec and ni.
- for (size_t i=0; i<s_vec->size; i++) {
- d=gsl_vector_get (s, i)+gsl_vector_get (s_vec, i);
- gsl_vector_set (s_vec, i, d);
+ for (size_t i = 0; i < s_vec->size; i++) {
+ d = gsl_vector_get(s, i) + gsl_vector_get(s_vec, i);
+ gsl_vector_set(s_vec, i, d);
}
- ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size));
+ ni = max(ni, (size_t)gsl_vector_get(s, s_vec->size));
// Update S and Svar from each file.
- for (size_t i=0; i<S_mat->size1; i++) {
- d1=gsl_vector_get(s, i);
- for (size_t j=0; j<S_mat->size2; j++) {
- d2=gsl_vector_get(s, j);
-
- d=gsl_matrix_get(S_sub, i, j)*d1*d2;
- gsl_matrix_set(S_sub, i, j, d);
- d=gsl_matrix_get(Svar_sub, i, j)*d1*d2*d1*d2;
- gsl_matrix_set(Svar_sub, i, j, d);
+ for (size_t i = 0; i < S_mat->size1; i++) {
+ d1 = gsl_vector_get(s, i);
+ for (size_t j = 0; j < S_mat->size2; j++) {
+ d2 = gsl_vector_get(s, j);
+
+ d = gsl_matrix_get(S_sub, i, j) * d1 * d2;
+ gsl_matrix_set(S_sub, i, j, d);
+ d = gsl_matrix_get(Svar_sub, i, j) * d1 * d2 * d1 * d2;
+ gsl_matrix_set(Svar_sub, i, j, d);
}
}
- gsl_matrix_add (S_mat, S_sub);
- gsl_matrix_add (Svar_mat, Svar_sub);
+ gsl_matrix_add(S_mat, S_sub);
+ gsl_matrix_add(Svar_mat, Svar_sub);
}
// Final: update S and Svar.
- for (size_t i=0; i<S_mat->size1; i++) {
- d1=gsl_vector_get(s_vec, i);
- if (d1==0) {continue;}
- for (size_t j=i; j<S_mat->size2; j++) {
- d2=gsl_vector_get(s_vec, j);
- if (d2==0) {continue;}
-
- d=gsl_matrix_get(S_mat, i, j)/(d1*d2);
+ for (size_t i = 0; i < S_mat->size1; i++) {
+ d1 = gsl_vector_get(s_vec, i);
+ if (d1 == 0) {
+ continue;
+ }
+ for (size_t j = i; j < S_mat->size2; j++) {
+ d2 = gsl_vector_get(s_vec, j);
+ if (d2 == 0) {
+ continue;
+ }
+
+ d = gsl_matrix_get(S_mat, i, j) / (d1 * d2);
gsl_matrix_set(S_mat, i, j, d);
- if (i!=j) {gsl_matrix_set(S_mat, j, i, d);}
+ if (i != j) {
+ gsl_matrix_set(S_mat, j, i, d);
+ }
- d=gsl_matrix_get(Svar_mat, i, j)/(d1*d2*d1*d2);
+ d = gsl_matrix_get(Svar_mat, i, j) / (d1 * d2 * d1 * d2);
gsl_matrix_set(Svar_mat, i, j, d);
- if (i!=j) {gsl_matrix_set(Svar_mat, j, i, d);}
+ if (i != j) {
+ gsl_matrix_set(Svar_mat, j, i, d);
+ }
}
}
diff --git a/src/io.h b/src/io.h
index 9d6f8cc..3e1145a 100644
--- a/src/io.h
+++ b/src/io.h
@@ -19,195 +19,172 @@
#ifndef __IO_H__
#define __IO_H__
-#include <vector>
-#include <map>
-#include <algorithm>
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+#include <algorithm>
+#include <map>
+#include <vector>
#include "gzstream.h"
#include "param.h"
using namespace std;
-void ProgressBar (string str, double p, double total);
-void ProgressBar (string str, double p, double total, double ratio);
-std::istream& safeGetline(std::istream& is, std::string& t);
-
-bool ReadFile_snps (const string &file_snps, set<string> &setSnps);
-bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps);
-bool ReadFile_log (const string &file_log, double &pheno_mean);
-
-bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo);
-bool ReadFile_fam (const string &file_fam,
- vector<vector<int> > &indicator_pheno,
- vector<vector<double> > &pheno,
- map<string, int> &mapID2num,
- const vector<size_t> &p_column);
-
-bool ReadFile_cvt (const string &file_cvt,
- vector<int> &indicator_cvt,
- vector<vector<double> > &cvt,
- size_t &n_cvt);
-bool ReadFile_anno (const string &file_bim, map<string, string> &mapRS2chr,
- map<string, long int> &mapRS2bp,
- map<string, double> &mapRS2cM);
-bool ReadFile_pheno (const string &file_pheno,
- vector<vector<int> > &indicator_pheno,
- vector<vector<double> > &pheno,
- const vector<size_t> &p_column);
-bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv,
- vector<double> &pheno, const int &p_column);
-
-bool ReadFile_geno (const string &file_geno, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, const double &maf_level,
- const double &miss_level, const double &hwe_level,
- const double &r2_level, map<string, string> &mapRS2chr,
- map<string, long int> &mapRS2bp,
- map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
- size_t &ns_test);
-bool ReadFile_bed (const string &file_bed, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
- const double &maf_level, const double &miss_level,
- const double &hwe_level, const double &r2_level,
- size_t &ns_test);
-bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv,
- igzstream &infile, gsl_vector *geno,
- double &geno_mean);
-void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv,
- ifstream &infile, gsl_vector *geno, double &geno_mean);
-
-void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv,
- map<string, int> &mapID2num, const size_t k_mode,
- bool &error, gsl_matrix *G);
-void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv,
- map<string, int> &mapID2num, const size_t k_mode,
- bool &error, gsl_matrix *G);
-void ReadFile_eigenU (const string &file_u, bool &error, gsl_matrix *U);
-void ReadFile_eigenD (const string &file_d, bool &error, gsl_vector *eval);
-
-bool BimbamKin (const string &file_geno, vector<int> &indicator_snp,
- const int k_mode, const int display_pace,
- gsl_matrix *matrix_kin);
-bool PlinkKin (const string &file_bed, vector<int> &indicator_snp,
- const int k_mode, const int display_pace,
- gsl_matrix *matrix_kin);
-
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
- vector<int> &indicator_snp, gsl_matrix *UtX,
- gsl_matrix *K, const bool calc_K);
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
- vector<int> &indicator_snp, gsl_matrix *UtX,
- gsl_matrix *K, const bool calc_K);
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv,
- vector<int> &indicator_snp,
- vector<vector<unsigned char> > &Xt, gsl_matrix *K,
- const bool calc_K, const size_t ni_test,
- const size_t ns_test);
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv,
- vector<int> &indicator_snp,
- vector<vector<unsigned char> > &Xt, gsl_matrix *K,
- const bool calc_K, const size_t ni_test,
- const size_t ns_test);
-
-bool ReadFile_est (const string &file_est, const vector<size_t> &est_column,
- map<string, double> &mapRS2est);
-
-bool CountFileLines (const string &file_input, size_t &n_lines);
-
-bool ReadFile_gene (const string &file_gene, vector<double> &vec_read,
- vector<SNPINFO> &snpInfo, size_t &ng_total);
-
-bool ReadHeader_io (const string &line, HEADER &header);
-bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat,
- size_t &n_vc);
-bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat,
- size_t &n_vc);
-
-bool ReadFile_catc (const string &file_cat,
- map<string, vector<double> > &mapRS2catc,
- size_t &n_cat);
-bool ReadFile_mcatc (const string &file_mcat, map<string,
- vector<double> > &mapRS2catc, size_t &n_cat);
-
-bool BimbamKin (const string &file_geno, const int display_pace,
- const vector<int> &indicator_idv,
- const vector<int> &indicator_snp,
- const map<string, double> &mapRS2weight,
- const map<string, size_t> &mapRS2cat,
- const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
- gsl_matrix *matrix_kin, gsl_vector *vector_ns);
-bool PlinkKin (const string &file_bed, const int display_pace,
- const vector<int> &indicator_idv,
- const vector<int> &indicator_snp,
- const map<string, double> &mapRS2weight,
- const map<string, size_t> &mapRS2cat,
- const vector<SNPINFO> &snpInfo,
- const gsl_matrix *W, gsl_matrix *matrix_kin,
- gsl_vector *vector_ns);
-bool MFILEKin (const size_t mfile_mode, const string &file_mfile,
- const int display_pace, const vector<int> &indicator_idv,
- const vector<vector<int> > &mindicator_snp,
- const map<string, double> &mapRS2weight,
- const map<string, size_t> &mapRS2cat,
- const vector<vector<SNPINFO> > &msnpInfo,
- const gsl_matrix *W, gsl_matrix *matrix_kin,
- gsl_vector *vector_ns);
-
-bool ReadFile_wsnp (const string &file_wsnp,
- map<string, double> &mapRS2double);
-bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc,
- map<string, vector<double> > &mapRS2vector);
-
-void ReadFile_beta (const string &file_beta,
- const map<string, size_t> &mapRS2cat,
- const map<string, double> &mapRS2wA,
- vector<size_t> &vec_cat, vector<size_t> &vec_ni,
- vector<double> &vec_weight, vector<double> &vec_z2,
- size_t &ni_total, size_t &ns_total, size_t &ns_test);
-void ReadFile_beta (const string &file_beta,
- const map<string, double> &mapRS2wA,
- map<string, string> &mapRS2A1,
- map<string, double> &mapRS2z);
-void Calcq (const size_t n_block, const vector<size_t> &vec_cat,
- const vector<size_t> &vec_ni,
- const vector<double> &vec_weight, const vector<double> &vec_z2,
- gsl_matrix *Vq, gsl_vector *q, gsl_vector *s);
-
-void ReadFile_study (const string &file_study, gsl_matrix *Vq,
- gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
-void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat,
- gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
-void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq,
- gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
-void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat,
- gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
+void ProgressBar(string str, double p, double total);
+void ProgressBar(string str, double p, double total, double ratio);
+std::istream &safeGetline(std::istream &is, std::string &t);
+
+bool ReadFile_snps(const string &file_snps, set<string> &setSnps);
+bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps);
+bool ReadFile_log(const string &file_log, double &pheno_mean);
+
+bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo);
+bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
+ vector<vector<double>> &pheno, map<string, int> &mapID2num,
+ const vector<size_t> &p_column);
+
+bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
+ vector<vector<double>> &cvt, size_t &n_cvt);
+bool ReadFile_anno(const string &file_bim, map<string, string> &mapRS2chr,
+ map<string, long int> &mapRS2bp,
+ map<string, double> &mapRS2cM);
+bool ReadFile_pheno(const string &file_pheno,
+ vector<vector<int>> &indicator_pheno,
+ vector<vector<double>> &pheno,
+ const vector<size_t> &p_column);
+bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
+ vector<double> &pheno, const int &p_column);
+
+bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, const double &maf_level,
+ const double &miss_level, const double &hwe_level,
+ const double &r2_level, map<string, string> &mapRS2chr,
+ map<string, long int> &mapRS2bp,
+ map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
+ size_t &ns_test);
+bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+ const double &maf_level, const double &miss_level,
+ const double &hwe_level, const double &r2_level,
+ size_t &ns_test);
+bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
+ igzstream &infile, gsl_vector *geno, double &geno_mean);
+void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv,
+ ifstream &infile, gsl_vector *geno, double &geno_mean);
+
+void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
+ map<string, int> &mapID2num, const size_t k_mode, bool &error,
+ gsl_matrix *G);
+void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
+ map<string, int> &mapID2num, const size_t k_mode, bool &error,
+ gsl_matrix *G);
+void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U);
+void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval);
+
+bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
+ const int k_mode, const int display_pace,
+ gsl_matrix *matrix_kin);
+bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
+ const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
+
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+ const bool calc_K);
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
+ const bool calc_K);
+bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+ vector<int> &indicator_snp,
+ vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+ const bool calc_K, const size_t ni_test,
+ const size_t ns_test);
+bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
+ gsl_matrix *K, const bool calc_K, const size_t ni_test,
+ const size_t ns_test);
+
+bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
+ map<string, double> &mapRS2est);
+
+bool CountFileLines(const string &file_input, size_t &n_lines);
+
+bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
+ vector<SNPINFO> &snpInfo, size_t &ng_total);
+
+bool ReadHeader_io(const string &line, HEADER &header);
+bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
+ size_t &n_vc);
+bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat,
+ size_t &n_vc);
+
+bool ReadFile_catc(const string &file_cat,
+ map<string, vector<double>> &mapRS2catc, size_t &n_cat);
+bool ReadFile_mcatc(const string &file_mcat,
+ map<string, vector<double>> &mapRS2catc, size_t &n_cat);
+
+bool BimbamKin(const string &file_geno, const int display_pace,
+ const vector<int> &indicator_idv,
+ const vector<int> &indicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+ gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+bool PlinkKin(const string &file_bed, const int display_pace,
+ const vector<int> &indicator_idv,
+ const vector<int> &indicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+ gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
+ const int display_pace, const vector<int> &indicator_idv,
+ const vector<vector<int>> &mindicator_snp,
+ const map<string, double> &mapRS2weight,
+ const map<string, size_t> &mapRS2cat,
+ const vector<vector<SNPINFO>> &msnpInfo, const gsl_matrix *W,
+ gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+
+bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2double);
+bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc,
+ map<string, vector<double>> &mapRS2vector);
+
+void ReadFile_beta(const string &file_beta,
+ const map<string, size_t> &mapRS2cat,
+ const map<string, double> &mapRS2wA, vector<size_t> &vec_cat,
+ vector<size_t> &vec_ni, vector<double> &vec_weight,
+ vector<double> &vec_z2, size_t &ni_total, size_t &ns_total,
+ size_t &ns_test);
+void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
+ map<string, string> &mapRS2A1, map<string, double> &mapRS2z);
+void Calcq(const size_t n_block, const vector<size_t> &vec_cat,
+ const vector<size_t> &vec_ni, const vector<double> &vec_weight,
+ const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q,
+ gsl_vector *s);
+
+void ReadFile_study(const string &file_study, gsl_matrix *Vq, gsl_vector *q_vec,
+ gsl_vector *s_vec, size_t &ni);
+void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat,
+ gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
+void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq,
+ gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
+void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
+ gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
// WJA added.
-bool bgenKin (const string &file_geno, vector<int> &indicator_snp,
- const int k_mode, const int display_pace,
- gsl_matrix *matrix_kin);
+bool bgenKin(const string &file_geno, vector<int> &indicator_snp,
+ const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
- const double &maf_level, const double &miss_level,
- const double &hwe_level, const double &r2_level,
- size_t &ns_test);
+ const gsl_matrix *W, vector<int> &indicator_idv,
+ vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
+ const double &maf_level, const double &miss_level,
+ const double &hwe_level, const double &r2_level,
+ size_t &ns_test);
bool ReadFile_sample(const string &file_sample,
- vector<vector<int> > &indicator_pheno,
- vector<vector<double> > &pheno,
- const vector<size_t> &p_column,
- vector<int> &indicator_cvt,
- vector<vector<double> > &cvt,
- size_t &n_cvt);
+ vector<vector<int>> &indicator_pheno,
+ vector<vector<double>> &pheno,
+ const vector<size_t> &p_column, vector<int> &indicator_cvt,
+ vector<vector<double>> &cvt, size_t &n_cvt);
#endif
-
-
-
-
-
-
-
diff --git a/src/lapack.cpp b/src/lapack.cpp
index 05b85f4..8f6e8ff 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -16,614 +16,612 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
#include <cmath>
+#include <iostream>
#include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
using namespace std;
extern "C" void sgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K,
- float *ALPHA, float *A, int *LDA, float *B, int *LDB,
- float *BETA, float *C, int *LDC);
+ float *ALPHA, float *A, int *LDA, float *B, int *LDB,
+ float *BETA, float *C, int *LDC);
extern "C" void spotrf_(char *UPLO, int *N, float *A, int *LDA, int *INFO);
extern "C" void spotrs_(char *UPLO, int *N, int *NRHS, float *A, int *LDA,
- float *B, int *LDB, int *INFO);
-extern "C" void ssyev_(char* JOBZ, char* UPLO, int *N, float *A, int *LDA,
- float *W, float *WORK, int *LWORK, int *INFO);
-extern "C" void ssyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N,
- float *A, int *LDA, float *VL, float *VU, int *IL,
- int *IU, float *ABSTOL, int *M, float *W, float *Z,
- int *LDZ, int *ISUPPZ, float *WORK, int *LWORK,
- int *IWORK, int *LIWORK, int *INFO);
+ float *B, int *LDB, int *INFO);
+extern "C" void ssyev_(char *JOBZ, char *UPLO, int *N, float *A, int *LDA,
+ float *W, float *WORK, int *LWORK, int *INFO);
+extern "C" void ssyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, float *A,
+ int *LDA, float *VL, float *VU, int *IL, int *IU,
+ float *ABSTOL, int *M, float *W, float *Z, int *LDZ,
+ int *ISUPPZ, float *WORK, int *LWORK, int *IWORK,
+ int *LIWORK, int *INFO);
extern "C" double sdot_(int *N, float *DX, int *INCX, float *DY, int *INCY);
extern "C" void dgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K,
- double *ALPHA, double *A, int *LDA, double *B,
- int *LDB, double *BETA, double *C, int *LDC);
+ double *ALPHA, double *A, int *LDA, double *B, int *LDB,
+ double *BETA, double *C, int *LDC);
extern "C" void dpotrf_(char *UPLO, int *N, double *A, int *LDA, int *INFO);
extern "C" void dpotrs_(char *UPLO, int *N, int *NRHS, double *A, int *LDA,
- double *B, int *LDB, int *INFO);
-extern "C" void dsyev_(char* JOBZ, char* UPLO, int *N, double *A, int *LDA,
- double *W, double *WORK, int *LWORK, int *INFO);
-extern "C" void dsyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N,
- double *A, int *LDA, double *VL, double *VU,
- int *IL, int *IU, double *ABSTOL, int *M,
- double *W, double *Z, int *LDZ, int *ISUPPZ,
- double *WORK, int *LWORK, int *IWORK,
- int *LIWORK, int *INFO);
+ double *B, int *LDB, int *INFO);
+extern "C" void dsyev_(char *JOBZ, char *UPLO, int *N, double *A, int *LDA,
+ double *W, double *WORK, int *LWORK, int *INFO);
+extern "C" void dsyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, double *A,
+ int *LDA, double *VL, double *VU, int *IL, int *IU,
+ double *ABSTOL, int *M, double *W, double *Z, int *LDZ,
+ int *ISUPPZ, double *WORK, int *LWORK, int *IWORK,
+ int *LIWORK, int *INFO);
extern "C" double ddot_(int *N, double *DX, int *INCX, double *DY, int *INCY);
// Cholesky decomposition, A is destroyed.
-void lapack_float_cholesky_decomp (gsl_matrix_float *A) {
- int N=A->size1, LDA=A->size1, INFO;
- char UPLO='L';
-
- if (N!=(int)A->size2) {
- cout << "Matrix needs to be symmetric and same dimension in " <<
- "lapack_cholesky_decomp." << endl;
- return;
- }
-
- spotrf_(&UPLO, &N, A->data, &LDA, &INFO);
- if (INFO!=0) {
- cout << "Cholesky decomposition unsuccessful in " <<
- "lapack_cholesky_decomp." << endl;
- return;
- }
-
- return;
+void lapack_float_cholesky_decomp(gsl_matrix_float *A) {
+ int N = A->size1, LDA = A->size1, INFO;
+ char UPLO = 'L';
+
+ if (N != (int)A->size2) {
+ cout << "Matrix needs to be symmetric and same dimension in "
+ << "lapack_cholesky_decomp." << endl;
+ return;
+ }
+
+ spotrf_(&UPLO, &N, A->data, &LDA, &INFO);
+ if (INFO != 0) {
+ cout << "Cholesky decomposition unsuccessful in "
+ << "lapack_cholesky_decomp." << endl;
+ return;
+ }
+
+ return;
}
// Cholesky decomposition, A is destroyed.
-void lapack_cholesky_decomp (gsl_matrix *A) {
- int N=A->size1, LDA=A->size1, INFO;
- char UPLO='L';
-
- if (N!=(int)A->size2) {
- cout << "Matrix needs to be symmetric and same dimension in " <<
- "lapack_cholesky_decomp." << endl;
- return;
- }
-
- dpotrf_(&UPLO, &N, A->data, &LDA, &INFO);
- if (INFO!=0) {
- cout << "Cholesky decomposition unsuccessful in " <<
- "lapack_cholesky_decomp."<<endl;
- return;
- }
-
- return;
+void lapack_cholesky_decomp(gsl_matrix *A) {
+ int N = A->size1, LDA = A->size1, INFO;
+ char UPLO = 'L';
+
+ if (N != (int)A->size2) {
+ cout << "Matrix needs to be symmetric and same dimension in "
+ << "lapack_cholesky_decomp." << endl;
+ return;
+ }
+
+ dpotrf_(&UPLO, &N, A->data, &LDA, &INFO);
+ if (INFO != 0) {
+ cout << "Cholesky decomposition unsuccessful in "
+ << "lapack_cholesky_decomp." << endl;
+ return;
+ }
+
+ return;
}
// Cholesky solve, A is decomposed.
-void lapack_float_cholesky_solve (gsl_matrix_float *A,
- const gsl_vector_float *b,
- gsl_vector_float *x) {
- int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO;
- char UPLO='L';
-
-
- if (N!=(int)A->size2 || N!=LDB) {
- cout << "Matrix needs to be symmetric and same dimension in " <<
- "lapack_cholesky_solve." << endl;
- return;
- }
-
- gsl_vector_float_memcpy (x, b);
- spotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
- if (INFO!=0) {
- cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." <<
- endl;
- return;
- }
-
- return;
+void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b,
+ gsl_vector_float *x) {
+ int N = A->size1, NRHS = 1, LDA = A->size1, LDB = b->size, INFO;
+ char UPLO = 'L';
+
+ if (N != (int)A->size2 || N != LDB) {
+ cout << "Matrix needs to be symmetric and same dimension in "
+ << "lapack_cholesky_solve." << endl;
+ return;
+ }
+
+ gsl_vector_float_memcpy(x, b);
+ spotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
+ if (INFO != 0) {
+ cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << endl;
+ return;
+ }
+
+ return;
}
// Cholesky solve, A is decomposed.
-void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b,
- gsl_vector *x) {
- int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO;
- char UPLO='L';
-
- if (N!=(int)A->size2 || N!=LDB) {
- cout << "Matrix needs to be symmetric and same dimension in " <<
- "lapack_cholesky_solve." << endl;
- return;
- }
-
- gsl_vector_memcpy (x, b);
- dpotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
- if (INFO!=0) {
- cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." <<
- endl;
- return;
- }
-
- return;
-}
+void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x) {
+ int N = A->size1, NRHS = 1, LDA = A->size1, LDB = b->size, INFO;
+ char UPLO = 'L';
+
+ if (N != (int)A->size2 || N != LDB) {
+ cout << "Matrix needs to be symmetric and same dimension in "
+ << "lapack_cholesky_solve." << endl;
+ return;
+ }
+
+ gsl_vector_memcpy(x, b);
+ dpotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO);
+ if (INFO != 0) {
+ cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << endl;
+ return;
+ }
-void lapack_sgemm (char *TransA, char *TransB, float alpha,
- const gsl_matrix_float *A, const gsl_matrix_float *B,
- float beta, gsl_matrix_float *C) {
- int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2;
-
- if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;}
- else if (*TransA=='T' || *TransA=='t') {M=A->size2; K1=A->size1;}
- else {cout<<"need 'N' or 'T' in lapack_sgemm"<<endl; return;}
-
- if (*TransB=='N' || *TransB=='n') {N=B->size2; K2=B->size1;}
- else if (*TransB=='T' || *TransB=='t') {N=B->size1; K2=B->size2;}
- else {cout<<"need 'N' or 'T' in lapack_sgemm"<<endl; return;}
-
- if (K1!=K2) {
- cout<<"A and B not compatible in lapack_sgemm"<<endl;
- return;
- }
- if (C->size1!=(size_t)M || C->size2!=(size_t)N) {
- cout<<"C not compatible in lapack_sgemm"<<endl;
- return;
- }
-
- gsl_matrix_float *A_t=gsl_matrix_float_alloc (A->size2, A->size1);
- gsl_matrix_float_transpose_memcpy (A_t, A);
- gsl_matrix_float *B_t=gsl_matrix_float_alloc (B->size2, B->size1);
- gsl_matrix_float_transpose_memcpy (B_t, B);
- gsl_matrix_float *C_t=gsl_matrix_float_alloc (C->size2, C->size1);
- gsl_matrix_float_transpose_memcpy (C_t, C);
-
- sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA,
- B_t->data, &LDB, &beta, C_t->data, &LDC);
- gsl_matrix_float_transpose_memcpy (C, C_t);
-
- gsl_matrix_float_free (A_t);
- gsl_matrix_float_free (B_t);
- gsl_matrix_float_free (C_t);
- return;
+ return;
}
+void lapack_sgemm(char *TransA, char *TransB, float alpha,
+ const gsl_matrix_float *A, const gsl_matrix_float *B,
+ float beta, gsl_matrix_float *C) {
+ int M, N, K1, K2, LDA = A->size1, LDB = B->size1, LDC = C->size2;
+
+ if (*TransA == 'N' || *TransA == 'n') {
+ M = A->size1;
+ K1 = A->size2;
+ } else if (*TransA == 'T' || *TransA == 't') {
+ M = A->size2;
+ K1 = A->size1;
+ } else {
+ cout << "need 'N' or 'T' in lapack_sgemm" << endl;
+ return;
+ }
+ if (*TransB == 'N' || *TransB == 'n') {
+ N = B->size2;
+ K2 = B->size1;
+ } else if (*TransB == 'T' || *TransB == 't') {
+ N = B->size1;
+ K2 = B->size2;
+ } else {
+ cout << "need 'N' or 'T' in lapack_sgemm" << endl;
+ return;
+ }
-void lapack_dgemm (char *TransA, char *TransB, double alpha,
- const gsl_matrix *A, const gsl_matrix *B,
- double beta, gsl_matrix *C) {
- int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2;
+ if (K1 != K2) {
+ cout << "A and B not compatible in lapack_sgemm" << endl;
+ return;
+ }
+ if (C->size1 != (size_t)M || C->size2 != (size_t)N) {
+ cout << "C not compatible in lapack_sgemm" << endl;
+ return;
+ }
- if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;}
- else if (*TransA=='T' || *TransA=='t') {M=A->size2; K1=A->size1;}
- else {cout<<"need 'N' or 'T' in lapack_dgemm"<<endl; return;}
+ gsl_matrix_float *A_t = gsl_matrix_float_alloc(A->size2, A->size1);
+ gsl_matrix_float_transpose_memcpy(A_t, A);
+ gsl_matrix_float *B_t = gsl_matrix_float_alloc(B->size2, B->size1);
+ gsl_matrix_float_transpose_memcpy(B_t, B);
+ gsl_matrix_float *C_t = gsl_matrix_float_alloc(C->size2, C->size1);
+ gsl_matrix_float_transpose_memcpy(C_t, C);
+
+ sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB,
+ &beta, C_t->data, &LDC);
+ gsl_matrix_float_transpose_memcpy(C, C_t);
+
+ gsl_matrix_float_free(A_t);
+ gsl_matrix_float_free(B_t);
+ gsl_matrix_float_free(C_t);
+ return;
+}
- if (*TransB=='N' || *TransB=='n') {N=B->size2; K2=B->size1;}
- else if (*TransB=='T' || *TransB=='t') {N=B->size1; K2=B->size2;}
- else {cout<<"need 'N' or 'T' in lapack_dgemm"<<endl; return;}
+void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
+ const gsl_matrix *B, double beta, gsl_matrix *C) {
+ int M, N, K1, K2, LDA = A->size1, LDB = B->size1, LDC = C->size2;
+
+ if (*TransA == 'N' || *TransA == 'n') {
+ M = A->size1;
+ K1 = A->size2;
+ } else if (*TransA == 'T' || *TransA == 't') {
+ M = A->size2;
+ K1 = A->size1;
+ } else {
+ cout << "need 'N' or 'T' in lapack_dgemm" << endl;
+ return;
+ }
+
+ if (*TransB == 'N' || *TransB == 'n') {
+ N = B->size2;
+ K2 = B->size1;
+ } else if (*TransB == 'T' || *TransB == 't') {
+ N = B->size1;
+ K2 = B->size2;
+ } else {
+ cout << "need 'N' or 'T' in lapack_dgemm" << endl;
+ return;
+ }
- if (K1!=K2) {
- cout << "A and B not compatible in lapack_dgemm"<<endl;
- return;
- }
- if (C->size1!=(size_t)M || C->size2!=(size_t)N) {
- cout<<"C not compatible in lapack_dgemm"<<endl;
- return;
- }
+ if (K1 != K2) {
+ cout << "A and B not compatible in lapack_dgemm" << endl;
+ return;
+ }
+ if (C->size1 != (size_t)M || C->size2 != (size_t)N) {
+ cout << "C not compatible in lapack_dgemm" << endl;
+ return;
+ }
- gsl_matrix *A_t=gsl_matrix_alloc (A->size2, A->size1);
- gsl_matrix_transpose_memcpy (A_t, A);
- gsl_matrix *B_t=gsl_matrix_alloc (B->size2, B->size1);
- gsl_matrix_transpose_memcpy (B_t, B);
- gsl_matrix *C_t=gsl_matrix_alloc (C->size2, C->size1);
- gsl_matrix_transpose_memcpy (C_t, C);
+ gsl_matrix *A_t = gsl_matrix_alloc(A->size2, A->size1);
+ gsl_matrix_transpose_memcpy(A_t, A);
+ gsl_matrix *B_t = gsl_matrix_alloc(B->size2, B->size1);
+ gsl_matrix_transpose_memcpy(B_t, B);
+ gsl_matrix *C_t = gsl_matrix_alloc(C->size2, C->size1);
+ gsl_matrix_transpose_memcpy(C_t, C);
- dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA,
- B_t->data, &LDB, &beta, C_t->data, &LDC);
+ dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB,
+ &beta, C_t->data, &LDC);
- gsl_matrix_transpose_memcpy (C, C_t);
+ gsl_matrix_transpose_memcpy(C, C_t);
- gsl_matrix_free (A_t);
- gsl_matrix_free (B_t);
- gsl_matrix_free (C_t);
- return;
+ gsl_matrix_free(A_t);
+ gsl_matrix_free(B_t);
+ gsl_matrix_free(C_t);
+ return;
}
// Eigen value decomposition, matrix A is destroyed, float seems to
// have problem with large matrices (in mac).
-void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
- gsl_matrix_float *evec,
- const size_t flag_largematrix) {
- if (flag_largematrix==1) {
- int N=A->size1, LDA=A->size1, INFO, LWORK=-1;
- char JOBZ='V', UPLO='L';
-
- if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same " <<
- "dimension in lapack_eigen_symmv."<<endl;
- return;
- }
-
- LWORK=3*N;
- float *WORK=new float [LWORK];
- ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK,
- &LWORK, &INFO);
- if (INFO!=0) {
- cout << "Eigen decomposition unsuccessful in " <<
- "lapack_eigen_symmv."<<endl;
- return;
- }
-
- gsl_matrix_float_view A_sub =
- gsl_matrix_float_submatrix(A, 0, 0, N, N);
- gsl_matrix_float_memcpy (evec, &A_sub.matrix);
- gsl_matrix_float_transpose (evec);
-
- delete [] WORK;
- } else {
- int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO,
- LWORK=-1, LIWORK=-1;
- char JOBZ='V', UPLO='L', RANGE='A';
- float ABSTOL=1.0E-7;
-
- // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
- float VL=0.0, VU=0.0;
- int IL=0, IU=0, M;
-
- if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same " <<
- "dimension in lapack_float_eigen_symmv." << endl;
- return;
- }
-
- int *ISUPPZ=new int [2*N];
-
- float WORK_temp[1];
- int IWORK_temp[1];
- ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL,
- &VU, &IL, &IU, &ABSTOL, &M, eval->data,
- evec->data, &LDZ, ISUPPZ, WORK_temp, &LWORK,
- IWORK_temp, &LIWORK, &INFO);
- if (INFO!=0) {
- cout << "Work space estimate unsuccessful in " <<
- "lapack_float_eigen_symmv." << endl;
- return;
- }
- LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0];
-
- float *WORK=new float [LWORK];
- int *IWORK=new int [LIWORK];
-
- ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL,
- &VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data,
- &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO);
- if (INFO!=0) {
- cout << "Eigen decomposition unsuccessful in " <<
- "lapack_float_eigen_symmv." << endl;
- return;
- }
-
- gsl_matrix_float_transpose (evec);
-
- delete [] ISUPPZ;
- delete [] WORK;
- delete [] IWORK;
- }
-
-
- return;
-}
-
+void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval,
+ gsl_matrix_float *evec,
+ const size_t flag_largematrix) {
+ if (flag_largematrix == 1) {
+ int N = A->size1, LDA = A->size1, INFO, LWORK = -1;
+ char JOBZ = 'V', UPLO = 'L';
+
+ if (N != (int)A->size2 || N != (int)eval->size) {
+ cout << "Matrix needs to be symmetric and same "
+ << "dimension in lapack_eigen_symmv." << endl;
+ return;
+ }
+
+ LWORK = 3 * N;
+ float *WORK = new float[LWORK];
+ ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO);
+ if (INFO != 0) {
+ cout << "Eigen decomposition unsuccessful in "
+ << "lapack_eigen_symmv." << endl;
+ return;
+ }
+
+ gsl_matrix_float_view A_sub = gsl_matrix_float_submatrix(A, 0, 0, N, N);
+ gsl_matrix_float_memcpy(evec, &A_sub.matrix);
+ gsl_matrix_float_transpose(evec);
+
+ delete[] WORK;
+ } else {
+ int N = A->size1, LDA = A->size1, LDZ = A->size1, INFO, LWORK = -1,
+ LIWORK = -1;
+ char JOBZ = 'V', UPLO = 'L', RANGE = 'A';
+ float ABSTOL = 1.0E-7;
+
+ // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
+ float VL = 0.0, VU = 0.0;
+ int IL = 0, IU = 0, M;
+
+ if (N != (int)A->size2 || N != (int)eval->size) {
+ cout << "Matrix needs to be symmetric and same "
+ << "dimension in lapack_float_eigen_symmv." << endl;
+ return;
+ }
+
+ int *ISUPPZ = new int[2 * N];
+
+ float WORK_temp[1];
+ int IWORK_temp[1];
+ ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+ &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp,
+ &LWORK, IWORK_temp, &LIWORK, &INFO);
+ if (INFO != 0) {
+ cout << "Work space estimate unsuccessful in "
+ << "lapack_float_eigen_symmv." << endl;
+ return;
+ }
+ LWORK = (int)WORK_temp[0];
+ LIWORK = (int)IWORK_temp[0];
+
+ float *WORK = new float[LWORK];
+ int *IWORK = new int[LIWORK];
+
+ ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+ &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK,
+ IWORK, &LIWORK, &INFO);
+ if (INFO != 0) {
+ cout << "Eigen decomposition unsuccessful in "
+ << "lapack_float_eigen_symmv." << endl;
+ return;
+ }
+
+ gsl_matrix_float_transpose(evec);
+
+ delete[] ISUPPZ;
+ delete[] WORK;
+ delete[] IWORK;
+ }
+ return;
+}
// Eigenvalue decomposition, matrix A is destroyed.
-void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
- const size_t flag_largematrix) {
- if (flag_largematrix==1) {
- int N=A->size1, LDA=A->size1, INFO, LWORK=-1;
- char JOBZ='V', UPLO='L';
-
- if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same " <<
- "dimension in lapack_eigen_symmv." << endl;
- return;
- }
-
- LWORK=3*N;
- double *WORK=new double [LWORK];
- dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK,
- &LWORK, &INFO);
- if (INFO!=0) {
- cout<<"Eigen decomposition unsuccessful in " <<
- "lapack_eigen_symmv." << endl;
- return;
- }
-
- gsl_matrix_view A_sub=gsl_matrix_submatrix(A, 0, 0, N, N);
- gsl_matrix_memcpy (evec, &A_sub.matrix);
- gsl_matrix_transpose (evec);
-
- delete [] WORK;
- } else {
- int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO;
- int LWORK=-1, LIWORK=-1;
- char JOBZ='V', UPLO='L', RANGE='A';
- double ABSTOL=1.0E-7;
-
- // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
- double VL=0.0, VU=0.0;
- int IL=0, IU=0, M;
-
- if (N!=(int)A->size2 || N!=(int)eval->size) {
- cout << "Matrix needs to be symmetric and same " <<
- "dimension in lapack_eigen_symmv." << endl;
- return;
- }
-
- int *ISUPPZ=new int [2*N];
-
- double WORK_temp[1];
- int IWORK_temp[1];
-
- dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU,
- &IL, &IU, &ABSTOL, &M, eval->data, evec->data,
- &LDZ, ISUPPZ, WORK_temp, &LWORK, IWORK_temp,
- &LIWORK, &INFO);
- if (INFO!=0) {
- cout << "Work space estimate unsuccessful in " <<
- "lapack_eigen_symmv." << endl;
- return;
- }
- LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0];
-
- double *WORK=new double [LWORK];
- int *IWORK=new int [LIWORK];
-
- dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU,
- &IL, &IU, &ABSTOL, &M, eval->data, evec->data,
- &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO);
- if (INFO!=0) {
- cout << "Eigen decomposition unsuccessful in " <<
- "lapack_eigen_symmv." << endl;
- return;
- }
-
- gsl_matrix_transpose (evec);
-
- delete [] ISUPPZ;
- delete [] WORK;
- delete [] IWORK;
- }
-
- return;
+void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
+ const size_t flag_largematrix) {
+ if (flag_largematrix == 1) {
+ int N = A->size1, LDA = A->size1, INFO, LWORK = -1;
+ char JOBZ = 'V', UPLO = 'L';
+
+ if (N != (int)A->size2 || N != (int)eval->size) {
+ cout << "Matrix needs to be symmetric and same "
+ << "dimension in lapack_eigen_symmv." << endl;
+ return;
+ }
+
+ LWORK = 3 * N;
+ double *WORK = new double[LWORK];
+ dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO);
+ if (INFO != 0) {
+ cout << "Eigen decomposition unsuccessful in "
+ << "lapack_eigen_symmv." << endl;
+ return;
+ }
+
+ gsl_matrix_view A_sub = gsl_matrix_submatrix(A, 0, 0, N, N);
+ gsl_matrix_memcpy(evec, &A_sub.matrix);
+ gsl_matrix_transpose(evec);
+
+ delete[] WORK;
+ } else {
+ int N = A->size1, LDA = A->size1, LDZ = A->size1, INFO;
+ int LWORK = -1, LIWORK = -1;
+ char JOBZ = 'V', UPLO = 'L', RANGE = 'A';
+ double ABSTOL = 1.0E-7;
+
+ // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'.
+ double VL = 0.0, VU = 0.0;
+ int IL = 0, IU = 0, M;
+
+ if (N != (int)A->size2 || N != (int)eval->size) {
+ cout << "Matrix needs to be symmetric and same "
+ << "dimension in lapack_eigen_symmv." << endl;
+ return;
+ }
+
+ int *ISUPPZ = new int[2 * N];
+
+ double WORK_temp[1];
+ int IWORK_temp[1];
+
+ dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+ &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp,
+ &LWORK, IWORK_temp, &LIWORK, &INFO);
+ if (INFO != 0) {
+ cout << "Work space estimate unsuccessful in "
+ << "lapack_eigen_symmv." << endl;
+ return;
+ }
+ LWORK = (int)WORK_temp[0];
+ LIWORK = (int)IWORK_temp[0];
+
+ double *WORK = new double[LWORK];
+ int *IWORK = new int[LIWORK];
+
+ dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU,
+ &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK,
+ IWORK, &LIWORK, &INFO);
+ if (INFO != 0) {
+ cout << "Eigen decomposition unsuccessful in "
+ << "lapack_eigen_symmv." << endl;
+ return;
+ }
+
+ gsl_matrix_transpose(evec);
+
+ delete[] ISUPPZ;
+ delete[] WORK;
+ delete[] IWORK;
+ }
+
+ return;
}
// DO NOT set eigenvalues to be positive.
-double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
- const size_t flag_largematrix) {
- lapack_eigen_symmv (G, eval, U, flag_largematrix);
-
- // Calculate track_G=mean(diag(G)).
- double d=0.0;
- for (size_t i=0; i<eval->size; ++i) {
- d+=gsl_vector_get(eval, i);
- }
- d/=(double)eval->size;
-
- return d;
-}
+double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
+ const size_t flag_largematrix) {
+ lapack_eigen_symmv(G, eval, U, flag_largematrix);
+
+ // Calculate trace_G=mean(diag(G)), i.e. the mean of the eigenvalues.
+ double d = 0.0;
+ for (size_t i = 0; i < eval->size; ++i) {
+ d += gsl_vector_get(eval, i);
+ }
+ d /= (double)eval->size;
+ return d;
+}
// DO NOT set eigen values to be positive.
-double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U,
- gsl_vector_float *eval, const size_t flag_largematrix) {
- lapack_float_eigen_symmv (G, eval, U, flag_largematrix);
-
- // Calculate track_G=mean(diag(G)).
- double d = 0.0;
- for (size_t i=0; i<eval->size; ++i) {
- d+=gsl_vector_float_get(eval, i);
- }
- d/=(double)eval->size;
-
- return d;
-}
+double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
+ gsl_vector_float *eval, const size_t flag_largematrix) {
+ lapack_float_eigen_symmv(G, eval, U, flag_largematrix);
+
+ // Calculate trace_G=mean(diag(G)), i.e. the mean of the eigenvalues.
+ double d = 0.0;
+ for (size_t i = 0; i < eval->size; ++i) {
+ d += gsl_vector_float_get(eval, i);
+ }
+ d /= (double)eval->size;
+ return d;
+}
double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) {
- double logdet_O=0.0;
+ double logdet_O = 0.0;
- lapack_cholesky_decomp(Omega);
- for (size_t i=0; i<Omega->size1; ++i) {
- logdet_O+=log(gsl_matrix_get (Omega, i, i));
- }
- logdet_O*=2.0;
- lapack_cholesky_solve(Omega, Xty, OiXty);
+ lapack_cholesky_decomp(Omega);
+ for (size_t i = 0; i < Omega->size1; ++i) {
+ logdet_O += log(gsl_matrix_get(Omega, i, i));
+ }
+ logdet_O *= 2.0;
+ lapack_cholesky_solve(Omega, Xty, OiXty);
- return logdet_O;
+ return logdet_O;
}
-
double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
- gsl_vector_float *OiXty) {
- double logdet_O=0.0;
+ gsl_vector_float *OiXty) {
+ double logdet_O = 0.0;
- lapack_float_cholesky_decomp(Omega);
- for (size_t i=0; i<Omega->size1; ++i) {
- logdet_O+=log(gsl_matrix_float_get (Omega, i, i));
- }
- logdet_O*=2.0;
- lapack_float_cholesky_solve(Omega, Xty, OiXty);
+ lapack_float_cholesky_decomp(Omega);
+ for (size_t i = 0; i < Omega->size1; ++i) {
+ logdet_O += log(gsl_matrix_float_get(Omega, i, i));
+ }
+ logdet_O *= 2.0;
+ lapack_float_cholesky_solve(Omega, Xty, OiXty);
- return logdet_O;
+ return logdet_O;
}
-
// LU decomposition.
-void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum) {
- gsl_linalg_LU_decomp (LU, p, signum);
- return;
+void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) {
+ gsl_linalg_LU_decomp(LU, p, signum);
+ return;
}
-void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum) {
- gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
-
- // Copy float matrix to double.
- for (size_t i=0; i<LU->size1; i++) {
- for (size_t j=0; j<LU->size2; j++) {
- gsl_matrix_set (LU_double, i, j,
- gsl_matrix_float_get(LU, i, j));
- }
- }
-
- // LU decomposition.
- gsl_linalg_LU_decomp (LU_double, p, signum);
-
- // Copy float matrix to double.
- for (size_t i=0; i<LU->size1; i++) {
- for (size_t j=0; j<LU->size2; j++) {
- gsl_matrix_float_set (LU, i, j,
- gsl_matrix_get(LU_double, i, j));
- }
- }
-
- // Free matrix.
- gsl_matrix_free (LU_double);
- return;
-}
+void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) {
+ gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+
+ // Copy float matrix to double.
+ for (size_t i = 0; i < LU->size1; i++) {
+ for (size_t j = 0; j < LU->size2; j++) {
+ gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+ }
+ }
+ // LU decomposition.
+ gsl_linalg_LU_decomp(LU_double, p, signum);
+
+ // Copy double matrix back to float.
+ for (size_t i = 0; i < LU->size1; i++) {
+ for (size_t j = 0; j < LU->size2; j++) {
+ gsl_matrix_float_set(LU, i, j, gsl_matrix_get(LU_double, i, j));
+ }
+ }
+
+ // Free matrix.
+ gsl_matrix_free(LU_double);
+ return;
+}
// LU invert.
-void LUInvert (const gsl_matrix *LU, const gsl_permutation *p,
- gsl_matrix *inverse) {
- gsl_linalg_LU_invert (LU, p, inverse);
- return;
+void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
+ gsl_matrix *inverse) {
+ gsl_linalg_LU_invert(LU, p, inverse);
+ return;
}
-void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p,
- gsl_matrix_float *inverse) {
- gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
- gsl_matrix *inverse_double=gsl_matrix_alloc (inverse->size1,
- inverse->size2);
-
- // Copy float matrix to double.
- for (size_t i=0; i<LU->size1; i++) {
- for (size_t j=0; j<LU->size2; j++) {
- gsl_matrix_set (LU_double, i, j,
- gsl_matrix_float_get(LU, i, j));
- }
- }
-
- // LU decomposition.
- gsl_linalg_LU_invert (LU_double, p, inverse_double);
-
- // Copy float matrix to double.
- for (size_t i=0; i<inverse->size1; i++) {
- for (size_t j=0; j<inverse->size2; j++) {
- gsl_matrix_float_set (inverse, i, j,
- gsl_matrix_get(inverse_double,
- i, j));
- }
- }
-
- // Free matrix.
- gsl_matrix_free (LU_double);
- gsl_matrix_free (inverse_double);
- return;
+void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
+ gsl_matrix_float *inverse) {
+ gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+ gsl_matrix *inverse_double = gsl_matrix_alloc(inverse->size1, inverse->size2);
+
+ // Copy float matrix to double.
+ for (size_t i = 0; i < LU->size1; i++) {
+ for (size_t j = 0; j < LU->size2; j++) {
+ gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+ }
+ }
+
+ // LU invert.
+ gsl_linalg_LU_invert(LU_double, p, inverse_double);
+
+ // Copy double matrix back to float.
+ for (size_t i = 0; i < inverse->size1; i++) {
+ for (size_t j = 0; j < inverse->size2; j++) {
+ gsl_matrix_float_set(inverse, i, j, gsl_matrix_get(inverse_double, i, j));
+ }
+ }
+
+ // Free matrix.
+ gsl_matrix_free(LU_double);
+ gsl_matrix_free(inverse_double);
+ return;
}
// LU lndet.
-double LULndet (gsl_matrix *LU) {
- double d;
- d=gsl_linalg_LU_lndet (LU);
- return d;
+double LULndet(gsl_matrix *LU) {
+ double d;
+ d = gsl_linalg_LU_lndet(LU);
+ return d;
}
-double LULndet (gsl_matrix_float *LU) {
- gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
- double d;
+double LULndet(gsl_matrix_float *LU) {
+ gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+ double d;
- // Copy float matrix to double.
- for (size_t i=0; i<LU->size1; i++) {
- for (size_t j=0; j<LU->size2; j++) {
- gsl_matrix_set (LU_double, i, j, gsl_matrix_float_get(LU, i, j));
- }
- }
+ // Copy float matrix to double.
+ for (size_t i = 0; i < LU->size1; i++) {
+ for (size_t j = 0; j < LU->size2; j++) {
+ gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+ }
+ }
- // LU decomposition.
- d=gsl_linalg_LU_lndet (LU_double);
+ // LU lndet.
+ d = gsl_linalg_LU_lndet(LU_double);
- // Free matrix
- gsl_matrix_free (LU_double);
- return d;
+ // Free matrix
+ gsl_matrix_free(LU_double);
+ return d;
}
-
// LU solve.
-void LUSolve (const gsl_matrix *LU, const gsl_permutation *p,
- const gsl_vector *b, gsl_vector *x) {
- gsl_linalg_LU_solve (LU, p, b, x);
- return;
+void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
+ const gsl_vector *b, gsl_vector *x) {
+ gsl_linalg_LU_solve(LU, p, b, x);
+ return;
}
-void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p,
- const gsl_vector_float *b, gsl_vector_float *x) {
- gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2);
- gsl_vector *b_double=gsl_vector_alloc (b->size);
- gsl_vector *x_double=gsl_vector_alloc (x->size);
-
- // Copy float matrix to double.
- for (size_t i=0; i<LU->size1; i++) {
- for (size_t j=0; j<LU->size2; j++) {
- gsl_matrix_set (LU_double, i, j,
- gsl_matrix_float_get(LU, i, j));
- }
- }
-
- for (size_t i=0; i<b->size; i++) {
- gsl_vector_set (b_double, i, gsl_vector_float_get(b, i));
- }
-
- for (size_t i=0; i<x->size; i++) {
- gsl_vector_set (x_double, i, gsl_vector_float_get(x, i));
- }
-
- // LU decomposition.
- gsl_linalg_LU_solve (LU_double, p, b_double, x_double);
-
- // Copy float matrix to double.
- for (size_t i=0; i<x->size; i++) {
- gsl_vector_float_set (x, i, gsl_vector_get(x_double, i));
- }
-
- // Free matrix.
- gsl_matrix_free (LU_double);
- gsl_vector_free (b_double);
- gsl_vector_free (x_double);
- return;
-}
+void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p,
+ const gsl_vector_float *b, gsl_vector_float *x) {
+ gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
+ gsl_vector *b_double = gsl_vector_alloc(b->size);
+ gsl_vector *x_double = gsl_vector_alloc(x->size);
+
+ // Copy float matrix to double.
+ for (size_t i = 0; i < LU->size1; i++) {
+ for (size_t j = 0; j < LU->size2; j++) {
+ gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
+ }
+ }
+ for (size_t i = 0; i < b->size; i++) {
+ gsl_vector_set(b_double, i, gsl_vector_float_get(b, i));
+ }
+
+ for (size_t i = 0; i < x->size; i++) {
+ gsl_vector_set(x_double, i, gsl_vector_float_get(x, i));
+ }
+
+ // LU solve.
+ gsl_linalg_LU_solve(LU_double, p, b_double, x_double);
+
+ // Copy double vector back to float.
+ for (size_t i = 0; i < x->size; i++) {
+ gsl_vector_float_set(x, i, gsl_vector_get(x_double, i));
+ }
+
+ // Free matrix.
+ gsl_matrix_free(LU_double);
+ gsl_vector_free(b_double);
+ gsl_vector_free(x_double);
+ return;
+}
bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) {
- bool flag=false;
- int incx=1;
- int incy=1;
- int n=(int)x.size();
- if (x.size()==y.size()) {
- v=ddot_(&n, &x[0], &incx, &y[0], &incy);
- flag=true;
+ bool flag = false;
+ int incx = 1;
+ int incy = 1;
+ int n = (int)x.size();
+ if (x.size() == y.size()) {
+ v = ddot_(&n, &x[0], &incx, &y[0], &incy);
+ flag = true;
}
return flag;
}
-
bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) {
- bool flag=false;
- int incx=1;
- int incy=1;
- int n=(int)x.size();
- if (x.size()==y.size()) {
- v=sdot_(&n, &x[0], &incx, &y[0], &incy);
- flag=true;
+ bool flag = false;
+ int incx = 1;
+ int incy = 1;
+ int n = (int)x.size();
+ if (x.size() == y.size()) {
+ v = sdot_(&n, &x[0], &incx, &y[0], &incy);
+ flag = true;
}
return flag;
diff --git a/src/lapack.h b/src/lapack.h
index 5e1db35..ff02b96 100644
--- a/src/lapack.h
+++ b/src/lapack.h
@@ -23,45 +23,43 @@
using namespace std;
-void lapack_float_cholesky_decomp (gsl_matrix_float *A);
-void lapack_cholesky_decomp (gsl_matrix *A);
-void lapack_float_cholesky_solve (gsl_matrix_float *A,
- const gsl_vector_float *b,
- gsl_vector_float *x);
-void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b, gsl_vector *x);
-void lapack_sgemm (char *TransA, char *TransB, float alpha,
- const gsl_matrix_float *A, const gsl_matrix_float *B,
- float beta, gsl_matrix_float *C);
-void lapack_dgemm (char *TransA, char *TransB, double alpha,
- const gsl_matrix *A, const gsl_matrix *B,
- double beta, gsl_matrix *C);
-void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval,
- gsl_matrix_float *evec,
- const size_t flag_largematrix);
-void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
- const size_t flag_largematrix);
+void lapack_float_cholesky_decomp(gsl_matrix_float *A);
+void lapack_cholesky_decomp(gsl_matrix *A);
+void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b,
+ gsl_vector_float *x);
+void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x);
+void lapack_sgemm(char *TransA, char *TransB, float alpha,
+ const gsl_matrix_float *A, const gsl_matrix_float *B,
+ float beta, gsl_matrix_float *C);
+void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
+ const gsl_matrix *B, double beta, gsl_matrix *C);
+void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval,
+ gsl_matrix_float *evec,
+ const size_t flag_largematrix);
+void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
+ const size_t flag_largematrix);
-double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
- const size_t flag_largematrix);
-double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U,
- gsl_vector_float *eval, const size_t flag_largematrix);
+double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
+ const size_t flag_largematrix);
+double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
+ gsl_vector_float *eval, const size_t flag_largematrix);
double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty);
double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
- gsl_vector_float *OiXty);
+ gsl_vector_float *OiXty);
-void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum);
-void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum);
-void LUInvert (const gsl_matrix *LU, const gsl_permutation *p,
- gsl_matrix *inverse);
-void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p,
- gsl_matrix_float *inverse);
-double LULndet (gsl_matrix *LU);
-double LULndet (gsl_matrix_float *LU);
-void LUSolve (const gsl_matrix *LU, const gsl_permutation *p,
- const gsl_vector *b, gsl_vector *x);
-void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p,
- const gsl_vector_float *b, gsl_vector_float *x);
+void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum);
+void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum);
+void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
+ gsl_matrix *inverse);
+void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
+ gsl_matrix_float *inverse);
+double LULndet(gsl_matrix *LU);
+double LULndet(gsl_matrix_float *LU);
+void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
+ const gsl_vector *b, gsl_vector *x);
+void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p,
+ const gsl_vector_float *b, gsl_vector_float *x);
bool lapack_ddot(vector<double> &x, vector<double> &y, double &v);
bool lapack_sdot(vector<float> &x, vector<float> &y, double &v);
diff --git a/src/ldr.cpp b/src/ldr.cpp
index f0a1b37..3554efa 100644
--- a/src/ldr.cpp
+++ b/src/ldr.cpp
@@ -16,67 +16,65 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
-#include <iomanip>
+#include <algorithm>
#include <cmath>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
-#include <ctime>
-#include <cstring>
-#include <algorithm>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
+#include "Eigen/Dense"
#include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
#include "gsl/gsl_randist.h"
-#include "gsl/gsl_cdf.h"
#include "gsl/gsl_roots.h"
-#include "Eigen/Dense"
+#include "gsl/gsl_vector.h"
#include "lapack.h"
-#include "param.h"
#include "ldr.h"
#include "lm.h"
#include "mathfunc.h"
+#include "param.h"
using namespace std;
using namespace Eigen;
-void LDR::CopyFromParam (PARAM &cPar) {
- a_mode=cPar.a_mode;
- d_pace=cPar.d_pace;
+void LDR::CopyFromParam(PARAM &cPar) {
+ a_mode = cPar.a_mode;
+ d_pace = cPar.d_pace;
- file_bfile=cPar.file_bfile;
- file_geno=cPar.file_geno;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
- ni_total=cPar.ni_total;
- ns_total=cPar.ns_total;
- ni_test=cPar.ni_test;
- ns_test=cPar.ns_test;
- n_cvt=cPar.n_cvt;
+ ni_total = cPar.ni_total;
+ ns_total = cPar.ns_total;
+ ni_test = cPar.ni_test;
+ ns_test = cPar.ns_test;
+ n_cvt = cPar.n_cvt;
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
- snpInfo=cPar.snpInfo;
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
- return;
+ return;
}
-void LDR::CopyToParam (PARAM &cPar) {
- return;
-}
+void LDR::CopyToParam(PARAM &cPar) { return; }
-//X is a p by n matrix.
-void LDR::VB (const vector<vector<unsigned char> > &Xt,
- const gsl_matrix *W_gsl, const gsl_vector *y_gsl) {
+// X is a p by n matrix.
+void LDR::VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
+ const gsl_vector *y_gsl) {
// Save gsl_vector and gsl_matrix into Eigen library formats.
MatrixXd W(W_gsl->size1, W_gsl->size2);
@@ -84,20 +82,21 @@ void LDR::VB (const vector<vector<unsigned char> > &Xt,
VectorXd x_col(y_gsl->size);
double d;
- for (size_t i=0; i<W_gsl->size1; i++) {
- d=gsl_vector_get(y_gsl, i);
- y(i)=d;
- for (size_t j=0; j<W_gsl->size2; j++) {
- W(i,j)=gsl_matrix_get(W_gsl, i, j);
+ for (size_t i = 0; i < W_gsl->size1; i++) {
+ d = gsl_vector_get(y_gsl, i);
+ y(i) = d;
+ for (size_t j = 0; j < W_gsl->size2; j++) {
+ W(i, j) = gsl_matrix_get(W_gsl, i, j);
}
}
// Initial VB values by lm.
- cout<<indicator_snp[0]<<" "<<indicator_snp[1]<<" "<<indicator_snp[2]<<endl;
- uchar_matrix_get_row (Xt, 0, x_col);
+ cout << indicator_snp[0] << " " << indicator_snp[1] << " " << indicator_snp[2]
+ << endl;
+ uchar_matrix_get_row(Xt, 0, x_col);
- for (size_t j=0; j<10; j++) {
- cout<<x_col(j)<<endl;
+ for (size_t j = 0; j < 10; j++) {
+ cout << x_col(j) << endl;
}
// Run VB iterations.
diff --git a/src/ldr.h b/src/ldr.h
index ab55fe2..6720689 100644
--- a/src/ldr.h
+++ b/src/ldr.h
@@ -19,53 +19,51 @@
#ifndef __LDR_H__
#define __LDR_H__
-#include <vector>
-#include <map>
-#include <gsl/gsl_rng.h>
-#include <gsl/gsl_randist.h>
#include "param.h"
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_rng.h>
+#include <map>
+#include <vector>
using namespace std;
class LDR {
public:
- // IO-related parameters.
- int a_mode;
- size_t d_pace;
+ // IO-related parameters.
+ int a_mode;
+ size_t d_pace;
- string file_bfile;
- string file_geno;
- string file_out;
- string path_out;
+ string file_bfile;
+ string file_geno;
+ string file_out;
+ string path_out;
- // Summary statistics.
- size_t ni_total, ns_total; // Total number of individuals & SNPs.
- size_t ni_test, ns_test; // Number of individuals & SNPs used
- // for analysis
- size_t n_cvt; // Number of covariates.
+ // Summary statistics.
+ size_t ni_total, ns_total; // Total number of individuals & SNPs.
+ size_t ni_test, ns_test; // Number of individuals & SNPs used
+ // for analysis
+ size_t n_cvt; // Number of covariates.
- // Indicator for individuals (phenotypes): 0 missing, 1
- // available for analysis.
- vector<int> indicator_idv;
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_idv;
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
- vector<SNPINFO> snpInfo; // Record SNP information.
+ vector<SNPINFO> snpInfo; // Record SNP information.
- // Not included in PARAM.
- gsl_rng *gsl_r;
+ // Not included in PARAM.
+ gsl_rng *gsl_r;
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
- void VB(const vector<vector<unsigned char> > &Xt,
- const gsl_matrix *W_gsl, const gsl_vector *y_gsl);
+ void VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
+ const gsl_vector *y_gsl);
};
#endif
-
-
diff --git a/src/lm.cpp b/src/lm.cpp
index 94729db..f8fc43d 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -16,28 +16,28 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
-#include <iomanip>
+#include <assert.h>
+#include <bitset>
#include <cmath>
+#include <cstring>
+#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
-#include <assert.h>
-#include <bitset>
-#include <cstring>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
#include "gsl/gsl_cdf.h"
-#include "gsl/gsl_roots.h"
-#include "gsl/gsl_min.h"
#include "gsl/gsl_integration.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_roots.h"
#include "eigenlib.h"
#include "gzstream.h"
@@ -46,783 +46,835 @@
using namespace std;
-void LM::CopyFromParam (PARAM &cPar) {
- a_mode=cPar.a_mode;
- d_pace=cPar.d_pace;
+void LM::CopyFromParam(PARAM &cPar) {
+ a_mode = cPar.a_mode;
+ d_pace = cPar.d_pace;
- file_bfile=cPar.file_bfile;
- file_geno=cPar.file_geno;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
- file_gene=cPar.file_gene;
- // WJA added
- file_oxford=cPar.file_oxford;
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
+ file_gene = cPar.file_gene;
+ // WJA added
+ file_oxford = cPar.file_oxford;
- time_opt=0.0;
+ time_opt = 0.0;
- ni_total=cPar.ni_total;
- ns_total=cPar.ns_total;
- ni_test=cPar.ni_test;
- ns_test=cPar.ns_test;
- n_cvt=cPar.n_cvt;
+ ni_total = cPar.ni_total;
+ ns_total = cPar.ns_total;
+ ni_test = cPar.ni_test;
+ ns_test = cPar.ns_test;
+ n_cvt = cPar.n_cvt;
- ng_total=cPar.ng_total;
- ng_test=0;
+ ng_total = cPar.ng_total;
+ ng_test = 0;
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
- snpInfo=cPar.snpInfo;
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
- return;
+ return;
}
-void LM::CopyToParam (PARAM &cPar) {
- cPar.time_opt=time_opt;
- cPar.ng_test=ng_test;
- return;
+void LM::CopyToParam(PARAM &cPar) {
+ cPar.time_opt = time_opt;
+ cPar.ng_test = ng_test;
+ return;
}
-void LM::WriteFiles () {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".assoc.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout << "error writing file: " << file_str.c_str() << endl;
- return;
- }
-
- if (!file_gene.empty()) {
- outfile<<"geneID"<<"\t";
-
- if (a_mode==51) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
- } else if (a_mode==52) {
- outfile<<"p_lrt"<<endl;
- } else if (a_mode==53) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
- } else if (a_mode==54) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<
- "\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
- } else {}
-
- for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
- outfile<<snpInfo[t].rs_number<<"\t";
-
- if (a_mode==51) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_wald <<endl;
- } else if (a_mode==52) {
- outfile<<scientific<<setprecision(6)<<
- "\t"<<sumStat[t].p_lrt<<endl;
- } else if (a_mode==53) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_score<<endl;
- } else if (a_mode==54) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_wald <<"\t"<<
- sumStat[t].p_lrt<<"\t"<<
- sumStat[t].p_score<<endl;
- } else {}
- }
- } else {
- outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"<<
- "\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<
- "af"<<"\t";
-
- if (a_mode==51) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
- } else if (a_mode==52) {
- outfile<<"p_lrt"<<endl;
- } else if (a_mode==53) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
- } else if (a_mode==54) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"
- <<"p_lrt"<<"\t"<<"p_score"<<endl;
- } else {}
-
- size_t t=0;
- for (size_t i=0; i<snpInfo.size(); ++i) {
- if (indicator_snp[i]==0) {continue;}
-
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<
- "\t"<<snpInfo[i].base_position<<"\t"<<
- snpInfo[i].n_miss<<"\t"<<ni_test-snpInfo[i].n_miss<<
- "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<
- "\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
- if (a_mode==51) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_wald <<endl;
- } else if (a_mode==52) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].p_lrt<<endl;
- } else if (a_mode==53) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_score<<endl;
- } else if (a_mode==54) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_wald <<"\t"<<
- sumStat[t].p_lrt<<"\t"<<
- sumStat[t].p_score<<endl;
- } else {}
- t++;
- }
- }
-
- outfile.close();
- outfile.clear();
- return;
+void LM::WriteFiles() {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".assoc.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ if (!file_gene.empty()) {
+ outfile << "geneID"
+ << "\t";
+
+ if (a_mode == 51) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_wald" << endl;
+ } else if (a_mode == 52) {
+ outfile << "p_lrt" << endl;
+ } else if (a_mode == 53) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_score" << endl;
+ } else if (a_mode == 54) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_wald"
+ << "\t"
+ << "p_lrt"
+ << "\t"
+ << "p_score" << endl;
+ } else {
+ }
+
+ for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) {
+ outfile << snpInfo[t].rs_number << "\t";
+
+ if (a_mode == 51) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_wald << endl;
+ } else if (a_mode == 52) {
+ outfile << scientific << setprecision(6) << "\t" << sumStat[t].p_lrt
+ << endl;
+ } else if (a_mode == 53) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+ } else if (a_mode == 54) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t"
+ << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+ } else {
+ }
+ }
+ } else {
+ outfile << "chr"
+ << "\t"
+ << "rs"
+ << "\t"
+ << "ps"
+ << "\t"
+ << "n_mis"
+ << "\t"
+ << "n_obs"
+ << "\t"
+ << "allele1"
+ << "\t"
+ << "allele0"
+ << "\t"
+ << "af"
+ << "\t";
+
+ if (a_mode == 51) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_wald" << endl;
+ } else if (a_mode == 52) {
+ outfile << "p_lrt" << endl;
+ } else if (a_mode == 53) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_score" << endl;
+ } else if (a_mode == 54) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_wald"
+ << "\t"
+ << "p_lrt"
+ << "\t"
+ << "p_score" << endl;
+ } else {
+ }
+
+ size_t t = 0;
+ for (size_t i = 0; i < snpInfo.size(); ++i) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+ << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+ << ni_test - snpInfo[i].n_miss << "\t" << snpInfo[i].a_minor
+ << "\t" << snpInfo[i].a_major << "\t" << fixed << setprecision(3)
+ << snpInfo[i].maf << "\t";
+
+ if (a_mode == 51) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_wald << endl;
+ } else if (a_mode == 52) {
+ outfile << scientific << setprecision(6) << sumStat[t].p_lrt << endl;
+ } else if (a_mode == 53) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+ } else if (a_mode == 54) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t"
+ << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+ } else {
+ }
+ t++;
+ }
+ }
+
+ outfile.close();
+ outfile.clear();
+ return;
}
void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty,
- const gsl_vector *Wtx, const gsl_vector *y,
- const gsl_vector *x, double &xPwy, double &xPwx) {
- size_t c_size=Wty->size;
- double d;
+ const gsl_vector *Wtx, const gsl_vector *y, const gsl_vector *x,
+ double &xPwy, double &xPwx) {
+ size_t c_size = Wty->size;
+ double d;
- gsl_vector *WtWiWtx=gsl_vector_alloc (c_size);
+ gsl_vector *WtWiWtx = gsl_vector_alloc(c_size);
- gsl_blas_ddot (x, x, &xPwx);
- gsl_blas_ddot (x, y, &xPwy);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+ gsl_blas_ddot(x, x, &xPwx);
+ gsl_blas_ddot(x, y, &xPwy);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_ddot (WtWiWtx, Wtx, &d);
- xPwx-=d;
+ gsl_blas_ddot(WtWiWtx, Wtx, &d);
+ xPwx -= d;
- gsl_blas_ddot (WtWiWtx, Wty, &d);
- xPwy-=d;
+ gsl_blas_ddot(WtWiWtx, Wty, &d);
+ xPwy -= d;
- gsl_vector_free (WtWiWtx);
+ gsl_vector_free(WtWiWtx);
- return;
+ return;
}
-void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty,
- const gsl_vector *y, double &yPwy) {
- size_t c_size=Wty->size;
- double d;
+void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y,
+ double &yPwy) {
+ size_t c_size = Wty->size;
+ double d;
- gsl_vector *WtWiWty=gsl_vector_alloc (c_size);
+ gsl_vector *WtWiWty = gsl_vector_alloc(c_size);
- gsl_blas_ddot (y, y, &yPwy);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);
+ gsl_blas_ddot(y, y, &yPwy);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);
- gsl_blas_ddot (WtWiWty, Wty, &d);
- yPwy-=d;
+ gsl_blas_ddot(WtWiWty, Wty, &d);
+ yPwy -= d;
- gsl_vector_free (WtWiWty);
+ gsl_vector_free(WtWiWty);
- return;
+ return;
}
// Calculate p-values and beta/se in a linear model.
-void LmCalcP (const size_t test_mode, const double yPwy,
- const double xPwy, const double xPwx, const double df,
- const size_t n_size, double &beta, double &se,
- double &p_wald, double &p_lrt, double &p_score) {
- double yPxy=yPwy-xPwy*xPwy/xPwx;
- double se_wald, se_score;
-
- beta=xPwy/xPwx;
- se_wald=sqrt(yPxy/(df*xPwx) );
- se_score=sqrt(yPwy/((double)n_size*xPwx) );
-
- p_wald=gsl_cdf_fdist_Q (beta*beta/(se_wald*se_wald), 1.0, df);
- p_score=gsl_cdf_fdist_Q (beta*beta/(se_score*se_score), 1.0, df);
- p_lrt=gsl_cdf_chisq_Q ((double)n_size*(log(yPwy)-log(yPxy)), 1);
-
- if (test_mode==3) {se=se_score;} else {se=se_wald;}
-
- return;
+void LmCalcP(const size_t test_mode, const double yPwy, const double xPwy,
+ const double xPwx, const double df, const size_t n_size,
+ double &beta, double &se, double &p_wald, double &p_lrt,
+ double &p_score) {
+ double yPxy = yPwy - xPwy * xPwy / xPwx;
+ double se_wald, se_score;
+
+ beta = xPwy / xPwx;
+ se_wald = sqrt(yPxy / (df * xPwx));
+ se_score = sqrt(yPwy / ((double)n_size * xPwx));
+
+ p_wald = gsl_cdf_fdist_Q(beta * beta / (se_wald * se_wald), 1.0, df);
+ p_score = gsl_cdf_fdist_Q(beta * beta / (se_score * se_score), 1.0, df);
+ p_lrt = gsl_cdf_chisq_Q((double)n_size * (log(yPwy) - log(yPxy)), 1);
+
+ if (test_mode == 3) {
+ se = se_score;
+ } else {
+ se = se_wald;
+ }
+
+ return;
}
-void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x) {
- ifstream infile (file_gene.c_str(), ifstream::in);
- if (!infile) {
- cout<<"error reading gene expression file:"<<file_gene<<endl;
- return;
- }
+void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
+ ifstream infile(file_gene.c_str(), ifstream::in);
+ if (!infile) {
+ cout << "error reading gene expression file:" << file_gene << endl;
+ return;
+ }
- clock_t time_start=clock();
+ clock_t time_start = clock();
- string line;
- char *ch_ptr;
+ string line;
+ char *ch_ptr;
- double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
- int c_phen;
- string rs; // Gene id.
- double d;
+ double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+ int c_phen;
+ string rs; // Gene id.
+ double d;
- // Calculate some basic quantities.
- double yPwy, xPwy, xPwx;
- double df=(double)W->size1-(double)W->size2-1.0;
+ // Calculate some basic quantities.
+ double yPwy, xPwy, xPwx;
+ double df = (double)W->size1 - (double)W->size2 - 1.0;
- gsl_vector *y=gsl_vector_alloc (W->size1);
+ gsl_vector *y = gsl_vector_alloc(W->size1);
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wty=gsl_vector_alloc (W->size2);
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wty = gsl_vector_alloc(W->size2);
+ gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp(WtW, pmt, &sig);
+ LUInvert(WtW, pmt, WtWi);
- gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
- CalcvPv(WtWi, Wtx, x, xPwx);
+ gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+ CalcvPv(WtWi, Wtx, x, xPwx);
- // Header.
- getline(infile, line);
+ // Header.
+ getline(infile, line);
- for (size_t t=0; t<ng_total; t++) {
- getline(infile, line);
- if (t%d_pace==0 || t==ng_total-1) {
- ProgressBar ("Performing Analysis ", t, ng_total-1);
- }
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
+ for (size_t t = 0; t < ng_total; t++) {
+ getline(infile, line);
+ if (t % d_pace == 0 || t == ng_total - 1) {
+ ProgressBar("Performing Analysis ", t, ng_total - 1);
+ }
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ rs = ch_ptr;
- c_phen=0;
- for (size_t i=0; i<indicator_idv.size(); ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
+ c_phen = 0;
+ for (size_t i = 0; i < indicator_idv.size(); ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
- d=atof(ch_ptr);
- gsl_vector_set(y, c_phen, d);
+ d = atof(ch_ptr);
+ gsl_vector_set(y, c_phen, d);
- c_phen++;
- }
+ c_phen++;
+ }
- // Calculate statistics.
- time_start=clock();
+ // Calculate statistics.
+ time_start = clock();
- gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
- CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
- beta, se, p_wald, p_lrt, p_score);
+ gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
+ CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
+ LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+ p_lrt, p_score);
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
- // Store summary data.
- SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
- gsl_vector_free(y);
+ gsl_vector_free(y);
- gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
- gsl_vector_free(Wty);
- gsl_vector_free(Wtx);
- gsl_permutation_free(pmt);
+ gsl_matrix_free(WtW);
+ gsl_matrix_free(WtWi);
+ gsl_vector_free(Wty);
+ gsl_vector_free(Wtx);
+ gsl_permutation_free(pmt);
- infile.close();
- infile.clear();
+ infile.close();
+ infile.clear();
- return;
+ return;
}
// WJA added
-void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) {
- string file_bgen=file_oxford+".bgen";
- ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bgen file:"<<file_bgen<<endl;
- return;
- }
-
- clock_t time_start=clock();
-
- string line;
- char *ch_ptr;
-
- double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
- int n_miss, c_phen;
- double geno, x_mean;
-
- // Calculate some basic quantities.
- double yPwy, xPwy, xPwx;
- double df=(double)W->size1-(double)W->size2-1.0;
-
- gsl_vector *x=gsl_vector_alloc (W->size1);
- gsl_vector *x_miss=gsl_vector_alloc (W->size1);
-
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wty=gsl_vector_alloc (W->size2);
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
- CalcvPv(WtWi, Wty, y, yPwy);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
- infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
- bgen_snp_block_offset-=4;
- infile.ignore(4+bgen_header_length-20);
- bgen_snp_block_offset-=4+bgen_header_length-20;
- infile.read(reinterpret_cast<char*>(&bgen_flags),4);
- bgen_snp_block_offset-=4;
- bool CompressedSNPBlocks=bgen_flags&0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- std::cout << "Warning: WJA hard coded SNP missingness " <<
- "threshold of 10%" << std::endl;
-
- // Start reading genotypes and analyze.
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
-
- // Read SNP header.
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char*>(&bgen_N),4);
- infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char*>(&bgen_LR),2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char*>(&bgen_LC),2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
- infile.read(reinterpret_cast<char*>(&bgen_LA),4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char*>(&bgen_LB),4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3*bgen_N];
-
- if (indicator_snp[t]==0) {
- if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- else
- bgen_P=6*bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if(CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size=6*bgen_N;
-
- infile.read(reinterpret_cast<char*>(zipped_data),
- bgen_P);
-
- int result=
- uncompress(reinterpret_cast<Bytef*>(unzipped_data),
- reinterpret_cast<uLongf*>(&unzipped_data_size),
- reinterpret_cast<Bytef*>(zipped_data),
- static_cast<uLong> (bgen_P));
- assert(result == Z_OK);
-
- }
- else
- {
-
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),
- bgen_P);
- }
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<bgen_N; ++i) {
- if (indicator_idv[i]==0) {continue;}
-
-
- bgen_geno_prob_AA=
- static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=
- static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=
- static_cast<double>(unzipped_data[i*3+2])/32768.0;
-
- // WJA
- bgen_geno_prob_non_miss=
- bgen_geno_prob_AA +
- bgen_geno_prob_AB +
- bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss<0.9) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- }
- else {
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
- geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=static_cast<double>(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {
- gsl_vector_set(x, i, x_mean);
- }
- geno=gsl_vector_get(x, i);
- }
-
- // Calculate statistics.
- time_start=clock();
-
- gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
- CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
- beta, se, p_wald, p_lrt, p_score);
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
-
- gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
- gsl_vector_free(Wty);
- gsl_vector_free(Wtx);
- gsl_permutation_free(pmt);
-
- infile.close();
- infile.clear();
-
- return;
+void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) {
+ string file_bgen = file_oxford + ".bgen";
+ ifstream infile(file_bgen.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bgen file:" << file_bgen << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+
+ string line;
+ char *ch_ptr;
+
+ double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ // Calculate some basic quantities.
+ double yPwy, xPwy, xPwx;
+ double df = (double)W->size1 - (double)W->size2 - 1.0;
+
+ gsl_vector *x = gsl_vector_alloc(W->size1);
+ gsl_vector *x_miss = gsl_vector_alloc(W->size1);
+
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wty = gsl_vector_alloc(W->size2);
+ gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp(WtW, pmt, &sig);
+ LUInvert(WtW, pmt, WtWi);
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
+ CalcvPv(WtWi, Wty, y, yPwy);
+
+ // Read in header.
+ uint32_t bgen_snp_block_offset;
+ uint32_t bgen_header_length;
+ uint32_t bgen_nsamples;
+ uint32_t bgen_nsnps;
+ uint32_t bgen_flags;
+ infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+ bgen_snp_block_offset -= 4;
+ infile.ignore(4 + bgen_header_length - 20);
+ bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+ infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+ bgen_snp_block_offset -= 4;
+ bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+ infile.ignore(bgen_snp_block_offset);
+
+ double bgen_geno_prob_AA, bgen_geno_prob_AB;
+ double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+ uint32_t bgen_N;
+ uint16_t bgen_LS;
+ uint16_t bgen_LR;
+ uint16_t bgen_LC;
+ uint32_t bgen_SNP_pos;
+ uint32_t bgen_LA;
+ std::string bgen_A_allele;
+ uint32_t bgen_LB;
+ std::string bgen_B_allele;
+ uint32_t bgen_P;
+ size_t unzipped_data_size;
+ string id;
+ string rs;
+ string chr;
+ std::cout << "Warning: WJA hard coded SNP missingness "
+ << "threshold of 10%" << std::endl;
+
+ // Start reading genotypes and analyze.
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+
+ // Read SNP header.
+ id.clear();
+ rs.clear();
+ chr.clear();
+ bgen_A_allele.clear();
+ bgen_B_allele.clear();
+
+ infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+ id.resize(bgen_LS);
+ infile.read(&id[0], bgen_LS);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+ rs.resize(bgen_LR);
+ infile.read(&rs[0], bgen_LR);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+ chr.resize(bgen_LC);
+ infile.read(&chr[0], bgen_LC);
+
+ infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+ bgen_A_allele.resize(bgen_LA);
+ infile.read(&bgen_A_allele[0], bgen_LA);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+ bgen_B_allele.resize(bgen_LB);
+ infile.read(&bgen_B_allele[0], bgen_LB);
+
+ uint16_t unzipped_data[3 * bgen_N];
+
+ if (indicator_snp[t] == 0) {
+ if (CompressedSNPBlocks)
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ else
+ bgen_P = 6 * bgen_N;
+
+ infile.ignore(static_cast<size_t>(bgen_P));
+
+ continue;
+ }
+
+ if (CompressedSNPBlocks) {
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ uint8_t zipped_data[bgen_P];
+
+ unzipped_data_size = 6 * bgen_N;
+
+ infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+
+ int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+ reinterpret_cast<uLongf *>(&unzipped_data_size),
+ reinterpret_cast<Bytef *>(zipped_data),
+ static_cast<uLong>(bgen_P));
+ assert(result == Z_OK);
+
+ } else {
+
+ bgen_P = 6 * bgen_N;
+ infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+ }
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i = 0; i < bgen_N; ++i) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+ bgen_geno_prob_AB =
+ static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+ bgen_geno_prob_BB =
+ static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+
+ // WJA
+ bgen_geno_prob_non_miss =
+ bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss < 0.9) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+ bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+ geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+
+ x_mean /= static_cast<double>(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean);
+ }
+ geno = gsl_vector_get(x, i);
+ }
+
+ // Calculate statistics.
+ time_start = clock();
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+ CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+ LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+ p_lrt, p_score);
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ gsl_matrix_free(WtW);
+ gsl_matrix_free(WtWi);
+ gsl_vector_free(Wty);
+ gsl_vector_free(Wtx);
+ gsl_permutation_free(pmt);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
-void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout << "error reading genotype file:" << file_geno << endl;
- return;
- }
-
- clock_t time_start=clock();
-
- string line;
- char *ch_ptr;
-
- double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
- int n_miss, c_phen;
- double geno, x_mean;
-
- // Calculate some basic quantities.
- double yPwy, xPwy, xPwx;
- double df=(double)W->size1-(double)W->size2-1.0;
-
- gsl_vector *x=gsl_vector_alloc (W->size1);
- gsl_vector *x_miss=gsl_vector_alloc (W->size1);
-
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wty=gsl_vector_alloc (W->size2);
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
- CalcvPv(WtWi, Wty, y, yPwy);
-
- // Start reading genotypes and analyze.
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- getline(infile, line);
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- }
- else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {
- gsl_vector_set(x, i, x_mean);
- }
- geno=gsl_vector_get(x, i);
- }
-
- // Calculate statistics.
- time_start=clock();
-
- gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
- CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
- beta, se, p_wald, p_lrt, p_score);
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
-
- gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
- gsl_vector_free(Wty);
- gsl_vector_free(Wtx);
- gsl_permutation_free(pmt);
-
- infile.close();
- infile.clear();
-
- return;
+void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+
+ string line;
+ char *ch_ptr;
+
+ double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ // Calculate some basic quantities.
+ double yPwy, xPwy, xPwx;
+ double df = (double)W->size1 - (double)W->size2 - 1.0;
+
+ gsl_vector *x = gsl_vector_alloc(W->size1);
+ gsl_vector *x_miss = gsl_vector_alloc(W->size1);
+
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wty = gsl_vector_alloc(W->size2);
+ gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp(WtW, pmt, &sig);
+ LUInvert(WtW, pmt, WtWi);
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
+ CalcvPv(WtWi, Wty, y, yPwy);
+
+ // Start reading genotypes and analyze.
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ getline(infile, line);
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+
+ x_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean);
+ }
+ geno = gsl_vector_get(x, i);
+ }
+
+ // Calculate statistics.
+ time_start = clock();
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+ CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+ LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+ p_lrt, p_score);
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ gsl_matrix_free(WtW);
+ gsl_matrix_free(WtWi);
+ gsl_vector_free(Wty);
+ gsl_vector_free(Wtx);
+ gsl_permutation_free(pmt);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
-void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y) {
- string file_bed=file_bfile+".bed";
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return;
- }
-
- clock_t time_start=clock();
-
- char ch[1];
- bitset<8> b;
-
- double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
- int n_bit, n_miss, ci_total, ci_test;
- double geno, x_mean;
-
- // Calculate some basic quantities.
- double yPwy, xPwy, xPwx;
- double df=(double)W->size1-(double)W->size2-1.0;
-
- gsl_vector *x=gsl_vector_alloc (W->size1);
-
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wty=gsl_vector_alloc (W->size2);
- gsl_vector *Wtx=gsl_vector_alloc (W->size2);
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
- CalcvPv(WtWi, Wty, y, yPwy);
-
- // Calculate n_bit and c, the number of bit for each SNP.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1;}
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {
- ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0;
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
- break;
- }
- if (indicator_idv[ci_total]==0) {
- ci_total++;
- continue;
- }
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(x, ci_test, 2);
- x_mean+=2.0;
- }
- else {
- gsl_vector_set(x, ci_test, 1);
- x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(x, ci_test, 0);
- }
- else {
- gsl_vector_set(x, ci_test, -9);
- n_miss++;
- }
- }
-
- ci_total++;
- ci_test++;
- }
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- geno=gsl_vector_get(x,i);
- if (geno==-9) {
- gsl_vector_set(x, i, x_mean);
- geno=x_mean;
- }
- }
-
- // Calculate statistics.
- time_start=clock();
-
- gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
- CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1,
- beta, se, p_wald, p_lrt, p_score);
-
- //store summary data
- SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
- }
- cout<<endl;
-
- gsl_vector_free(x);
-
- gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
- gsl_vector_free(Wty);
- gsl_vector_free(Wtx);
- gsl_permutation_free(pmt);
-
- infile.close();
- infile.clear();
-
- return;
+void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) { // Scans the PLINK .bed file SNP by SNP, runs the linear-model tests for every SNP with indicator_snp[t] != 0, and appends one SUMSTAT per analyzed SNP to sumStat.
+ string file_bed = file_bfile + ".bed";
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return; // Nothing has been allocated yet at this point.
+ }
+
+ clock_t time_start = clock(); // Reassigned inside the SNP loop before it is read.
+
+ char ch[1]; // One-byte read buffer.
+ bitset<8> b; // Bit view of the byte just read; decoded below as four 2-bit codes.
+
+ double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+
+ // Quantities that depend only on W and y are computed once, before the SNP loop.
+ double yPwy, xPwy, xPwx;
+ double df = (double)W->size1 - (double)W->size2 - 1.0; // Rows of W minus columns of W minus one.
+
+ gsl_vector *x = gsl_vector_alloc(W->size1); // Genotypes of the current SNP; entries 0..ni_test-1 are written below (assumes W->size1 == ni_test -- TODO confirm).
+
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wty = gsl_vector_alloc(W->size2);
+ gsl_vector *Wtx = gsl_vector_alloc(W->size2);
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); // WtW = W^T W.
+ int sig;
+ LUDecomp(WtW, pmt, &sig);
+ LUInvert(WtW, pmt, WtWi); // Presumably leaves the inverse of W^T W in WtWi; LUDecomp/LUInvert are defined elsewhere -- TODO confirm.
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); // Wty = W^T y.
+ CalcvPv(WtWi, Wty, y, yPwy);
+
+ // n_bit is the number of bytes stored per SNP: ni_total / 4 rounded up (four individuals per byte).
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Read the first three bytes of the file; they are copied into b but not inspected or printed.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+ if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+ ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // Absolute offset of SNP t: the 3 leading bytes plus t records of n_bit bytes each.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes. ci_total indexes all individuals in the file, ci_test only those kept for the test.
+ x_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0;
+ ci_test = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0. Codes (b[2j], b[2j+1]): (0,0) -> 2, (0,1) -> 1, (1,1) -> 0, (1,0) -> missing, stored as -9.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { // The last byte may be only partly used.
+ break;
+ }
+ if (indicator_idv[ci_total] == 0) { // Individual not in the analysis: advance the file index only.
+ ci_total++;
+ continue;
+ }
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean += 2.0;
+ } else {
+ gsl_vector_set(x, ci_test, 1);
+ x_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(x, ci_test, 0);
+ } else {
+ gsl_vector_set(x, ci_test, -9);
+ n_miss++;
+ }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean /= (double)(ni_test - n_miss); // Mean over non-missing genotypes. NOTE(review): the divisor is zero when n_miss == ni_test.
+
+ for (size_t i = 0; i < ni_test; ++i) { // Replace every missing marker (-9) with the mean genotype.
+ geno = gsl_vector_get(x, i);
+ if (geno == -9) {
+ gsl_vector_set(x, i, x_mean);
+ geno = x_mean; // geno is not read again in this function.
+ }
+ }
+
+ // Calculate statistics. Only this part of the iteration is timed.
+ time_start = clock();
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); // Wtx = W^T x.
+ CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+ LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
+ p_lrt, p_score);
+
+ // Store summary data; the two 0.0 fields are placeholders not computed by this function.
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Elapsed clock ticks converted to minutes.
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+
+ gsl_matrix_free(WtW);
+ gsl_matrix_free(WtWi);
+ gsl_vector_free(Wty);
+ gsl_vector_free(Wtx);
+ gsl_permutation_free(pmt);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
// Make sure that both y and X are centered already. For every column i of X, appends the pair (i, log likelihood ratio) to pos_loglr; existing entries of pos_loglr are kept.
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y,
- vector<pair<size_t, double> > &pos_loglr) {
- double yty, xty, xtx, log_lr;
- gsl_blas_ddot(y, y, &yty);
+void MatrixCalcLmLR(const gsl_matrix *X, const gsl_vector *y,
+ vector<pair<size_t, double>> &pos_loglr) {
+ double yty, xty, xtx, log_lr;
+ gsl_blas_ddot(y, y, &yty); // yty = y^T y, computed once for all columns.
- for (size_t i=0; i<X->size2; ++i) {
- gsl_vector_const_view X_col=gsl_matrix_const_column (X, i);
- gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx);
- gsl_blas_ddot(&X_col.vector, y, &xty);
+ for (size_t i = 0; i < X->size2; ++i) {
+ gsl_vector_const_view X_col = gsl_matrix_const_column(X, i);
+ gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx); // xtx = x^T x for column i.
+ gsl_blas_ddot(&X_col.vector, y, &xty); // xty = x^T y for column i.
- log_lr=0.5*(double)y->size*(log(yty)-log(yty-xty*xty/xtx));
- pos_loglr.push_back(make_pair(i,log_lr) );
- }
+ log_lr = 0.5 * (double)y->size * (log(yty) - log(yty - xty * xty / xtx)); // NOTE(review): xtx == 0 (all-zero column) is not guarded against.
+ pos_loglr.push_back(make_pair(i, log_lr));
+ }
- return;
+ return;
}
diff --git a/src/lm.h b/src/lm.h
index cf428f0..cb22d3b 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -19,61 +19,61 @@
#ifndef __LM_H__
#define __LM_H__
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
#include "io.h"
+#include "param.h"
using namespace std;
class LM { // State and entry points for the linear-model association tests; all members are public.
public:
- // IO-related parameters.
- int a_mode; // Analysis mode: 50+1/2/3/4 for Frequentist tests.
- size_t d_pace; // Display pace.
-
- string file_bfile;
- string file_geno;
- string file_oxford;
- string file_out;
- string path_out;
-
- string file_gene;
-
- // Summary statistics.
- size_t ni_total, ni_test; // Number of individuals.
- size_t ns_total, ns_test; // Number of SNPs.
- size_t ng_total, ng_test; // Number of genes.
- size_t n_cvt;
- double time_opt; // Time spent.
-
- // Indicator for individuals (phenotypes): 0 missing, 1
- // available for analysis.
- vector<int> indicator_idv;
-
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
-
- vector<SNPINFO> snpInfo; // Record SNP information.
-
- // Not included in PARAM.
- vector<SUMSTAT> sumStat; // Output SNPSummary Data.
-
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
- void AnalyzeGene (const gsl_matrix *W, const gsl_vector *x);
- void AnalyzePlink (const gsl_matrix *W, const gsl_vector *y);
- void AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y);
- // WJA added.
- void Analyzebgen (const gsl_matrix *W, const gsl_vector *y);
-
- void WriteFiles ();
+ // IO-related parameters.
+ int a_mode; // Analysis mode: 50+1/2/3/4 for Frequentist tests.
+ size_t d_pace; // Display pace.
+
+ string file_bfile; // Prefix of the PLINK file set; AnalyzePlink opens file_bfile + ".bed".
+ string file_geno; // Presumably the BIMBAM genotype file used by AnalyzeBimbam -- TODO confirm.
+ string file_oxford; // Presumably the input used by Analyzebgen -- TODO confirm.
+ string file_out; // Presumably the output file prefix used by WriteFiles -- TODO confirm.
+ string path_out; // Presumably the output directory used by WriteFiles -- TODO confirm.
+
+ string file_gene; // Presumably the gene-expression input used by AnalyzeGene -- TODO confirm.
+
+ // Summary statistics.
+ size_t ni_total, ni_test; // Number of individuals.
+ size_t ns_total, ns_test; // Number of SNPs.
+ size_t ng_total, ng_test; // Number of genes.
+ size_t n_cvt; // Presumably the number of covariates -- TODO confirm.
+ double time_opt; // Time spent; AnalyzePlink accumulates it in minutes.
+
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_idv;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
+
+ vector<SNPINFO> snpInfo; // Record SNP information.
+
+ // Not included in PARAM.
+ vector<SUMSTAT> sumStat; // Output SNPSummary Data.
+
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+ void AnalyzeGene(const gsl_matrix *W, const gsl_vector *x);
+ void AnalyzePlink(const gsl_matrix *W, const gsl_vector *y); // Tests every selected SNP of the PLINK .bed file and appends the results to sumStat.
+ void AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y);
+ // WJA added.
+ void Analyzebgen(const gsl_matrix *W, const gsl_vector *y);
+
+ void WriteFiles();
};
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y,
- vector<pair<size_t, double> > &pos_loglr);
+void MatrixCalcLmLR(const gsl_matrix *X, const gsl_vector *y,
+ vector<pair<size_t, double>> &pos_loglr); // Free function: appends one (column index of X, log likelihood ratio) pair per column of X to pos_loglr.
#endif
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 2b5ca84..3f51073 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -16,2488 +16,2585 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
-#include <iomanip>
+#include <assert.h>
+#include <bitset>
#include <cmath>
+#include <cstring>
+#include <iomanip>
#include <iostream>
-#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
-#include <bitset>
-#include <cstring>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
-#include "gsl/gsl_roots.h"
-#include "gsl/gsl_min.h"
#include "gsl/gsl_integration.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
-#include "io.h"
#include "eigenlib.h"
-#include "lapack.h"
#include "gzstream.h"
+#include "io.h"
+#include "lapack.h"
#include "lmm.h"
using namespace std;
-void LMM::CopyFromParam (PARAM &cPar) {
- a_mode=cPar.a_mode;
- d_pace=cPar.d_pace;
+void LMM::CopyFromParam(PARAM &cPar) { // Initializes this LMM object from cPar; cPar itself is only read here.
+ a_mode = cPar.a_mode;
+ d_pace = cPar.d_pace;
- file_bfile=cPar.file_bfile;
- file_geno=cPar.file_geno;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
- file_gene=cPar.file_gene;
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
+ file_gene = cPar.file_gene;
- // WJA added.
- file_oxford=cPar.file_oxford;
+ // WJA added.
+ file_oxford = cPar.file_oxford;
- l_min=cPar.l_min;
- l_max=cPar.l_max;
- n_region=cPar.n_region;
- l_mle_null=cPar.l_mle_null;
- logl_mle_H0=cPar.logl_mle_H0;
+ l_min = cPar.l_min;
+ l_max = cPar.l_max;
+ n_region = cPar.n_region;
+ l_mle_null = cPar.l_mle_null;
+ logl_mle_H0 = cPar.logl_mle_H0;
- time_UtX=0.0;
- time_opt=0.0;
+ time_UtX = 0.0; // Timers start from zero instead of being copied from cPar.
+ time_opt = 0.0;
- ni_total=cPar.ni_total;
- ns_total=cPar.ns_total;
- ni_test=cPar.ni_test;
- ns_test=cPar.ns_test;
- n_cvt=cPar.n_cvt;
+ ni_total = cPar.ni_total;
+ ns_total = cPar.ns_total;
+ ni_test = cPar.ni_test;
+ ns_test = cPar.ns_test;
+ n_cvt = cPar.n_cvt;
- ng_total=cPar.ng_total;
- ng_test=0;
+ ng_total = cPar.ng_total;
+ ng_test = 0; // Reset rather than copied; CopyToParam later writes ng_test back to cPar.
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
- snpInfo=cPar.snpInfo;
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
- return;
+ return;
}
-void LMM::CopyToParam (PARAM &cPar) {
- cPar.time_UtX=time_UtX;
- cPar.time_opt=time_opt;
+void LMM::CopyToParam(PARAM &cPar) { // Writes back to cPar only the two timers and the tested-gene count.
+ cPar.time_UtX = time_UtX;
+ cPar.time_opt = time_opt;
- cPar.ng_test=ng_test;
+ cPar.ng_test = ng_test;
- return;
+ return;
}
-void LMM::WriteFiles () {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".assoc.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- if (!file_gene.empty()) {
- outfile<<"geneID"<<"\t";
-
- if (a_mode==1) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<
- "\t"<<"p_wald"<<endl;
- } else if (a_mode==2) {
- outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl;
- } else if (a_mode==3) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
- } else if (a_mode==4) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<
- "\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<
- "\t"<<"p_score"<<endl;
- } else {}
-
- for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
- outfile<<snpInfo[t].rs_number<<"\t";
-
- if (a_mode==1) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<
- sumStat[t].lambda_remle<<"\t"<<
- sumStat[t].p_wald <<endl;
- } else if (a_mode==2) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].lambda_mle<<"\t"<<
- sumStat[t].p_lrt<<endl;
- } else if (a_mode==3) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_score<<endl;
- } else if (a_mode==4) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<
- sumStat[t].lambda_remle<<"\t"<<
- sumStat[t].lambda_mle<<"\t"<<
- sumStat[t].p_wald <<"\t"<<
- sumStat[t].p_lrt<<"\t"<<
- sumStat[t].p_score<<endl;
- } else {}
- }
- } else {
- outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"
- <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
- if (a_mode==1) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"
- <<"p_wald"<<endl;
- } else if (a_mode==2) {
- outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl;
- } else if (a_mode==3) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl;
- } else if (a_mode==4) {
- outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"
- <<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<
- "\t"<<"p_score"<<endl;
- } else {}
-
- size_t t=0;
- for (size_t i=0; i<snpInfo.size(); ++i) {
- if (indicator_snp[i]==0) {continue;}
-
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<
- "\t"<<snpInfo[i].base_position<<"\t"<<
- snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<
- snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<
- snpInfo[i].maf<<"\t";
-
- if (a_mode==1) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].lambda_remle<<"\t"<<
- sumStat[t].p_wald <<endl;
- } else if (a_mode==2) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].lambda_mle<<"\t"<<
- sumStat[t].p_lrt<<endl;
- } else if (a_mode==3) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].p_score<<endl;
- } else if (a_mode==4) {
- outfile<<scientific<<setprecision(6)<<
- sumStat[t].beta<<"\t"<<sumStat[t].se<<
- "\t"<<sumStat[t].lambda_remle<<"\t"<<
- sumStat[t].lambda_mle<<"\t"<<
- sumStat[t].p_wald <<"\t"<<
- sumStat[t].p_lrt<<"\t"<<
- sumStat[t].p_score<<endl;
- } else {}
- t++;
- }
- }
-
- outfile.close();
- outfile.clear();
- return;
+void LMM::WriteFiles() { // Writes the association results as tab-separated text to path_out + "/" + file_out + ".assoc.txt".
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".assoc.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ if (!file_gene.empty()) { // Gene layout: one ID column followed by the statistics selected by a_mode.
+ outfile << "geneID"
+ << "\t";
+
+ if (a_mode == 1) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "l_remle"
+ << "\t"
+ << "p_wald" << endl;
+ } else if (a_mode == 2) {
+ outfile << "l_mle"
+ << "\t"
+ << "p_lrt" << endl;
+ } else if (a_mode == 3) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_score" << endl;
+ } else if (a_mode == 4) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "l_remle"
+ << "\t"
+ << "l_mle"
+ << "\t"
+ << "p_wald"
+ << "\t"
+ << "p_lrt"
+ << "\t"
+ << "p_score" << endl;
+ } else { // Any other a_mode writes no statistic names, so the header line is not terminated.
+ }
+
+ for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) {
+ outfile << snpInfo[t].rs_number << "\t"; // NOTE(review): the ID is read from snpInfo at the sumStat index t -- confirm snpInfo holds the gene IDs (and at least sumStat.size() entries) in this mode.
+
+ if (a_mode == 1) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+ << sumStat[t].p_wald << endl;
+ } else if (a_mode == 2) {
+ outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
+ << "\t" << sumStat[t].p_lrt << endl;
+ } else if (a_mode == 3) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+ } else if (a_mode == 4) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+ << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
+ << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+ } else {
+ }
+ }
+ } else { // SNP layout: seven SNP description columns followed by the statistics selected by a_mode.
+ outfile << "chr"
+ << "\t"
+ << "rs"
+ << "\t"
+ << "ps"
+ << "\t"
+ << "n_miss"
+ << "\t"
+ << "allele1"
+ << "\t"
+ << "allele0"
+ << "\t"
+ << "af"
+ << "\t";
+
+ if (a_mode == 1) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "l_remle"
+ << "\t"
+ << "p_wald" << endl;
+ } else if (a_mode == 2) {
+ outfile << "l_mle"
+ << "\t"
+ << "p_lrt" << endl;
+ } else if (a_mode == 3) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "p_score" << endl;
+ } else if (a_mode == 4) {
+ outfile << "beta"
+ << "\t"
+ << "se"
+ << "\t"
+ << "l_remle"
+ << "\t"
+ << "l_mle"
+ << "\t"
+ << "p_wald"
+ << "\t"
+ << "p_lrt"
+ << "\t"
+ << "p_score" << endl;
+ } else {
+ }
+
+ size_t t = 0; // Index into sumStat; advances only for SNPs that are written out.
+ for (size_t i = 0; i < snpInfo.size(); ++i) {
+ if (indicator_snp[i] == 0) { // SNPs excluded from the analysis produce no output row.
+ continue;
+ }
+
+ outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+ << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+ << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t"
+ << fixed << setprecision(3) << snpInfo[i].maf << "\t"; // maf is printed in fixed notation with 3 decimals; the statistics below switch to scientific with 6.
+
+ if (a_mode == 1) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+ << sumStat[t].p_wald << endl;
+ } else if (a_mode == 2) {
+ outfile << scientific << setprecision(6) << sumStat[t].lambda_mle
+ << "\t" << sumStat[t].p_lrt << endl;
+ } else if (a_mode == 3) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].p_score << endl;
+ } else if (a_mode == 4) {
+ outfile << scientific << setprecision(6) << sumStat[t].beta << "\t"
+ << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t"
+ << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t"
+ << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl;
+ } else {
+ }
+ t++;
+ }
+ }
+
+ outfile.close();
+ outfile.clear();
+ return;
}
-void CalcPab (const size_t n_cvt, const size_t e_mode,
- const gsl_vector *Hi_eval, const gsl_matrix *Uab,
- const gsl_vector *ab, gsl_matrix *Pab) {
- size_t index_ab, index_aw, index_bw, index_ww;
- double p_ab;
- double ps_ab, ps_aw, ps_bw, ps_ww;
-
- for (size_t p=0; p<=n_cvt+1; ++p) {
- for (size_t a=p+1; a<=n_cvt+2; ++a) {
- for (size_t b=a; b<=n_cvt+2; ++b) {
- index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
- gsl_vector_const_view Uab_col=
- gsl_matrix_const_column (Uab, index_ab);
- gsl_blas_ddot(Hi_eval,&Uab_col.vector,&p_ab);
- if (e_mode!=0) {
- p_ab=gsl_vector_get (ab, index_ab)-p_ab;
- }
- gsl_matrix_set (Pab, 0, index_ab, p_ab);
- }
- else {
- index_aw=GetabIndex (a, p, n_cvt);
- index_bw=GetabIndex (b, p, n_cvt);
- index_ww=GetabIndex (p, p, n_cvt);
-
- ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
- ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
- ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
- ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-
- p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
- gsl_matrix_set (Pab, p, index_ab, p_ab);
- }
- }
- }
- }
- return;
+void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, // Fills Pab one row at a time: row 0 from Hi_eval, Uab and ab, each later row p from row p - 1.
+ const gsl_matrix *Uab, const gsl_vector *ab, gsl_matrix *Pab) { // Pab is the only output; ab is read only when e_mode != 0.
+ size_t index_ab, index_aw, index_bw, index_ww;
+ double p_ab;
+ double ps_ab, ps_aw, ps_bw, ps_ww;
+
+ for (size_t p = 0; p <= n_cvt + 1; ++p) { // Row of Pab being written.
+ for (size_t a = p + 1; a <= n_cvt + 2; ++a) {
+ for (size_t b = a; b <= n_cvt + 2; ++b) { // Only pairs with a <= b are visited.
+ index_ab = GetabIndex(a, b, n_cvt); // Column of Pab/Uab for the pair (a, b); GetabIndex is defined elsewhere.
+ if (p == 0) {
+ gsl_vector_const_view Uab_col =
+ gsl_matrix_const_column(Uab, index_ab);
+ gsl_blas_ddot(Hi_eval, &Uab_col.vector, &p_ab); // p_ab = dot product of Hi_eval with column index_ab of Uab.
+ if (e_mode != 0) {
+ p_ab = gsl_vector_get(ab, index_ab) - p_ab;
+ }
+ gsl_matrix_set(Pab, 0, index_ab, p_ab);
+ } else {
+ index_aw = GetabIndex(a, p, n_cvt);
+ index_bw = GetabIndex(b, p, n_cvt);
+ index_ww = GetabIndex(p, p, n_cvt);
+
+ ps_ab = gsl_matrix_get(Pab, p - 1, index_ab); // All four inputs come from the previous row.
+ ps_aw = gsl_matrix_get(Pab, p - 1, index_aw);
+ ps_bw = gsl_matrix_get(Pab, p - 1, index_bw);
+ ps_ww = gsl_matrix_get(Pab, p - 1, index_ww);
+
+ p_ab = ps_ab - ps_aw * ps_bw / ps_ww; // NOTE(review): ps_ww == 0 is not guarded against.
+ gsl_matrix_set(Pab, p, index_ab, p_ab);
+ }
+ }
+ }
+ }
+ return;
}
-void CalcPPab (const size_t n_cvt, const size_t e_mode,
- const gsl_vector *HiHi_eval, const gsl_matrix *Uab,
- const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) {
- size_t index_ab, index_aw, index_bw, index_ww;
- double p2_ab;
- double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
-
- for (size_t p=0; p<=n_cvt+1; ++p) {
- for (size_t a=p+1; a<=n_cvt+2; ++a) {
- for (size_t b=a; b<=n_cvt+2; ++b) {
- index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
- gsl_vector_const_view Uab_col=
- gsl_matrix_const_column (Uab, index_ab);
- gsl_blas_ddot (HiHi_eval, &Uab_col.vector,
- &p2_ab);
- if (e_mode!=0) {
- p2_ab=p2_ab-gsl_vector_get(ab,index_ab) +
- 2.0*gsl_matrix_get (Pab, 0, index_ab);
- }
- gsl_matrix_set (PPab, 0, index_ab, p2_ab);
- }
- else {
- index_aw=GetabIndex (a, p, n_cvt);
- index_bw=GetabIndex (b, p, n_cvt);
- index_ww=GetabIndex (p, p, n_cvt);
-
- ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
- ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
- ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
- ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
- ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
- ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
- ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-
- p2_ab=ps2_ab+ps_aw*ps_bw*
- ps2_ww/(ps_ww*ps_ww);
- p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
- gsl_matrix_set (PPab, p, index_ab, p2_ab);
- }
- }
- }
- }
- return;
+void CalcPPab(const size_t n_cvt, const size_t e_mode, // Fills PPab one row at a time; it reads an already computed Pab.
+ const gsl_vector *HiHi_eval, const gsl_matrix *Uab,
+ const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) { // PPab is the only output; ab is read only when e_mode != 0.
+ size_t index_ab, index_aw, index_bw, index_ww;
+ double p2_ab;
+ double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
+
+ for (size_t p = 0; p <= n_cvt + 1; ++p) { // Row of PPab being written.
+ for (size_t a = p + 1; a <= n_cvt + 2; ++a) {
+ for (size_t b = a; b <= n_cvt + 2; ++b) { // Only pairs with a <= b are visited.
+ index_ab = GetabIndex(a, b, n_cvt); // Column for the pair (a, b); GetabIndex is defined elsewhere.
+ if (p == 0) {
+ gsl_vector_const_view Uab_col =
+ gsl_matrix_const_column(Uab, index_ab);
+ gsl_blas_ddot(HiHi_eval, &Uab_col.vector, &p2_ab); // p2_ab = dot product of HiHi_eval with column index_ab of Uab.
+ if (e_mode != 0) {
+ p2_ab = p2_ab - gsl_vector_get(ab, index_ab) +
+ 2.0 * gsl_matrix_get(Pab, 0, index_ab);
+ }
+ gsl_matrix_set(PPab, 0, index_ab, p2_ab);
+ } else {
+ index_aw = GetabIndex(a, p, n_cvt);
+ index_bw = GetabIndex(b, p, n_cvt);
+ index_ww = GetabIndex(p, p, n_cvt);
+
+ ps2_ab = gsl_matrix_get(PPab, p - 1, index_ab); // ps_* values come from row p - 1 of Pab, ps2_* values from row p - 1 of PPab.
+ ps_aw = gsl_matrix_get(Pab, p - 1, index_aw);
+ ps_bw = gsl_matrix_get(Pab, p - 1, index_bw);
+ ps_ww = gsl_matrix_get(Pab, p - 1, index_ww);
+ ps2_aw = gsl_matrix_get(PPab, p - 1, index_aw);
+ ps2_bw = gsl_matrix_get(PPab, p - 1, index_bw);
+ ps2_ww = gsl_matrix_get(PPab, p - 1, index_ww);
+
+ p2_ab = ps2_ab + ps_aw * ps_bw * ps2_ww / (ps_ww * ps_ww); // NOTE(review): ps_ww == 0 is not guarded against.
+ p2_ab -= (ps_aw * ps2_bw + ps_bw * ps2_aw) / ps_ww;
+ gsl_matrix_set(PPab, p, index_ab, p2_ab);
+ }
+ }
+ }
+ }
+ return;
}
-void CalcPPPab (const size_t n_cvt, const size_t e_mode,
- const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab,
- const gsl_vector *ab, const gsl_matrix *Pab,
- const gsl_matrix *PPab, gsl_matrix *PPPab) {
- size_t index_ab, index_aw, index_bw, index_ww;
- double p3_ab;
- double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww,
- ps3_aw, ps3_bw, ps3_ww;
-
- for (size_t p=0; p<=n_cvt+1; ++p) {
- for (size_t a=p+1; a<=n_cvt+2; ++a) {
- for (size_t b=a; b<=n_cvt+2; ++b) {
- index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
- gsl_vector_const_view Uab_col=
- gsl_matrix_const_column (Uab, index_ab);
- gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector,
- &p3_ab);
- if (e_mode!=0) {
- p3_ab=gsl_vector_get (ab, index_ab)-
- p3_ab+3.0*gsl_matrix_get(PPab,0,index_ab)
- -3.0*gsl_matrix_get (Pab, 0, index_ab);
- }
- gsl_matrix_set (PPPab, 0, index_ab, p3_ab);
- }
- else {
- index_aw=GetabIndex (a, p, n_cvt);
- index_bw=GetabIndex (b, p, n_cvt);
- index_ww=GetabIndex (p, p, n_cvt);
-
- ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
- ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
- ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
- ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
- ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
- ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
- ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
- ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw);
- ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw);
- ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww);
-
- p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww
- /(ps_ww*ps_ww*ps_ww);
- p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw +
- ps2_aw*ps2_bw)/ps_ww;
- p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*
- ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/
- (ps_ww*ps_ww);
-
- gsl_matrix_set (PPPab, p, index_ab, p3_ab);
- }
- }
- }
- }
- return;
+void CalcPPPab(const size_t n_cvt, const size_t e_mode, // Fills PPPab one row at a time; it reads already computed Pab and PPab.
+ const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab,
+ const gsl_vector *ab, const gsl_matrix *Pab,
+ const gsl_matrix *PPab, gsl_matrix *PPPab) { // PPPab is the only output; ab is read only when e_mode != 0.
+ size_t index_ab, index_aw, index_bw, index_ww;
+ double p3_ab;
+ double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw,
+ ps3_ww;
+
+ for (size_t p = 0; p <= n_cvt + 1; ++p) { // Row of PPPab being written.
+ for (size_t a = p + 1; a <= n_cvt + 2; ++a) {
+ for (size_t b = a; b <= n_cvt + 2; ++b) { // Only pairs with a <= b are visited.
+ index_ab = GetabIndex(a, b, n_cvt); // Column for the pair (a, b); GetabIndex is defined elsewhere.
+ if (p == 0) {
+ gsl_vector_const_view Uab_col =
+ gsl_matrix_const_column(Uab, index_ab);
+ gsl_blas_ddot(HiHiHi_eval, &Uab_col.vector, &p3_ab); // p3_ab = dot product of HiHiHi_eval with column index_ab of Uab.
+ if (e_mode != 0) {
+ p3_ab = gsl_vector_get(ab, index_ab) - p3_ab +
+ 3.0 * gsl_matrix_get(PPab, 0, index_ab) -
+ 3.0 * gsl_matrix_get(Pab, 0, index_ab);
+ }
+ gsl_matrix_set(PPPab, 0, index_ab, p3_ab);
+ } else {
+ index_aw = GetabIndex(a, p, n_cvt);
+ index_bw = GetabIndex(b, p, n_cvt);
+ index_ww = GetabIndex(p, p, n_cvt);
+
+ ps3_ab = gsl_matrix_get(PPPab, p - 1, index_ab); // ps_*, ps2_* and ps3_* come from row p - 1 of Pab, PPab and PPPab respectively.
+ ps_aw = gsl_matrix_get(Pab, p - 1, index_aw);
+ ps_bw = gsl_matrix_get(Pab, p - 1, index_bw);
+ ps_ww = gsl_matrix_get(Pab, p - 1, index_ww);
+ ps2_aw = gsl_matrix_get(PPab, p - 1, index_aw);
+ ps2_bw = gsl_matrix_get(PPab, p - 1, index_bw);
+ ps2_ww = gsl_matrix_get(PPab, p - 1, index_ww);
+ ps3_aw = gsl_matrix_get(PPPab, p - 1, index_aw);
+ ps3_bw = gsl_matrix_get(PPPab, p - 1, index_bw);
+ ps3_ww = gsl_matrix_get(PPPab, p - 1, index_ww);
+
+ p3_ab = ps3_ab -
+ ps_aw * ps_bw * ps2_ww * ps2_ww / (ps_ww * ps_ww * ps_ww); // NOTE(review): ps_ww == 0 is not guarded against.
+ p3_ab -= (ps_aw * ps3_bw + ps_bw * ps3_aw + ps2_aw * ps2_bw) / ps_ww;
+ p3_ab += (ps_aw * ps2_bw * ps2_ww + ps_bw * ps2_aw * ps2_ww +
+ ps_aw * ps_bw * ps3_ww) /
+ (ps_ww * ps_ww);
+
+ gsl_matrix_set(PPPab, p, index_ab, p3_ab);
+ }
+ }
+ }
+ }
+ return;
}
-double LogL_f (double l, void *params) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
- double f=0.0, logdet_h=0.0, d;
- size_t index_yy;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- for (size_t i=0; i<(p->eval)->size; ++i) {
- d=gsl_vector_get (v_temp, i);
- logdet_h+=log(fabs(d));
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-
- double c=0.5*(double)ni_test*(log((double)ni_test)-log(2*M_PI)-1.0);
-
- index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
- f=c-0.5*logdet_h-0.5*(double)ni_test*log(P_yy);
-
- gsl_matrix_free (Pab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (v_temp);
- return f;
+double LogL_f(double l, void *params) { // Returns f = c - 0.5 * logdet_h - 0.5 * ni_test * log(P_yy) evaluated at l; params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Number of (a, b) pairs with 1 <= a <= b <= n_cvt + 2, i.e. the column count of Pab.
+
+ size_t nc_total; // Row of Pab from which P_yy is read.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ }
+
+ double f = 0.0, logdet_h = 0.0, d;
+ size_t index_yy;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp);
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp); // Hi_eval = 1 / (l * eval + 1) when e_mode == 0, otherwise l * eval / (l * eval + 1).
+
+ for (size_t i = 0; i < (p->eval)->size; ++i) { // logdet_h = sum over i of log|l * eval_i + 1|.
+ d = gsl_vector_get(v_temp, i);
+ logdet_h += log(fabs(d));
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+
+ double c =
+ 0.5 * (double)ni_test * (log((double)ni_test) - log(2 * M_PI) - 1.0); // Term that does not depend on l.
+
+ index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column of the pair (n_cvt + 2, n_cvt + 2).
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+ f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * log(P_yy); // NOTE(review): P_yy <= 0 is not guarded against.
+
+ gsl_matrix_free(Pab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(v_temp);
+ return f;
}
-double LogL_dev1 (double l, void *params) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+double LogL_dev1(double l, void *params) { // Returns dev1 = -0.5 * trace_HiK + 0.5 * ni_test * yPKPy / P_yy evaluated at l (presumably the first derivative of LogL_f in l -- TODO confirm); params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Number of (a, b) pairs with 1 <= a <= b <= n_cvt + 2, i.e. the column count of Pab and PPab.
+
+ size_t nc_total; // Row of Pab/PPab from which the yy entries are read.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ }
+
+ double dev1 = 0.0, trace_Hi = 0.0;
+ size_t index_yy;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0);
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp);
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp); // Hi_eval = 1 / (l * eval + 1) when e_mode == 0, otherwise l * eval / (l * eval + 1).
+
+ gsl_vector_memcpy(HiHi_eval, Hi_eval);
+ gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = element-wise square of Hi_eval.
+
+ gsl_vector_set_all(v_temp, 1.0); // v_temp is reused as a vector of ones.
+ gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); // trace_Hi = sum of the entries of Hi_eval.
+
+ if (p->e_mode != 0) {
+ trace_Hi = (double)ni_test - trace_Hi;
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); // Needs the Pab computed on the previous line.
+
+ double trace_HiK = ((double)ni_test - trace_Hi) / l; // NOTE(review): l == 0 is not guarded against.
+
+ index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column of the pair (n_cvt + 2, n_cvt + 2).
+
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+ double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy);
+ double yPKPy = (P_yy - PP_yy) / l;
+ dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy;
+
+ gsl_matrix_free(Pab);
+ gsl_matrix_free(PPab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(HiHi_eval);
+ gsl_vector_free(v_temp);
+
+ return dev1;
+}
- size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
+double LogL_dev2(double l, void *params) { // Returns dev2 = 0.5*trace_HiKHiK - 0.5*ni_test*(2*yPKPKPy*P_yy - yPKPy^2)/P_yy^2 at l (by its name, presumably the second derivative in l of LogL_f -- confirm); params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab/PPPab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ size_t nc_total; // Row of Pab/PPab/PPPab from which the yy entries are read below.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ }
+
+ double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
+ size_t index_yy;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all seven are freed before returning.
+ gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ gsl_vector_memcpy(HiHi_eval, Hi_eval);
+ gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = Hi_eval squared, element-wise.
+ gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+ gsl_vector_mul(HiHiHi_eval, Hi_eval); // HiHiHi_eval = Hi_eval cubed, element-wise.
+
+ gsl_vector_set_all(v_temp, 1.0); // Reuse v_temp as an all-ones vector so the dot products below are plain sums.
+ gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); // trace_Hi = sum of the entries of Hi_eval.
+ gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); // trace_HiHi = sum of the entries of HiHi_eval.
+
+ if (p->e_mode != 0) {
+ trace_Hi = (double)ni_test - trace_Hi; // Equals sum_i 1 / (l * eval_i + 1) when eval has ni_test entries.
+ trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; // Uses the updated trace_Hi; equals sum_i 1 / (l * eval_i + 1)^2 under the same condition.
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); // Helpers defined outside this block; each later table takes the earlier ones as input.
+ CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+ double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l); // Divides by l * l: the caller must not pass l == 0.
+
+ index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column index for the pair (n_cvt + 2, n_cvt + 2).
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+ double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy);
+ double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_yy);
+
+ double yPKPy = (P_yy - PP_yy) / l;
+ double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+ dev2 = 0.5 * trace_HiKHiK -
+ 0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
+ (P_yy * P_yy);
+
+ gsl_matrix_free(Pab);
+ gsl_matrix_free(PPab);
+ gsl_matrix_free(PPPab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(HiHi_eval);
+ gsl_vector_free(HiHiHi_eval);
+ gsl_vector_free(v_temp);
+
+ return dev2;
+}
- double dev1=0.0, trace_Hi=0.0;
- size_t index_yy;
+void LogL_dev12(double l, void *params, double *dev1, double *dev2) { // Writes to *dev1 and *dev2 the same expressions that LogL_dev1 and LogL_dev2 return, from one shared set of intermediates; params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab/PPPab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ size_t nc_total; // Row of Pab/PPab/PPPab from which the yy entries are read below.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ }
+
+ double trace_Hi = 0.0, trace_HiHi = 0.0;
+ size_t index_yy;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all seven are freed before returning.
+ gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ gsl_vector_memcpy(HiHi_eval, Hi_eval);
+ gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = Hi_eval squared, element-wise.
+ gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+ gsl_vector_mul(HiHiHi_eval, Hi_eval); // HiHiHi_eval = Hi_eval cubed, element-wise.
+
+ gsl_vector_set_all(v_temp, 1.0); // Reuse v_temp as an all-ones vector so the dot products below are plain sums.
+ gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); // trace_Hi = sum of the entries of Hi_eval.
+ gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); // trace_HiHi = sum of the entries of HiHi_eval.
+
+ if (p->e_mode != 0) {
+ trace_Hi = (double)ni_test - trace_Hi; // Equals sum_i 1 / (l * eval_i + 1) when eval has ni_test entries.
+ trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; // Uses the updated trace_Hi; equals sum_i 1 / (l * eval_i + 1)^2 under the same condition.
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); // Helpers defined outside this block; each later table takes the earlier ones as input.
+ CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+ double trace_HiK = ((double)ni_test - trace_Hi) / l; // Divides by l (and l * l below): the caller must not pass l == 0.
+ double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l);
+
+ index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column index for the pair (n_cvt + 2, n_cvt + 2).
+
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
+ double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy);
+ double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_yy);
+
+ double yPKPy = (P_yy - PP_yy) / l;
+ double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+ *dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy; // Both output pointers are dereferenced unconditionally, so neither may be null.
+ *dev2 = 0.5 * trace_HiKHiK -
+ 0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
+ (P_yy * P_yy);
+
+ gsl_matrix_free(Pab);
+ gsl_matrix_free(PPab);
+ gsl_matrix_free(PPPab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(HiHi_eval);
+ gsl_vector_free(HiHiHi_eval);
+ gsl_vector_free(v_temp);
+
+ return;
+}
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
+double LogRL_f(double l, void *params) { // Returns f = c - 0.5*logdet_h - 0.5*logdet_hiw - 0.5*df*log(P_yy) at l (by its name, presumably the restricted log-likelihood -- confirm); params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/Iab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ double df; // ni_test minus nc_total, as a double.
+ size_t nc_total; // Number of diagonal terms summed into logdet_hiw, and the row of Pab from which P_yy is read.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ df = (double)ni_test - (double)n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ df = (double)ni_test - (double)n_cvt - 1.0;
+ }
+
+ double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d;
+ size_t index_ww;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all four are freed before returning.
+ gsl_matrix *Iab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ for (size_t i = 0; i < (p->eval)->size; ++i) { // logdet_h = sum_i log|l * eval_i + 1|.
+ d = gsl_vector_get(v_temp, i);
+ logdet_h += log(fabs(d));
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); // Pab uses the Hi_eval weights (helper defined outside this block).
+ gsl_vector_set_all(v_temp, 1.0); // Reuse v_temp as all-ones weights for the second table.
+ CalcPab(n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab); // Iab is the same table built with every weight equal to 1.
+
+ // Calculate log|WHiW| - log|WW|, accumulated as a sum of log ratios of the diagonal entries of Pab and Iab.
+ logdet_hiw = 0.0;
+ for (size_t i = 0; i < nc_total; ++i) {
+ index_ww = GetabIndex(i + 1, i + 1, n_cvt); // Column for the pair (i + 1, i + 1), read from row i of each table.
+ d = gsl_matrix_get(Pab, i, index_ww);
+ logdet_hiw += log(d);
+ d = gsl_matrix_get(Iab, i, index_ww);
+ logdet_hiw -= log(d);
+ }
+ index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column index for the pair (n_cvt + 2, n_cvt + 2).
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);
+
+ double c = 0.5 * df * (log(df) - log(2 * M_PI) - 1.0); // Term that depends only on df, not on l.
+ f = c - 0.5 * logdet_h - 0.5 * logdet_hiw - 0.5 * df * log(P_yy);
+
+ gsl_matrix_free(Pab);
+ gsl_matrix_free(Iab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(v_temp);
+ return f;
+}
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
+double LogRL_dev1(double l, void *params) { // Returns dev1 = -0.5*trace_PK + 0.5*df*yPKPy/P_yy at l (by its name, presumably the first derivative in l of LogRL_f -- confirm); params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ double df; // ni_test minus nc_total, as a double.
+ size_t nc_total; // Number of diagonal corrections applied to trace_P, and the row from which the yy entries are read.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ df = (double)ni_test - (double)n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ df = (double)ni_test - (double)n_cvt - 1.0;
+ }
+
+ double dev1 = 0.0, trace_Hi = 0.0;
+ size_t index_ww;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all five are freed before returning.
+ gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ gsl_vector_memcpy(HiHi_eval, Hi_eval);
+ gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = Hi_eval squared, element-wise.
+
+ gsl_vector_set_all(v_temp, 1.0); // Reuse v_temp as an all-ones vector so the dot product below is a plain sum.
+ gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); // trace_Hi = sum of the entries of Hi_eval.
+
+ if (p->e_mode != 0) {
+ trace_Hi = (double)ni_test - trace_Hi; // Equals sum_i 1 / (l * eval_i + 1) when eval has ni_test entries.
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); // Helpers defined outside this block; PPab is built from HiHi_eval and Pab.
+ CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+
+ // Calculate trace_P, then trace_PK from it.
+ double trace_P = trace_Hi;
+ double ps_ww, ps2_ww;
+ for (size_t i = 0; i < nc_total; ++i) { // Subtract the ratio PPab / Pab of the (i + 1, i + 1) entries in row i.
+ index_ww = GetabIndex(i + 1, i + 1, n_cvt);
+ ps_ww = gsl_matrix_get(Pab, i, index_ww);
+ ps2_ww = gsl_matrix_get(PPab, i, index_ww);
+ trace_P -= ps2_ww / ps_ww;
+ }
+ double trace_PK = (df - trace_P) / l; // Divides by l: the caller must not pass l == 0.
+
+ // Calculate yPKPy.
+ index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);
+ double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww);
+ double yPKPy = (P_yy - PP_yy) / l;
+
+ dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy;
+
+ gsl_matrix_free(Pab);
+ gsl_matrix_free(PPab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(HiHi_eval);
+ gsl_vector_free(v_temp);
+
+ return dev1;
+}
- gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+double LogRL_dev2(double l, void *params) { // Returns dev2 = 0.5*trace_PKPK - 0.5*df*(2*yPKPKPy*P_yy - yPKPy^2)/P_yy^2 at l (by its name, presumably the second derivative in l of LogRL_f -- confirm); params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab/PPPab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ double df; // ni_test minus nc_total, as a double.
+ size_t nc_total; // Number of diagonal corrections applied to the traces, and the row from which the yy entries are read.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ df = (double)ni_test - (double)n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ df = (double)ni_test - (double)n_cvt - 1.0;
+ }
+
+ double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
+ size_t index_ww;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all seven are freed before returning.
+ gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ gsl_vector_memcpy(HiHi_eval, Hi_eval);
+ gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = Hi_eval squared, element-wise.
+ gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+ gsl_vector_mul(HiHiHi_eval, Hi_eval); // HiHiHi_eval = Hi_eval cubed, element-wise.
+
+ gsl_vector_set_all(v_temp, 1.0); // Reuse v_temp as an all-ones vector so the dot products below are plain sums.
+ gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); // trace_Hi = sum of the entries of Hi_eval.
+ gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); // trace_HiHi = sum of the entries of HiHi_eval.
+
+ if (p->e_mode != 0) {
+ trace_Hi = (double)ni_test - trace_Hi; // Equals sum_i 1 / (l * eval_i + 1) when eval has ni_test entries.
+ trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; // Uses the updated trace_Hi; equals sum_i 1 / (l * eval_i + 1)^2 under the same condition.
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); // Helpers defined outside this block; each later table takes the earlier ones as input.
+ CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+ // Calculate trace_P and trace_PP, then trace_PKPK from them.
+ double trace_P = trace_Hi, trace_PP = trace_HiHi;
+ double ps_ww, ps2_ww, ps3_ww;
+ for (size_t i = 0; i < nc_total; ++i) { // Per-row corrections built from the (i + 1, i + 1) entries of Pab, PPab and PPPab.
+ index_ww = GetabIndex(i + 1, i + 1, n_cvt);
+ ps_ww = gsl_matrix_get(Pab, i, index_ww);
+ ps2_ww = gsl_matrix_get(PPab, i, index_ww);
+ ps3_ww = gsl_matrix_get(PPPab, i, index_ww);
+ trace_P -= ps2_ww / ps_ww;
+ trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww;
+ }
+ double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l); // Divides by l * l: the caller must not pass l == 0.
+
+ // Calculate yPKPy, yPKPKPy.
+ index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);
+ double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww);
+ double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_ww);
+ double yPKPy = (P_yy - PP_yy) / l;
+ double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+ dev2 = 0.5 * trace_PKPK -
+ 0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);
+
+ gsl_matrix_free(Pab);
+ gsl_matrix_free(PPab);
+ gsl_matrix_free(PPPab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(HiHi_eval);
+ gsl_vector_free(HiHiHi_eval);
+ gsl_vector_free(v_temp);
+
+ return dev2;
+}
- gsl_vector_set_all (v_temp, 1.0);
- gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
+void LogRL_dev12(double l, void *params, double *dev1, double *dev2) { // Writes to *dev1 and *dev2 the same expressions that LogRL_dev1 and LogRL_dev2 return, from one shared set of intermediates; params must point to a FUNC_PARAM.
+ FUNC_PARAM *p = (FUNC_PARAM *)params;
+ size_t n_cvt = p->n_cvt;
+ size_t ni_test = p->ni_test;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab/PPab/PPPab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ double df; // ni_test minus nc_total, as a double.
+ size_t nc_total; // Number of diagonal corrections applied to the traces, and the row from which the yy entries are read.
+ if (p->calc_null == true) {
+ nc_total = n_cvt;
+ df = (double)ni_test - (double)n_cvt;
+ } else {
+ nc_total = n_cvt + 1;
+ df = (double)ni_test - (double)n_cvt - 1.0;
+ }
+
+ double trace_Hi = 0.0, trace_HiHi = 0.0;
+ size_t index_ww;
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all seven are freed before returning.
+ gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+
+ gsl_vector_memcpy(v_temp, p->eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (p->e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ gsl_vector_memcpy(HiHi_eval, Hi_eval);
+ gsl_vector_mul(HiHi_eval, Hi_eval); // HiHi_eval = Hi_eval squared, element-wise.
+ gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+ gsl_vector_mul(HiHiHi_eval, Hi_eval); // HiHiHi_eval = Hi_eval cubed, element-wise.
+
+ gsl_vector_set_all(v_temp, 1.0); // Reuse v_temp as an all-ones vector so the dot products below are plain sums.
+ gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); // trace_Hi = sum of the entries of Hi_eval.
+ gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); // trace_HiHi = sum of the entries of HiHi_eval.
+
+ if (p->e_mode != 0) {
+ trace_Hi = (double)ni_test - trace_Hi; // Equals sum_i 1 / (l * eval_i + 1) when eval has ni_test entries.
+ trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; // Uses the updated trace_Hi; equals sum_i 1 / (l * eval_i + 1)^2 under the same condition.
+ }
+
+ CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); // Helpers defined outside this block; each later table takes the earlier ones as input.
+ CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
+ // Calculate trace_P and trace_PP, then trace_PK and trace_PKPK from them.
+ double trace_P = trace_Hi, trace_PP = trace_HiHi;
+ double ps_ww, ps2_ww, ps3_ww;
+ for (size_t i = 0; i < nc_total; ++i) { // Per-row corrections built from the (i + 1, i + 1) entries of Pab, PPab and PPPab.
+ index_ww = GetabIndex(i + 1, i + 1, n_cvt);
+ ps_ww = gsl_matrix_get(Pab, i, index_ww);
+ ps2_ww = gsl_matrix_get(PPab, i, index_ww);
+ ps3_ww = gsl_matrix_get(PPPab, i, index_ww);
+ trace_P -= ps2_ww / ps_ww;
+ trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww;
+ }
+ double trace_PK = (df - trace_P) / l; // Divides by l (and l * l below): the caller must not pass l == 0.
+ double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l);
+
+ // Calculate yPKPy, yPKPKPy.
+ index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
+ double P_yy = gsl_matrix_get(Pab, nc_total, index_ww);
+ double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww);
+ double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_ww);
+ double yPKPy = (P_yy - PP_yy) / l;
+ double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l);
+
+ *dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy; // Both output pointers are dereferenced unconditionally, so neither may be null.
+ *dev2 = 0.5 * trace_PKPK -
+ 0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);
+
+ gsl_matrix_free(Pab);
+ gsl_matrix_free(PPab);
+ gsl_matrix_free(PPPab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(HiHi_eval);
+ gsl_vector_free(HiHiHi_eval);
+ gsl_vector_free(v_temp);
+
+ return;
+}
- if (p->e_mode!=0) {trace_Hi=(double)ni_test-trace_Hi;}
+void LMM::CalcRLWald(const double &l, const FUNC_PARAM &params, double &beta, // At the given l, writes beta = P_xy / P_xx, its standard error se, and p_wald, an upper-tail F(1, df) probability.
+ double &se, double &p_wald) {
+ size_t n_cvt = params.n_cvt;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ int df = (int)ni_test - (int)n_cvt - 1; // ni_test is not a local or a parameter here (presumably an LMM member -- confirm); it is not read from params.
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all three are freed before returning.
+ gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
+ gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+
+ gsl_vector_memcpy(v_temp, params.eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (params.e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); // Fills Pab from the Hi_eval weights (helper defined outside this block).
+
+ size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column indices for the pairs (y, y), (x, x) and (y, x), where x is item n_cvt + 1 and y is item n_cvt + 2.
+ size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt);
+ size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt);
+ double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy); // P_* entries come from row n_cvt of Pab.
+ double P_xx = gsl_matrix_get(Pab, n_cvt, index_xx);
+ double P_xy = gsl_matrix_get(Pab, n_cvt, index_xy);
+ double Px_yy = gsl_matrix_get(Pab, n_cvt + 1, index_yy); // Px_yy is the yy entry one row further down (row n_cvt + 1).
+
+ beta = P_xy / P_xx;
+ double tau = (double)df / Px_yy;
+ se = sqrt(1.0 / (tau * P_xx));
+ p_wald = gsl_cdf_fdist_Q((P_yy - Px_yy) * tau, 1.0, df); // Upper-tail probability of the statistic (P_yy - Px_yy) * tau under F(1, df).
+
+ gsl_matrix_free(Pab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(v_temp);
+ return;
+}
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+void LMM::CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta, // At the given l, writes beta = P_xy / P_xx, its standard error se, and p_score, an upper-tail F(1, df) probability.
+ double &se, double &p_score) {
+ size_t n_cvt = params.n_cvt;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // Column count of Pab: the number of (a, b) pairs with 1 <= b <= a <= n_cvt + 2.
+
+ int df = (int)ni_test - (int)n_cvt - 1; // ni_test is not a local or a parameter here (presumably an LMM member -- confirm); it is not read from params.
+
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); // Scratch buffers; all three are freed before returning.
+ gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
+ gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+
+ gsl_vector_memcpy(v_temp, params.eval);
+ gsl_vector_scale(v_temp, l); // v_temp = l * eval.
+ if (params.e_mode == 0) {
+ gsl_vector_set_all(Hi_eval, 1.0); // Numerator 1: Hi_eval becomes 1 / (l * eval + 1) after the division below.
+ } else {
+ gsl_vector_memcpy(Hi_eval, v_temp); // Numerator l * eval: Hi_eval becomes l * eval / (l * eval + 1).
+ }
+ gsl_vector_add_constant(v_temp, 1.0); // v_temp = l * eval + 1.
+ gsl_vector_div(Hi_eval, v_temp);
+
+ CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); // Fills Pab from the Hi_eval weights (helper defined outside this block).
+
+ size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); // Column indices for the pairs (y, y), (x, x) and (y, x), where x is item n_cvt + 1 and y is item n_cvt + 2.
+ size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt);
+ size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt);
+ double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy); // P_* entries come from row n_cvt of Pab.
+ double P_xx = gsl_matrix_get(Pab, n_cvt, index_xx);
+ double P_xy = gsl_matrix_get(Pab, n_cvt, index_xy);
+ double Px_yy = gsl_matrix_get(Pab, n_cvt + 1, index_yy); // Px_yy is the yy entry one row further down (row n_cvt + 1); used only for tau and se.
+
+ beta = P_xy / P_xx;
+ double tau = (double)df / Px_yy;
+ se = sqrt(1.0 / (tau * P_xx));
+
+ p_score = // Upper-tail probability of the statistic ni_test * P_xy^2 / (P_yy * P_xx) under F(1, df); note the factor is ni_test, not df.
+ gsl_cdf_fdist_Q((double)ni_test * P_xy * P_xy / (P_yy * P_xx), 1.0, df);
+
+ gsl_matrix_free(Pab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(v_temp);
+ return;
+}
- double trace_HiK=((double)ni_test-trace_Hi)/l;
+void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) { // Fills columns of Uab with element-wise products of pairs of vectors drawn from the columns of UtW and from Uty; pairs that involve item n_cvt + 1 are skipped.
+ size_t index_ab;
+ size_t n_cvt = UtW->size2; // Items 1..n_cvt are the columns of UtW; item n_cvt + 2 is Uty.
- index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ gsl_vector *u_a = gsl_vector_alloc(Uty->size); // Scratch copy of the current "a" vector; freed before returning.
- double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double yPKPy=(P_yy-PP_yy)/l;
- dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
+ for (size_t a = 1; a <= n_cvt + 2; ++a) {
+ if (a == n_cvt + 1) { // Item n_cvt + 1 has no source vector in this overload, so its columns are left untouched.
+ continue;
+ }
- gsl_matrix_free (Pab);
- gsl_matrix_free (PPab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (HiHi_eval);
- gsl_vector_free (v_temp);
+ if (a == n_cvt + 2) {
+ gsl_vector_memcpy(u_a, Uty);
+ } else {
+ gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, a - 1); // Items are 1-based; UtW columns are 0-based.
+ gsl_vector_memcpy(u_a, &UtW_col.vector);
+ }
- return dev1;
-}
+ for (size_t b = a; b >= 1; --b) { // b runs from a down to 1; the unsigned counter stops at 0 because the test is b >= 1.
+ if (b == n_cvt + 1) {
+ continue;
+ }
-double LogL_dev2 (double l, void *params) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- size_t nc_total;
- if (p->calc_null==true) {
- nc_total=n_cvt;
- } else {
- nc_total=n_cvt+1;
- }
-
- double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
- size_t index_yy;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
- gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
- gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
- gsl_vector_set_all (v_temp, 1.0);
- gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
- gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
- trace_Hi=(double)ni_test-trace_Hi;
- trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab,
- PPPab);
-
- double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
- index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
- double yPKPy=(P_yy-PP_yy)/l;
- double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
- dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*
- (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
- gsl_matrix_free (Pab);
- gsl_matrix_free (PPab);
- gsl_matrix_free (PPPab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (HiHi_eval);
- gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
- return dev2;
-}
+ index_ab = GetabIndex(a, b, n_cvt); // Column of Uab assigned to the pair (a, b).
+ gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
-void LogL_dev12 (double l, void *params, double *dev1, double *dev2) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- size_t nc_total;
- if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
- double trace_Hi=0.0, trace_HiHi=0.0;
- size_t index_yy;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
- gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
- gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
- gsl_vector_set_all (v_temp, 1.0);
- gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
- gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
- trace_Hi=(double)ni_test-trace_Hi;
- trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab,
- PPPab);
-
- double trace_HiK=((double)ni_test-trace_Hi)/l;
- double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
- index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-
- double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
- double yPKPy=(P_yy-PP_yy)/l;
- double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
- *dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
- *dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*
- (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
- gsl_matrix_free (Pab);
- gsl_matrix_free (PPab);
- gsl_matrix_free (PPPab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (HiHi_eval);
- gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
- return;
-}
+ if (b == n_cvt + 2) { // Only reachable when a == n_cvt + 2, i.e. the (Uty, Uty) pair.
+ gsl_vector_memcpy(&Uab_col.vector, Uty);
+ } else {
+ gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1);
+ gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector);
+ }
-double LogRL_f (double l, void *params) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- double df;
- size_t nc_total;
- if (p->calc_null==true) {
- nc_total=n_cvt; df=(double)ni_test-(double)n_cvt;
- }
- else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
- double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d;
- size_t index_ww;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *Iab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- for (size_t i=0; i<(p->eval)->size; ++i) {
- d=gsl_vector_get (v_temp, i);
- logdet_h+=log(fabs(d));
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- gsl_vector_set_all (v_temp, 1.0);
- CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
-
- // Calculate |WHiW|-|WW|.
- logdet_hiw=0.0;
- for (size_t i=0; i<nc_total; ++i) {
- index_ww=GetabIndex (i+1, i+1, n_cvt);
- d=gsl_matrix_get (Pab, i, index_ww);
- logdet_hiw+=log(d);
- d=gsl_matrix_get (Iab, i, index_ww);
- logdet_hiw-=log(d);
- }
- index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
-
- double c=0.5*df*(log(df)-log(2*M_PI)-1.0);
- f=c-0.5*logdet_h-0.5*logdet_hiw-0.5*df*log(P_yy);
-
- gsl_matrix_free (Pab);
- gsl_matrix_free (Iab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (v_temp);
- return f;
-}
+ gsl_vector_mul(&Uab_col.vector, u_a); // The column now holds the element-wise product of vector b and vector a.
+ }
+ }
-double LogRL_dev1 (double l, void *params) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- double df;
- size_t nc_total;
- if (p->calc_null==true) {
- nc_total=n_cvt;
- df=(double)ni_test-(double)n_cvt;
- }
- else {
- nc_total=n_cvt+1;
- df=(double)ni_test-(double)n_cvt-1.0;
- }
-
- double dev1=0.0, trace_Hi=0.0;
- size_t index_ww;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
-
- gsl_vector_set_all (v_temp, 1.0);
- gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-
- if (p->e_mode!=0) {
- trace_Hi=(double)ni_test-trace_Hi;
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-
- // Calculate tracePK and trace PKPK.
- double trace_P=trace_Hi;
- double ps_ww, ps2_ww;
- for (size_t i=0; i<nc_total; ++i) {
- index_ww=GetabIndex (i+1, i+1, n_cvt);
- ps_ww=gsl_matrix_get (Pab, i, index_ww);
- ps2_ww=gsl_matrix_get (PPab, i, index_ww);
- trace_P-=ps2_ww/ps_ww;
- }
- double trace_PK=(df-trace_P)/l;
-
- // Calculate yPKPy, yPKPKPy.
- index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
-
- dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
-
- gsl_matrix_free (Pab);
- gsl_matrix_free (PPab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (HiHi_eval);
- gsl_vector_free (v_temp);
-
- return dev1;
+ gsl_vector_free(u_a);
+ return;
}
-double LogRL_dev2 (double l, void *params) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- double df;
- size_t nc_total;
- if (p->calc_null==true) {
- nc_total=n_cvt;
- df=(double)ni_test-(double)n_cvt;
- }
- else {
- nc_total=n_cvt+1;
- df=(double)ni_test-(double)n_cvt-1.0;
- }
-
- double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
- size_t index_ww;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
- gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
- gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
- gsl_vector_set_all (v_temp, 1.0);
- gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
- gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
- trace_Hi=(double)ni_test-trace_Hi;
- trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab,
- PPab, PPPab);
-
- // Calculate tracePK and trace PKPK.
- double trace_P=trace_Hi, trace_PP=trace_HiHi;
- double ps_ww, ps2_ww, ps3_ww;
- for (size_t i=0; i<nc_total; ++i) {
- index_ww=GetabIndex (i+1, i+1, n_cvt);
- ps_ww=gsl_matrix_get (Pab, i, index_ww);
- ps2_ww=gsl_matrix_get (PPab, i, index_ww);
- ps3_ww=gsl_matrix_get (PPPab, i, index_ww);
- trace_P-=ps2_ww/ps_ww;
- trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww;
- }
- double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
- // Calculate yPKPy, yPKPKPy.
- index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
- double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
- dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
- gsl_matrix_free (Pab);
- gsl_matrix_free (PPab);
- gsl_matrix_free (PPPab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (HiHi_eval);
- gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
- return dev2;
-}
+void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, // Fills the Uab columns that pair Utx with each UtW column, with Utx itself and with Uty.
+ const gsl_vector *Utx, gsl_matrix *Uab) { // Only the columns GetabIndex(n_cvt + 1, b, n_cvt) of Uab are written here.
+ size_t index_ab;
+ size_t n_cvt = UtW->size2; // Covariate count is taken from the number of columns of UtW.
+ // b is 1-based: 1..n_cvt selects a UtW column, n_cvt + 1 selects Utx, n_cvt + 2 selects Uty.
+ for (size_t b = 1; b <= n_cvt + 2; ++b) {
+ index_ab = GetabIndex(n_cvt + 1, b, n_cvt); // Destination column; GetabIndex is defined elsewhere.
+ gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
+ // First copy the b-th vector into the destination column.
+ if (b == n_cvt + 2) {
+ gsl_vector_memcpy(&Uab_col.vector, Uty);
+ } else if (b == n_cvt + 1) {
+ gsl_vector_memcpy(&Uab_col.vector, Utx);
+ } else {
+ gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1); // UtW columns are 0-based, hence b - 1.
+ gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector);
+ }
-void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) {
- FUNC_PARAM *p=(FUNC_PARAM *) params;
- size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- double df;
- size_t nc_total;
- if (p->calc_null==true) {
- nc_total=n_cvt;
- df=(double)ni_test-(double)n_cvt;
- }
- else {
- nc_total=n_cvt+1;
- df=(double)ni_test-(double)n_cvt-1.0;
- }
-
- double trace_Hi=0.0, trace_HiHi=0.0;
- size_t index_ww;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
- gsl_vector_memcpy (v_temp, p->eval);
- gsl_vector_scale (v_temp, l);
- if (p->e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
- gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
- gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
- gsl_vector_set_all (v_temp, 1.0);
- gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
- gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
- trace_Hi=(double)ni_test-trace_Hi;
- trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab,
- PPab, PPPab);
-
- // Calculate tracePK and trace PKPK.
- double trace_P=trace_Hi, trace_PP=trace_HiHi;
- double ps_ww, ps2_ww, ps3_ww;
- for (size_t i=0; i<nc_total; ++i) {
- index_ww=GetabIndex (i+1, i+1, n_cvt);
- ps_ww=gsl_matrix_get (Pab, i, index_ww);
- ps2_ww=gsl_matrix_get (PPab, i, index_ww);
- ps3_ww=gsl_matrix_get (PPPab, i, index_ww);
- trace_P-=ps2_ww/ps_ww;
- trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww;
- }
- double trace_PK=(df-trace_P)/l;
- double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
- // Calculate yPKPy, yPKPKPy.
- index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
- double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
- *dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
- *dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/
- (P_yy*P_yy);
-
- gsl_matrix_free (Pab);
- gsl_matrix_free (PPab);
- gsl_matrix_free (PPPab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (HiHi_eval);
- gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
- return;
-}
+ gsl_vector_mul(&Uab_col.vector, Utx); // Then multiply element-wise by Utx, in place in the Uab column.
+ }
-void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params,
- double &beta, double &se, double &p_wald) {
- size_t n_cvt=params.n_cvt;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- int df=(int)ni_test-(int)n_cvt-1;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
- gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
- gsl_vector_memcpy (v_temp, params.eval);
- gsl_vector_scale (v_temp, l);
- if (params.e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
- size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
- double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
- double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
- double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
- double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
- beta=P_xy/P_xx;
- double tau=(double)df/Px_yy;
- se=sqrt(1.0/(tau*P_xx));
- p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df);
-
- gsl_matrix_free (Pab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (v_temp);
- return;
+ return;
}
-void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params,
- double &beta, double &se, double &p_score) {
- size_t n_cvt=params.n_cvt;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- int df=(int)ni_test-(int)n_cvt-1;
-
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
- gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
- gsl_vector_memcpy (v_temp, params.eval);
- gsl_vector_scale (v_temp, l);
- if (params.e_mode==0) {
- gsl_vector_set_all (Hi_eval, 1.0);
- } else {
- gsl_vector_memcpy (Hi_eval, v_temp);
- }
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
- size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
- double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
- double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
- double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
- double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
- beta=P_xy/P_xx;
- double tau=(double)df/Px_yy;
- se=sqrt(1.0/(tau*P_xx));
-
- p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx),
- 1.0, df);
-
- gsl_matrix_free (Pab);
- gsl_vector_free (Hi_eval);
- gsl_vector_free (v_temp);
- return;
-}
+void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) { // Stores in ab the dot products of every pair drawn from the columns of W and the vector y.
+ size_t index_ab;
+ size_t n_cvt = W->size2; // Covariate count is taken from the number of columns of W.
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
- size_t index_ab;
- size_t n_cvt=UtW->size2;
-
- gsl_vector *u_a=gsl_vector_alloc (Uty->size);
-
- for (size_t a=1; a<=n_cvt+2; ++a) {
- if (a==n_cvt+1) {continue;}
-
- if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);}
- else {
- gsl_vector_const_view UtW_col=
- gsl_matrix_const_column (UtW, a-1);
- gsl_vector_memcpy (u_a, &UtW_col.vector);
- }
-
- for (size_t b=a; b>=1; --b) {
- if (b==n_cvt+1) {continue;}
-
- index_ab=GetabIndex (a, b, n_cvt);
- gsl_vector_view Uab_col=
- gsl_matrix_column (Uab, index_ab);
-
- if (b==n_cvt+2) {
- gsl_vector_memcpy (&Uab_col.vector, Uty);
- }
- else {
- gsl_vector_const_view UtW_col=
- gsl_matrix_const_column (UtW, b-1);
- gsl_vector_memcpy (&Uab_col.vector,
- &UtW_col.vector);
- }
-
- gsl_vector_mul(&Uab_col.vector, u_a);
- }
- }
-
- gsl_vector_free (u_a);
- return;
-}
+ double d;
+ gsl_vector *v_a = gsl_vector_alloc(y->size); // Scratch copy of vector a; freed before returning.
+ gsl_vector *v_b = gsl_vector_alloc(y->size); // Scratch copy of vector b; freed before returning.
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_vector *Utx, gsl_matrix *Uab) {
- size_t index_ab;
- size_t n_cvt=UtW->size2;
-
- for (size_t b=1; b<=n_cvt+2; ++b) {
- index_ab=GetabIndex (n_cvt+1, b, n_cvt);
- gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
-
- if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
- else if (b==n_cvt+1) {
- gsl_vector_memcpy (&Uab_col.vector, Utx);
- }
- else {
- gsl_vector_const_view UtW_col=
- gsl_matrix_const_column (UtW, b-1);
- gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
- }
-
- gsl_vector_mul(&Uab_col.vector, Utx);
- }
-
- return;
-}
+ for (size_t a = 1; a <= n_cvt + 2; ++a) { // a is 1-based: 1..n_cvt selects a column of W, n_cvt + 2 selects y.
+ if (a == n_cvt + 1) { // Index n_cvt + 1 is skipped here; the overload that takes x writes those entries.
+ continue;
+ }
-void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
- size_t index_ab;
- size_t n_cvt=W->size2;
-
- double d;
- gsl_vector *v_a=gsl_vector_alloc (y->size);
- gsl_vector *v_b=gsl_vector_alloc (y->size);
-
- for (size_t a=1; a<=n_cvt+2; ++a) {
- if (a==n_cvt+1) {continue;}
-
- if (a==n_cvt+2) {
- gsl_vector_memcpy (v_a, y);
- }
- else {
- gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1);
- gsl_vector_memcpy (v_a, &W_col.vector);
- }
-
- for (size_t b=a; b>=1; --b) {
- if (b==n_cvt+1) {continue;}
-
- index_ab=GetabIndex (a, b, n_cvt);
-
- if (b==n_cvt+2) {
- gsl_vector_memcpy (v_b, y);
- }
- else {
- gsl_vector_const_view W_col=
- gsl_matrix_const_column (W, b-1);
- gsl_vector_memcpy (v_b, &W_col.vector);
- }
-
- gsl_blas_ddot (v_a, v_b, &d);
- gsl_vector_set(ab, index_ab, d);
- }
- }
-
- gsl_vector_free (v_a);
- gsl_vector_free (v_b);
- return;
+ if (a == n_cvt + 2) {
+ gsl_vector_memcpy(v_a, y);
+ } else {
+ gsl_vector_const_view W_col = gsl_matrix_const_column(W, a - 1); // W columns are 0-based, hence a - 1.
+ gsl_vector_memcpy(v_a, &W_col.vector);
+ }
+ // b counts down from a to 1, so each unordered pair (a, b) is visited once.
+ for (size_t b = a; b >= 1; --b) {
+ if (b == n_cvt + 1) { // Same skip as for a.
+ continue;
+ }
+
+ index_ab = GetabIndex(a, b, n_cvt); // Slot of ab for this pair; GetabIndex is defined elsewhere.
+
+ if (b == n_cvt + 2) {
+ gsl_vector_memcpy(v_b, y);
+ } else {
+ gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1);
+ gsl_vector_memcpy(v_b, &W_col.vector);
+ }
+
+ gsl_blas_ddot(v_a, v_b, &d); // d = v_a . v_b
+ gsl_vector_set(ab, index_ab, d);
+ }
+ }
+
+ gsl_vector_free(v_a);
+ gsl_vector_free(v_b);
+ return;
}
-void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
- gsl_vector *ab) {
- size_t index_ab;
- size_t n_cvt=W->size2;
+void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, // Stores in ab the dot products of x with each column of W, with x itself and with y.
+ gsl_vector *ab) { // Only the entries GetabIndex(n_cvt + 1, b, n_cvt) of ab are written here.
+ size_t index_ab;
+ size_t n_cvt = W->size2; // Covariate count is taken from the number of columns of W.
- double d;
- gsl_vector *v_b=gsl_vector_alloc (y->size);
+ double d;
+ gsl_vector *v_b = gsl_vector_alloc(y->size); // Scratch copy of the b-th vector; freed before returning.
- for (size_t b=1; b<=n_cvt+2; ++b) {
- index_ab=GetabIndex (n_cvt+1, b, n_cvt);
+ for (size_t b = 1; b <= n_cvt + 2; ++b) { // b is 1-based: 1..n_cvt selects a column of W, n_cvt + 1 selects x, n_cvt + 2 selects y.
+ index_ab = GetabIndex(n_cvt + 1, b, n_cvt); // Destination slot; GetabIndex is defined elsewhere.
- if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
- else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);}
- else {
- gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
- gsl_vector_memcpy (v_b, &W_col.vector);
- }
+ if (b == n_cvt + 2) {
+ gsl_vector_memcpy(v_b, y);
+ } else if (b == n_cvt + 1) {
+ gsl_vector_memcpy(v_b, x);
+ } else {
+ gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1); // W columns are 0-based, hence b - 1.
+ gsl_vector_memcpy(v_b, &W_col.vector);
+ }
- gsl_blas_ddot (x, v_b, &d);
- gsl_vector_set(ab, index_ab, d);
- }
+ gsl_blas_ddot(x, v_b, &d); // d = x . v_b
+ gsl_vector_set(ab, index_ab, d);
+ }
- gsl_vector_free (v_b);
- return;
+ gsl_vector_free(v_b);
+ return;
}
-void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Utx,
- const gsl_matrix *W, const gsl_vector *x) {
- igzstream infile (file_gene.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading gene expression file:"<<file_gene<<endl;
- return;
- }
-
- clock_t time_start=clock();
-
- string line;
- char *ch_ptr;
-
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
- double p_lrt=0, p_score=0;
- double logl_H1=0.0, logl_H0=0.0, l_H0;
- int c_phen;
- string rs; // Gene id.
- double d;
-
- // Calculate basic quantities.
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_vector *y=gsl_vector_alloc (U->size1);
- gsl_vector *Uty=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
- // Header.
- getline(infile, line);
-
- for (size_t t=0; t<ng_total; t++) {
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==ng_total-1) {
- ProgressBar ("Performing Analysis ", t, ng_total-1);
- }
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
-
- c_phen=0;
- for (size_t i=0; i<indicator_idv.size(); ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- d=atof(ch_ptr);
- gsl_vector_set(y, c_phen, d);
-
- c_phen++;
- }
-
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Calculate null.
- time_start=clock();
-
- gsl_matrix_set_zero (Uab);
-
- CalcUab (UtW, Uty, Uab);
- FUNC_PARAM param0={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
- if (a_mode==2 || a_mode==3 || a_mode==4) {
- CalcLambda('L', param0, l_min, l_max, n_region,
- l_H0, logl_H0);
- }
-
- // Calculate alternative.
- CalcUab(UtW, Uty, Utx, Uab);
- FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
- //3 is before 1.
- if (a_mode==3 || a_mode==4) {
- CalcRLScore (l_H0, param1, beta, se, p_score);
- }
-
- if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region,
- lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region,
- lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
- }
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
-
- gsl_vector_free (y);
- gsl_vector_free (Uty);
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
-
- infile.close();
- infile.clear();
+void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, // Runs the tests selected by a_mode once per data row of the gene expression file, using that row as the response y.
+ const gsl_matrix *UtW, const gsl_vector *Utx, // One SUMSTAT record per row is appended to sumStat.
+ const gsl_matrix *W, const gsl_vector *x) { // W and x are not referenced in this body.
+ igzstream infile(file_gene.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading gene expression file:" << file_gene << endl;
+ return; // Nothing has been allocated yet at this point.
+ }
+
+ clock_t time_start = clock();
+
+ string line;
+ char *ch_ptr;
+
+ double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+ double p_lrt = 0, p_score = 0;
+ double logl_H1 = 0.0, logl_H0 = 0.0, l_H0; // l_H0 is only assigned and read when a_mode is 2, 3 or 4.
+ int c_phen;
+ string rs; // Gene id (first field of each row); assigned but not read again in this function.
+ double d;
+
+ // Calculate basic quantities.
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // m * (m + 1) / 2 with m = n_cvt + 2.
+
+ gsl_vector *y = gsl_vector_alloc(U->size1);
+ gsl_vector *Uty = gsl_vector_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index); // NOTE(review): ab is never filled in this function, only passed on via FUNC_PARAM.
+
+ // Read and discard the header line.
+ getline(infile, line);
+
+ for (size_t t = 0; t < ng_total; t++) {
+ !safeGetline(infile, line).eof(); // NOTE(review): the negated eof() result is discarded, so a short file is not detected here.
+ if (t % d_pace == 0 || t == ng_total - 1) {
+ ProgressBar("Performing Analysis ", t, ng_total - 1);
+ }
+ ch_ptr = strtok((char *)line.c_str(), " , \t"); // Split on space, comma or tab. NOTE(review): strtok writes through the cast-away const of c_str().
+ rs = ch_ptr;
+
+ c_phen = 0;
+ for (size_t i = 0; i < indicator_idv.size(); ++i) {
+ ch_ptr = strtok(NULL, " , \t"); // A field is consumed for every individual, including those skipped below.
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ d = atof(ch_ptr); // NOTE(review): ch_ptr is not checked for NULL (row with too few fields).
+ gsl_vector_set(y, c_phen, d);
+
+ c_phen++;
+ }
+
+ time_start = clock();
+ gsl_blas_dgemv(CblasTrans, 1.0, U, y, 0.0, Uty); // Uty = U^T y
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Accumulated in minutes.
+
+ // Calculate null.
+ time_start = clock();
+
+ gsl_matrix_set_zero(Uab);
+
+ CalcUab(UtW, Uty, Uab);
+ FUNC_PARAM param0 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+ if (a_mode == 2 || a_mode == 3 || a_mode == 4) { // l_H0 feeds the score test below; logl_H0 feeds the likelihood ratio test.
+ CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0);
+ }
+
+ // Calculate alternative.
+ CalcUab(UtW, Uty, Utx, Uab);
+ FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+ // Mode 3 (score) runs before mode 1 (Wald); both receive the same beta and se variables.
+ if (a_mode == 3 || a_mode == 4) {
+ CalcRLScore(l_H0, param1, beta, se, p_score);
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1); // Upper tail of chi-square with 1 degree of freedom.
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Accumulated in minutes.
+
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; // Values from tests not selected by a_mode keep their previous contents.
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
+
+ gsl_vector_free(y);
+ gsl_vector_free(Uty);
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+
+ infile.close();
+ infile.clear();
- return;
+ return;
}
-void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return;
- }
-
- clock_t time_start=clock();
-
- string line;
- char *ch_ptr;
-
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
- double p_lrt=0, p_score=0;
- double logl_H1=0.0;
- int n_miss, c_phen;
- double geno, x_mean;
-
- // Calculate basic quantities.
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_vector *x=gsl_vector_alloc (U->size1);
- gsl_vector *x_miss=gsl_vector_alloc (U->size1);
- gsl_vector *Utx=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
-
- //start reading genotypes and analyze
- size_t c=0, t_last=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (indicator_snp[t]==0) {continue;}
- t_last++;
- }
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;
- }
- else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {
- gsl_vector_set(x, i, x_mean);
- }
- }
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- c++;
-
- if (c%msize==0 || c==t_last) {
- size_t l=0;
- if (c%msize==0) {l=msize;} else {l=c%msize;}
-
- gsl_matrix_view Xlarge_sub=
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
- 0.0, &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=
- gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
-
- CalcUab(UtW, Uty, Utx, Uab);
-
- time_start=clock();
- FUNC_PARAM param1=
- {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- CalcRLScore (l_mle_null, param1, beta, se, p_score);
- }
-
- if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region,
- lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region,
- lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
- }
-
- time_opt+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
-
- sumStat.push_back(SNPs);
- }
- }
- }
- cout<<endl;
-
- gsl_vector_free (x);
- gsl_vector_free (x_miss);
- gsl_vector_free (Utx);
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
-
- gsl_matrix_free (Xlarge);
- gsl_matrix_free (UtXlarge);
-
- infile.close();
- infile.clear();
-
- return;
+void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, // Reads the text genotype file line by line and runs the tests selected by a_mode on every SNP with indicator_snp != 0.
+ const gsl_matrix *UtW, const gsl_vector *Uty, // One SUMSTAT record per analyzed SNP is appended to sumStat.
+ const gsl_matrix *W, const gsl_vector *y) { // W and y are not referenced in this body.
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return; // Nothing has been allocated yet at this point.
+ }
+
+ clock_t time_start = clock();
+
+ string line;
+ char *ch_ptr;
+
+ double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+ double p_lrt = 0, p_score = 0;
+ double logl_H1 = 0.0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ // Calculate basic quantities.
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // m * (m + 1) / 2 with m = n_cvt + 2.
+
+ gsl_vector *x = gsl_vector_alloc(U->size1);
+ gsl_vector *x_miss = gsl_vector_alloc(U->size1); // 1.0 where a genotype was read, 0.0 where it was missing.
+ gsl_vector *Utx = gsl_vector_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index); // NOTE(review): ab is never filled in this function, only passed on via FUNC_PARAM.
+
+ // Genotypes are buffered msize SNPs at a time so that U^T X is computed with one matrix product per batch.
+ size_t msize = 10000;
+ gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); // NOTE(review): U->size1 rows, while Utx has U->size2 entries; the column copy below needs them equal.
+ gsl_matrix_set_zero(Xlarge);
+
+ gsl_matrix_set_zero(Uab);
+ CalcUab(UtW, Uty, Uab); // Filled once here; the per-SNP CalcUab call below takes Utx as an extra argument.
+
+ // Count the SNPs to analyze (t_last), then read genotypes and analyze.
+ size_t c = 0, t_last = 0;
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+ t_last++;
+ }
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof(); // A line is read for every SNP, kept or not. NOTE(review): the negated eof() result is discarded.
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+ // The first three fields of the line are skipped; genotype values start at the fourth.
+ ch_ptr = strtok((char *)line.c_str(), " , \t"); // NOTE(review): strtok writes through the cast-away const of c_str().
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t"); // A field is consumed for every individual, including those skipped below.
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) { // NOTE(review): ch_ptr is not checked for NULL (line with too few fields).
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+
+ x_mean /= (double)(ni_test - n_miss); // Mean over observed genotypes. NOTE(review): divisor is zero if every genotype is missing.
+ // Replace missing genotypes by the mean of the observed ones.
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean);
+ }
+ }
+
+ gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); // Position of this SNP inside the current batch.
+ gsl_vector_memcpy(&Xlarge_col.vector, x);
+ c++;
+
+ if (c % msize == 0 || c == t_last) { // Process the batch when it is full or when the last kept SNP has been read.
+ size_t l = 0; // Number of SNPs in this batch.
+ if (c % msize == 0) {
+ l = msize;
+ } else {
+ l = c % msize;
+ }
+
+ gsl_matrix_view Xlarge_sub =
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub =
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+ time_start = clock();
+ eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, // UtXlarge_sub = U^T * Xlarge_sub
+ &UtXlarge_sub.matrix);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Accumulated in minutes.
+
+ gsl_matrix_set_zero(Xlarge);
+
+ for (size_t i = 0; i < l; i++) {
+ gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+ gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+
+ CalcUab(UtW, Uty, Utx, Uab);
+
+ time_start = clock();
+ FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+ // Mode 3 (score) runs before mode 1 (Wald); both receive the same beta and se variables.
+ if (a_mode == 3 || a_mode == 4) {
+ CalcRLScore(l_mle_null, param1, beta, se, p_score); // l_mle_null is declared outside this function.
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
+ logl_H1);
+ CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); // logl_mle_H0 is declared outside this function.
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Accumulated in minutes.
+
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle,
+ p_wald, p_lrt, p_score};
+
+ sumStat.push_back(SNPs);
+ }
+ }
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+ gsl_vector_free(Utx);
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+
+ gsl_matrix_free(Xlarge);
+ gsl_matrix_free(UtXlarge);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
-void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y) {
- string file_bed=file_bfile+".bed";
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
- clock_t time_start=clock();
-
- char ch[1];
- bitset<8> b;
-
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
- double p_lrt=0, p_score=0;
- double logl_H1=0.0;
- int n_bit, n_miss, ci_total, ci_test;
- double geno, x_mean;
-
- // Calculate basic quantities.
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_vector *x=gsl_vector_alloc (U->size1);
- gsl_vector *Utx=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
-
- // Calculate n_bit and c, the number of bit for each SNP.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- size_t c=0, t_last=0;
- for (size_t t=0; t<snpInfo.size(); ++t) {
- if (indicator_snp[t]==0) {continue;}
- t_last++;
- }
- for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {
- ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
- break;
- }
- if (indicator_idv[ci_total]==0) {
- ci_total++;
- continue;
- }
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(x, ci_test, 2);
- x_mean+=2.0;
- }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
- }
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- geno=gsl_vector_get(x,i);
- if (geno==-9) {
- gsl_vector_set(x, i, x_mean);
- geno=x_mean;
- }
- }
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- c++;
-
- if (c%msize==0 || c==t_last) {
- size_t l=0;
- if (c%msize==0) {l=msize;} else {l=c%msize;}
-
- gsl_matrix_view Xlarge_sub=
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
- 0.0, &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=
- gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
-
- CalcUab(UtW, Uty, Utx, Uab);
-
- time_start=clock();
- FUNC_PARAM param1={false, ni_test, n_cvt, eval,
- Uab, ab, 0};
-
- // 3 is before 1, for beta.
- if (a_mode==3 || a_mode==4) {
- CalcRLScore (l_mle_null, param1, beta, se, p_score);
- }
-
- if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region,
- lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region,
- lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
- }
-
- time_opt+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout<<endl;
-
- gsl_vector_free (x);
- gsl_vector_free (Utx);
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
-
- infile.close();
- infile.clear();
-
- return;
+void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, // Reads genotypes from the binary <file_bfile>.bed file and runs the tests selected by a_mode on every SNP with indicator_snp != 0.
+ const gsl_matrix *UtW, const gsl_vector *Uty, // One SUMSTAT record per analyzed SNP is appended to sumStat.
+ const gsl_matrix *W, const gsl_vector *y) { // W and y are not referenced in this body.
+ string file_bed = file_bfile + ".bed";
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return; // Nothing has been allocated yet at this point.
+ }
+
+ clock_t time_start = clock();
+
+ char ch[1]; // One byte read from the file at a time.
+ bitset<8> b; // Bit view of the byte just read.
+
+ double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+ double p_lrt = 0, p_score = 0;
+ double logl_H1 = 0.0;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+
+ // Calculate basic quantities.
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // m * (m + 1) / 2 with m = n_cvt + 2.
+
+ gsl_vector *x = gsl_vector_alloc(U->size1);
+ gsl_vector *Utx = gsl_vector_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index); // NOTE(review): ab is never filled in this function, only passed on via FUNC_PARAM.
+
+ // Genotypes are buffered msize SNPs at a time so that U^T X is computed with one matrix product per batch.
+ size_t msize = 10000;
+ gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); // NOTE(review): U->size1 rows, while Utx has U->size2 entries; the column copy below needs them equal.
+ gsl_matrix_set_zero(Xlarge);
+
+ gsl_matrix_set_zero(Uab);
+ CalcUab(UtW, Uty, Uab); // Filled once here; the per-SNP CalcUab call below takes Utx as an extra argument.
+
+ // n_bit is the number of bytes stored per SNP: four individuals per byte, rounded up.
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Read and discard the first three bytes of the file (nothing is printed).
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ size_t c = 0, t_last = 0; // c: SNPs buffered so far; t_last: total number of SNPs to analyze.
+ for (size_t t = 0; t < snpInfo.size(); ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+ t_last++;
+ }
+ for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+ if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+ ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // Seek to the data of SNP t: n_bit bytes per SNP, after the 3 leading bytes.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ x_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0; // Index over all individuals in the file.
+ ci_test = 0; // Index over the individuals kept for analysis.
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Two bits per individual: 00 -> 2, 01 -> 1, 11 -> 0, 10 -> missing (written as b[2j] b[2j+1]). Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { // Stop at the padding bits of the last byte.
+ break;
+ }
+ if (indicator_idv[ci_total] == 0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean += 2.0;
+ } else {
+ gsl_vector_set(x, ci_test, 1);
+ x_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(x, ci_test, 0);
+ } else {
+ gsl_vector_set(x, ci_test, -9); // -9 marks a missing genotype until it is replaced below.
+ n_miss++;
+ }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean /= (double)(ni_test - n_miss); // Mean over observed genotypes. NOTE(review): divisor is zero if every genotype is missing.
+ // Replace missing genotypes by the mean of the observed ones.
+ for (size_t i = 0; i < ni_test; ++i) {
+ geno = gsl_vector_get(x, i);
+ if (geno == -9) {
+ gsl_vector_set(x, i, x_mean);
+ geno = x_mean; // geno is not read again after this assignment.
+ }
+ }
+
+ gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); // Position of this SNP inside the current batch.
+ gsl_vector_memcpy(&Xlarge_col.vector, x);
+ c++;
+
+ if (c % msize == 0 || c == t_last) { // Process the batch when it is full or when the last kept SNP has been read.
+ size_t l = 0; // Number of SNPs in this batch.
+ if (c % msize == 0) {
+ l = msize;
+ } else {
+ l = c % msize;
+ }
+
+ gsl_matrix_view Xlarge_sub =
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub =
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+ time_start = clock();
+ eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, // UtXlarge_sub = U^T * Xlarge_sub
+ &UtXlarge_sub.matrix);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Accumulated in minutes.
+
+ gsl_matrix_set_zero(Xlarge);
+
+ for (size_t i = 0; i < l; i++) {
+ gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+ gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+
+ CalcUab(UtW, Uty, Utx, Uab);
+
+ time_start = clock();
+ FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+ // Mode 3 (score) runs before mode 1 (Wald); both receive the same beta and se variables.
+ if (a_mode == 3 || a_mode == 4) {
+ CalcRLScore(l_mle_null, param1, beta, se, p_score); // l_mle_null is declared outside this function.
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
+ logl_H1);
+ CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); // logl_mle_H0 is declared outside this function.
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Accumulated in minutes.
+
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle,
+ p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ }
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+ gsl_vector_free(Utx);
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+
+ gsl_matrix_free(Xlarge);
+ gsl_matrix_free(UtXlarge);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
// WJA added.
-void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y) {
- string file_bgen=file_oxford+".bgen";
- ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bgen file:"<<file_bgen<<endl;
- return;
- }
-
- clock_t time_start=clock();
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
- double p_lrt=0, p_score=0;
- double logl_H1=0.0;
- int n_miss, c_phen;
- double geno, x_mean;
-
- // Calculate basic quantities.
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_vector *x=gsl_vector_alloc (U->size1);
- gsl_vector *x_miss=gsl_vector_alloc (U->size1);
- gsl_vector *Utx=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
- infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
- bgen_snp_block_offset-=4;
- infile.ignore(4+bgen_header_length-20);
- bgen_snp_block_offset-=4+bgen_header_length-20;
- infile.read(reinterpret_cast<char*>(&bgen_flags),4);
- bgen_snp_block_offset-=4;
- bool CompressedSNPBlocks=bgen_flags&0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
- double bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- std::cout << "Warning: WJA hard coded SNP missingness " <<
- "threshold of 10%"<<std::endl;
-
- // Start reading genotypes and analyze.
- size_t c=0, t_last=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (indicator_snp[t]==0) {continue;}
- t_last++;
- }
- for (size_t t=0; t<indicator_snp.size(); ++t)
- {
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // Read SNP header.
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char*>(&bgen_N),4);
- infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char*>(&bgen_LR),2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char*>(&bgen_LC),2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
- infile.read(reinterpret_cast<char*>(&bgen_LA),4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
-
- infile.read(reinterpret_cast<char*>(&bgen_LB),4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3*bgen_N];
-
- if (indicator_snp[t]==0) {
- if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- else
- bgen_P=6*bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if(CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size=6*bgen_N;
-
- infile.read(reinterpret_cast<char*>(zipped_data),
- bgen_P);
-
- int result=
- uncompress(reinterpret_cast<Bytef*>(unzipped_data),
- reinterpret_cast<uLongf*>(&unzipped_data_size),
- reinterpret_cast<Bytef*>(zipped_data),
- static_cast<uLong> (bgen_P));
- assert(result == Z_OK);
-
- }
- else
- {
-
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
- }
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<bgen_N; ++i) {
- if (indicator_idv[i]==0) {continue;}
-
- bgen_geno_prob_AA=
- static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB=
- static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB=
- static_cast<double>(unzipped_data[i*3+2])/32768.0;
-
- // WJA.
- bgen_geno_prob_non_miss = bgen_geno_prob_AA +
- bgen_geno_prob_AB+bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss<0.9) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- }
- else {
-
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
- geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=static_cast<double>(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {
- gsl_vector_set(x, i, x_mean);
- }
- geno=gsl_vector_get(x, i);
- }
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- c++;
-
- if (c%msize==0 || c==t_last ) {
- size_t l=0;
- if (c%msize==0) {l=msize;} else {l=c%msize;}
-
- gsl_matrix_view Xlarge_sub=
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub=
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix,
- 0.0, &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=
- gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
-
- CalcUab(UtW, Uty, Utx, Uab);
-
- time_start=clock();
- FUNC_PARAM param1={false,ni_test,n_cvt,eval,Uab,ab,0};
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- CalcRLScore (l_mle_null, param1, beta, se, p_score);
- }
-
- if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region,
- lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region,
- lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
- }
-
- time_opt+=(clock()-time_start)/
- (double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout<<endl;
-
- gsl_vector_free (x);
- gsl_vector_free (x_miss);
- gsl_vector_free (Utx);
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
-
- infile.close();
- infile.clear();
-
- return;
+// Run the single-SNP LMM association tests on genotypes read from the Oxford
+// BGEN file "<file_oxford>.bgen".
+//
+//   U, eval   U is multiplied (transposed) into each batch of genotypes;
+//             eval is handed to the likelihood routines through FUNC_PARAM.
+//   UtW, Uty  passed to CalcUab together with each transformed genotype.
+//   W, y      not referenced in this function.
+//
+// One SUMSTAT entry is appended to sumStat for every analysed SNP. Which tests
+// run depends on a_mode: 1 = CalcLambda('R') + Wald, 2 = CalcLambda('L') + LRT,
+// 3 = score test, 4 = all of them. Time spent in the U^T X product and in the
+// tests is accumulated (in minutes) into time_UtX and time_opt.
+//
+// If the file cannot be opened an error is printed and the function returns
+// without touching sumStat.
+void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+                      const gsl_matrix *UtW, const gsl_vector *Uty,
+                      const gsl_matrix *W, const gsl_vector *y) {
+  string file_bgen = file_oxford + ".bgen";
+  ifstream infile(file_bgen.c_str(), ios::binary);
+  if (!infile) {
+    cout << "error reading bgen file:" << file_bgen << endl;
+    return;
+  }
+
+  clock_t time_start = clock();
+  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+  double p_lrt = 0, p_score = 0;
+  double logl_H1 = 0.0;
+  int n_miss, c_phen;
+  double geno, x_mean;
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+  gsl_vector *x = gsl_vector_alloc(U->size1);
+  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_alloc(n_index);
+
+  // Create a large matrix: genotypes are buffered msize SNPs at a time so
+  // that U^T X is computed with one matrix product per batch.
+  size_t msize = 10000;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+  gsl_matrix_set_zero(Xlarge);
+
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
+
+  // Read in header. bgen_snp_block_offset is decremented by every byte
+  // consumed after the offset field, so the final ignore() lands on the
+  // first SNP block.
+  uint32_t bgen_snp_block_offset;
+  uint32_t bgen_header_length;
+  uint32_t bgen_nsamples;
+  uint32_t bgen_nsnps;
+  uint32_t bgen_flags;
+  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+  bgen_snp_block_offset -= 4;
+  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+  bgen_snp_block_offset -= 4;
+  infile.ignore(4 + bgen_header_length - 20);
+  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+  bgen_snp_block_offset -= 4;
+  bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+  infile.ignore(bgen_snp_block_offset);
+
+  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
+  double bgen_geno_prob_non_miss;
+
+  uint32_t bgen_N;
+  uint16_t bgen_LS;
+  uint16_t bgen_LR;
+  uint16_t bgen_LC;
+  uint32_t bgen_SNP_pos;
+  uint32_t bgen_LA;
+  std::string bgen_A_allele;
+  uint32_t bgen_LB;
+  std::string bgen_B_allele;
+  uint32_t bgen_P;
+  // zlib reports the inflated size through a uLongf, which need not have the
+  // same width as size_t, so use the exact type instead of casting a pointer.
+  uLongf unzipped_data_size;
+  // Heap buffers reused across SNPs. Their sizes come from the file, so they
+  // must not live on the stack (and variable-length arrays are not C++).
+  vector<uint16_t> unzipped_data;
+  vector<uint8_t> zipped_data;
+  string id;
+  string rs;
+  string chr;
+  std::cout << "Warning: WJA hard coded SNP missingness "
+            << "threshold of 10%" << std::endl;
+
+  // Start reading genotypes and analyze.
+  // t_last is the number of SNPs that pass the filter; it is used to detect
+  // the final (possibly partial) batch.
+  size_t c = 0, t_last = 0;
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (indicator_snp[t] == 0) {
+      continue;
+    }
+    t_last++;
+  }
+  for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    if (t % d_pace == 0 || t == (ns_total - 1)) {
+      ProgressBar("Reading SNPs ", t, ns_total - 1);
+    }
+
+    // Read SNP header. This is done for every SNP, including filtered ones:
+    // the record has to be consumed from the stream to keep the following
+    // SNPs aligned, so indicator_snp is only checked after the header.
+    id.clear();
+    rs.clear();
+    chr.clear();
+    bgen_A_allele.clear();
+    bgen_B_allele.clear();
+
+    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+    id.resize(bgen_LS);
+    infile.read(&id[0], bgen_LS);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+    rs.resize(bgen_LR);
+    infile.read(&rs[0], bgen_LR);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+    chr.resize(bgen_LC);
+    infile.read(&chr[0], bgen_LC);
+
+    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+    bgen_A_allele.resize(bgen_LA);
+    infile.read(&bgen_A_allele[0], bgen_LA);
+
+    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+    bgen_B_allele.resize(bgen_LB);
+    infile.read(&bgen_B_allele[0], bgen_LB);
+
+    // Filtered SNP: skip its genotype block without decoding it.
+    if (indicator_snp[t] == 0) {
+      if (CompressedSNPBlocks)
+        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      else
+        bgen_P = 6 * bgen_N;
+
+      infile.ignore(static_cast<size_t>(bgen_P));
+
+      continue;
+    }
+
+    // Three 16-bit values per sample (6 bytes per sample).
+    unzipped_data.resize(3 * static_cast<size_t>(bgen_N));
+
+    if (CompressedSNPBlocks) {
+      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+      zipped_data.resize(bgen_P);
+
+      unzipped_data_size = 6 * bgen_N;
+
+      infile.read(reinterpret_cast<char *>(zipped_data.data()), bgen_P);
+
+      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data.data()),
+                              &unzipped_data_size,
+                              reinterpret_cast<Bytef *>(zipped_data.data()),
+                              static_cast<uLong>(bgen_P));
+      assert(result == Z_OK);
+
+    } else {
+
+      bgen_P = 6 * bgen_N;
+      infile.read(reinterpret_cast<char *>(unzipped_data.data()), bgen_P);
+    }
+
+    // Convert genotype probabilities to expected dosages for the individuals
+    // kept by indicator_idv; x_miss marks which entries of x hold real data.
+    x_mean = 0.0;
+    c_phen = 0;
+    n_miss = 0;
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < bgen_N; ++i) {
+      if (indicator_idv[i] == 0) {
+        continue;
+      }
+
+      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+      bgen_geno_prob_AB =
+          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+      bgen_geno_prob_BB =
+          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+
+      // WJA: a genotype whose probabilities sum to less than 0.9 is treated
+      // as missing; otherwise the probabilities are renormalised to sum to 1.
+      bgen_geno_prob_non_miss =
+          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+      if (bgen_geno_prob_non_miss < 0.9) {
+        gsl_vector_set(x_miss, c_phen, 0.0);
+        n_miss++;
+      } else {
+
+        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+
+        gsl_vector_set(x, c_phen, geno);
+        gsl_vector_set(x_miss, c_phen, 1.0);
+        x_mean += geno;
+      }
+      c_phen++;
+    }
+
+    x_mean /= static_cast<double>(ni_test - n_miss);
+
+    // Impute missing genotypes with the mean of the observed ones.
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+    }
+
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
+    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    c++;
+
+    // Process the batch once it is full or the last kept SNP has been read.
+    if (c % msize == 0 || c == t_last) {
+      size_t l = 0;
+      if (c % msize == 0) {
+        l = msize;
+      } else {
+        l = c % msize;
+      }
+
+      gsl_matrix_view Xlarge_sub =
+          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+      gsl_matrix_view UtXlarge_sub =
+          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+      time_start = clock();
+      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                     &UtXlarge_sub.matrix);
+      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      gsl_matrix_set_zero(Xlarge);
+
+      for (size_t i = 0; i < l; i++) {
+        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+
+        CalcUab(UtW, Uty, Utx, Uab);
+
+        time_start = clock();
+        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+        // 3 is before 1: both write beta and se, so in mode 4 the values
+        // stored below are the ones from the Wald test.
+        if (a_mode == 3 || a_mode == 4) {
+          CalcRLScore(l_mle_null, param1, beta, se, p_score);
+        }
+
+        if (a_mode == 1 || a_mode == 4) {
+          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
+                     logl_H1);
+          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+        }
+
+        if (a_mode == 2 || a_mode == 4) {
+          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
+        }
+
+        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+        // Store summary data.
+        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
+                        p_wald, p_lrt, p_score};
+        sumStat.push_back(SNPs);
+      }
+    }
+  }
+  cout << endl;
+
+  gsl_vector_free(x);
+  gsl_vector_free(x_miss);
+  gsl_vector_free(Utx);
+  gsl_matrix_free(Uab);
+  gsl_vector_free(ab);
+
+  gsl_matrix_free(Xlarge);
+  gsl_matrix_free(UtXlarge);
+
+  infile.close();
+  infile.clear();
+
+  return;
}
-void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const double l_min, const double l_max,
- const size_t n_region,
- vector<pair<size_t, double> > &pos_loglr) {
- double logl_H0, logl_H1, log_lr, lambda0, lambda1;
+// Compute, for every column of UtX, the difference between the maximised
+// log-likelihood with that column in the model and the maximised
+// log-likelihood of the null model, and append (column index, difference)
+// to pos_loglr.
+//
+// The only covariate is a vector of ones, transformed here as U^T * 1.
+// Both likelihoods are maximised over [l_min, l_max] by CalcLambda('L', ...)
+// using n_region search intervals. pos_loglr is appended to, not cleared.
+void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ const double l_min, const double l_max, const size_t n_region,
+ vector<pair<size_t, double>> &pos_loglr) {
+ double logl_H0, logl_H1, log_lr, lambda0, lambda1;
- gsl_vector *w=gsl_vector_alloc (Uty->size);
- gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1);
- gsl_matrix *Uab=gsl_matrix_alloc (Uty->size, 6);
- gsl_vector *ab=gsl_vector_alloc (6);
+ // Uab and ab have 6 entries per row: (n_cvt+3)*(n_cvt+2)/2 with n_cvt = 1.
+ gsl_vector *w = gsl_vector_alloc(Uty->size);
+ gsl_matrix *Utw = gsl_matrix_alloc(Uty->size, 1);
+ gsl_matrix *Uab = gsl_matrix_alloc(Uty->size, 6);
+ gsl_vector *ab = gsl_vector_alloc(6);
- gsl_vector_set_zero(ab);
- gsl_vector_set_all (w, 1.0);
- gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0);
- gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
+ // Utw = U^T * w, with w a vector of ones.
+ gsl_vector_set_zero(ab);
+ gsl_vector_set_all(w, 1.0);
+ gsl_vector_view Utw_col = gsl_matrix_column(Utw, 0);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
- CalcUab (Utw, Uty, Uab);
- FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0};
+ CalcUab(Utw, Uty, Uab);
+ FUNC_PARAM param0 = {true, Uty->size, 1, K_eval, Uab, ab, 0};
- CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
+ // Null model: maximised once, reused for every column.
+ CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
- for (size_t i=0; i<UtX->size2; ++i) {
- gsl_vector_const_view UtX_col=gsl_matrix_const_column (UtX, i);
- CalcUab(Utw, Uty, &UtX_col.vector, Uab);
- FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0};
+ for (size_t i = 0; i < UtX->size2; ++i) {
+ gsl_vector_const_view UtX_col = gsl_matrix_const_column(UtX, i);
+ CalcUab(Utw, Uty, &UtX_col.vector, Uab);
+ FUNC_PARAM param1 = {false, UtX->size1, 1, K_eval, Uab, ab, 0};
- CalcLambda ('L', param1, l_min, l_max, n_region, lambda1,
- logl_H1);
- log_lr=logl_H1-logl_H0;
+ CalcLambda('L', param1, l_min, l_max, n_region, lambda1, logl_H1);
+ log_lr = logl_H1 - logl_H0;
- pos_loglr.push_back(make_pair(i,log_lr) );
- }
+ pos_loglr.push_back(make_pair(i, log_lr));
+ }
- gsl_vector_free (w);
- gsl_matrix_free (Utw);
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
+ gsl_vector_free(w);
+ gsl_matrix_free(Utw);
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
- return;
+ return;
}
-void CalcLambda (const char func_name, FUNC_PARAM &params,
- const double l_min, const double l_max,
- const size_t n_region, double &lambda, double &logf) {
- if (func_name!='R' && func_name!='L' && func_name!='r' &&
- func_name!='l') {
- cout << "func_name only takes 'R' or 'L': 'R' for " <<
- "log-restricted likelihood, 'L' for log-likelihood." << endl;
- return;
- }
-
- vector<pair<double, double> > lambda_lh;
-
- // Evaluate first-order derivates in different intervals.
- double lambda_l, lambda_h, lambda_interval=
- log(l_max/l_min)/(double)n_region;
- double dev1_l, dev1_h, logf_l, logf_h;
-
- for (size_t i=0; i<n_region; ++i) {
- lambda_l=l_min*exp(lambda_interval*i);
- lambda_h=l_min*exp(lambda_interval*(i+1.0));
-
- if (func_name=='R' || func_name=='r') {
- dev1_l=LogRL_dev1 (lambda_l, &params);
- dev1_h=LogRL_dev1 (lambda_h, &params);
- }
- else {
- dev1_l=LogL_dev1 (lambda_l, &params);
- dev1_h=LogL_dev1 (lambda_h, &params);
- }
-
- if (dev1_l*dev1_h<=0) {
- lambda_lh.push_back(make_pair(lambda_l, lambda_h));
- }
- }
-
- // If derivates do not change signs in any interval.
- if (lambda_lh.empty()) {
- if (func_name=='R' || func_name=='r') {
- logf_l=LogRL_f (l_min, &params);
- logf_h=LogRL_f (l_max, &params);
- }
- else {
- logf_l=LogL_f (l_min, &params);
- logf_h=LogL_f (l_max, &params);
- }
-
- if (logf_l>=logf_h) {
- lambda=l_min;
- logf=logf_l;
- } else {
- lambda=l_max;
- logf=logf_h;
- }
- }
- else {
-
- // If derivates change signs.
- int status;
- int iter=0, max_iter=100;
- double l, l_temp;
-
- gsl_function F;
- gsl_function_fdf FDF;
-
- F.params=&params;
- FDF.params=&params;
-
- if (func_name=='R' || func_name=='r') {
- F.function=&LogRL_dev1;
- FDF.f=&LogRL_dev1;
- FDF.df=&LogRL_dev2;
- FDF.fdf=&LogRL_dev12;
- }
- else {
- F.function=&LogL_dev1;
- FDF.f=&LogL_dev1;
- FDF.df=&LogL_dev2;
- FDF.fdf=&LogL_dev12;
- }
-
- const gsl_root_fsolver_type *T_f;
- gsl_root_fsolver *s_f;
- T_f=gsl_root_fsolver_brent;
- s_f=gsl_root_fsolver_alloc (T_f);
-
- const gsl_root_fdfsolver_type *T_fdf;
- gsl_root_fdfsolver *s_fdf;
- T_fdf=gsl_root_fdfsolver_newton;
- s_fdf=gsl_root_fdfsolver_alloc(T_fdf);
-
- for (vector<double>::size_type i=0; i<lambda_lh.size(); ++i) {
- lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second;
- gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h);
-
- do {
- iter++;
- status=gsl_root_fsolver_iterate (s_f);
- l=gsl_root_fsolver_root (s_f);
- lambda_l=gsl_root_fsolver_x_lower (s_f);
- lambda_h=gsl_root_fsolver_x_upper (s_f);
- status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1);
- }
- while (status==GSL_CONTINUE && iter<max_iter);
-
- iter=0;
-
- gsl_root_fdfsolver_set (s_fdf, &FDF, l);
-
- do {
- iter++;
- status=gsl_root_fdfsolver_iterate (s_fdf);
- l_temp=l;
- l=gsl_root_fdfsolver_root (s_fdf);
- status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
- }
- while (status==GSL_CONTINUE &&
- iter<max_iter &&
- l>l_min && l<l_max);
-
- l=l_temp;
- if (l<l_min) {l=l_min;}
- if (l>l_max) {l=l_max;}
- if (func_name=='R' || func_name=='r') {
- logf_l=LogRL_f (l, &params);
- } else {
- logf_l=LogL_f (l, &params);
- }
-
- if (i==0) {logf=logf_l; lambda=l;}
- else if (logf<logf_l) {logf=logf_l; lambda=l;}
- else {}
- }
- gsl_root_fsolver_free (s_f);
- gsl_root_fdfsolver_free (s_fdf);
-
- if (func_name=='R' || func_name=='r') {
- logf_l=LogRL_f (l_min, &params);
- logf_h=LogRL_f (l_max, &params);
- }
- else {
- logf_l=LogL_f (l_min, &params);
- logf_h=LogL_f (l_max, &params);
- }
-
- if (logf_l>logf) {lambda=l_min; logf=logf_l;}
- if (logf_h>logf) {lambda=l_max; logf=logf_h;}
- }
-
- return;
+// Maximise the log-likelihood ('L'/'l') or log-restricted likelihood
+// ('R'/'r') over lambda in [l_min, l_max].
+//
+//   func_name  selects the LogL_* or LogRL_* family of functions.
+//   params     passed unchanged to those functions.
+//   n_region   number of log-spaced sub-intervals of [l_min, l_max] that are
+//              scanned for a sign change of the first derivative.
+//   lambda     (out) maximising value found.
+//   logf       (out) function value at lambda.
+//
+// Every sub-interval with a sign change is refined by root finding on the
+// first derivative (Brent to narrow the bracket, then Newton); the best of
+// those roots and the two end points l_min and l_max is returned. If no
+// sub-interval shows a sign change, the better of the two end points is
+// returned. For any other func_name a message is printed and lambda and logf
+// are left untouched.
+void CalcLambda(const char func_name, FUNC_PARAM &params, const double l_min,
+                const double l_max, const size_t n_region, double &lambda,
+                double &logf) {
+  if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+      func_name != 'l') {
+    cout << "func_name only takes 'R' or 'L': 'R' for "
+         << "log-restricted likelihood, 'L' for log-likelihood." << endl;
+    return;
+  }
+
+  vector<pair<double, double>> lambda_lh;
+
+  // Evaluate first-order derivatives in different intervals.
+  // The grid is uniform in log(lambda).
+  double lambda_l, lambda_h,
+      lambda_interval = log(l_max / l_min) / (double)n_region;
+  double dev1_l, dev1_h, logf_l, logf_h;
+
+  for (size_t i = 0; i < n_region; ++i) {
+    lambda_l = l_min * exp(lambda_interval * i);
+    lambda_h = l_min * exp(lambda_interval * (i + 1.0));
+
+    if (func_name == 'R' || func_name == 'r') {
+      dev1_l = LogRL_dev1(lambda_l, &params);
+      dev1_h = LogRL_dev1(lambda_h, &params);
+    } else {
+      dev1_l = LogL_dev1(lambda_l, &params);
+      dev1_h = LogL_dev1(lambda_h, &params);
+    }
+
+    if (dev1_l * dev1_h <= 0) {
+      lambda_lh.push_back(make_pair(lambda_l, lambda_h));
+    }
+  }
+
+  // If derivatives do not change signs in any interval.
+  if (lambda_lh.empty()) {
+    if (func_name == 'R' || func_name == 'r') {
+      logf_l = LogRL_f(l_min, &params);
+      logf_h = LogRL_f(l_max, &params);
+    } else {
+      logf_l = LogL_f(l_min, &params);
+      logf_h = LogL_f(l_max, &params);
+    }
+
+    if (logf_l >= logf_h) {
+      lambda = l_min;
+      logf = logf_l;
+    } else {
+      lambda = l_max;
+      logf = logf_h;
+    }
+  } else {
+
+    // If derivatives change signs.
+    int status;
+    int iter = 0, max_iter = 100;
+    double l, l_temp;
+
+    gsl_function F;
+    gsl_function_fdf FDF;
+
+    F.params = &params;
+    FDF.params = &params;
+
+    if (func_name == 'R' || func_name == 'r') {
+      F.function = &LogRL_dev1;
+      FDF.f = &LogRL_dev1;
+      FDF.df = &LogRL_dev2;
+      FDF.fdf = &LogRL_dev12;
+    } else {
+      F.function = &LogL_dev1;
+      FDF.f = &LogL_dev1;
+      FDF.df = &LogL_dev2;
+      FDF.fdf = &LogL_dev12;
+    }
+
+    const gsl_root_fsolver_type *T_f;
+    gsl_root_fsolver *s_f;
+    T_f = gsl_root_fsolver_brent;
+    s_f = gsl_root_fsolver_alloc(T_f);
+
+    const gsl_root_fdfsolver_type *T_fdf;
+    gsl_root_fdfsolver *s_fdf;
+    T_fdf = gsl_root_fdfsolver_newton;
+    s_fdf = gsl_root_fdfsolver_alloc(T_fdf);
+
+    for (vector<double>::size_type i = 0; i < lambda_lh.size(); ++i) {
+      lambda_l = lambda_lh[i].first;
+      lambda_h = lambda_lh[i].second;
+      gsl_root_fsolver_set(s_f, &F, lambda_l, lambda_h);
+
+      // Bracketing phase: narrow the interval to 10% relative width.
+      // The counter is reset for every interval so that each one gets the
+      // full max_iter budget, independent of how many Newton steps the
+      // previous interval used.
+      iter = 0;
+      do {
+        iter++;
+        status = gsl_root_fsolver_iterate(s_f);
+        l = gsl_root_fsolver_root(s_f);
+        lambda_l = gsl_root_fsolver_x_lower(s_f);
+        lambda_h = gsl_root_fsolver_x_upper(s_f);
+        status = gsl_root_test_interval(lambda_l, lambda_h, 0, 1e-1);
+      } while (status == GSL_CONTINUE && iter < max_iter);
+
+      // Polishing phase: Newton steps from the Brent estimate, stopped on
+      // convergence, on max_iter, or when an iterate leaves (l_min, l_max).
+      iter = 0;
+
+      gsl_root_fdfsolver_set(s_fdf, &FDF, l);
+
+      do {
+        iter++;
+        status = gsl_root_fdfsolver_iterate(s_fdf);
+        l_temp = l;
+        l = gsl_root_fdfsolver_root(s_fdf);
+        status = gsl_root_test_delta(l, l_temp, 0, 1e-5);
+      } while (status == GSL_CONTINUE && iter < max_iter && l > l_min &&
+               l < l_max);
+
+      // Use the iterate from before the last Newton step (the last step may
+      // have moved outside the allowed range), then clamp to [l_min, l_max].
+      l = l_temp;
+      if (l < l_min) {
+        l = l_min;
+      }
+      if (l > l_max) {
+        l = l_max;
+      }
+      if (func_name == 'R' || func_name == 'r') {
+        logf_l = LogRL_f(l, &params);
+      } else {
+        logf_l = LogL_f(l, &params);
+      }
+
+      // Keep the best root seen so far.
+      if (i == 0) {
+        logf = logf_l;
+        lambda = l;
+      } else if (logf < logf_l) {
+        logf = logf_l;
+        lambda = l;
+      } else {
+      }
+    }
+    gsl_root_fsolver_free(s_f);
+    gsl_root_fdfsolver_free(s_fdf);
+
+    // Finally compare against the two end points of the search range.
+    if (func_name == 'R' || func_name == 'r') {
+      logf_l = LogRL_f(l_min, &params);
+      logf_h = LogRL_f(l_max, &params);
+    } else {
+      logf_l = LogL_f(l_min, &params);
+      logf_h = LogL_f(l_max, &params);
+    }
+
+    if (logf_l > logf) {
+      lambda = l_min;
+      logf = logf_l;
+    }
+    if (logf_h > logf) {
+      lambda = l_max;
+      logf = logf_h;
+    }
+  }
+
+  return;
}
// Calculate lambda in the null model.
-void CalcLambda (const char func_name, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const double l_min, const double l_max,
- const size_t n_region, double &lambda, double &logl_H0) {
- if (func_name!='R' && func_name!='L' && func_name!='r' &&
- func_name!='l') {
- cout<<"func_name only takes 'R' or 'L': 'R' for " <<
- "log-restricted likelihood, 'L' for log-likelihood." << endl;
- return;
- }
+// Convenience overload: builds Uab from UtW and Uty only (no genotype), wraps
+// it in a FUNC_PARAM and forwards to the FUNC_PARAM overload of CalcLambda.
+// The number of covariates and of individuals are taken from the dimensions
+// of UtW (columns and rows respectively).
+// On an invalid func_name a message is printed and lambda and logl_H0 are
+// left untouched.
+void CalcLambda(const char func_name, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const double l_min, const double l_max, const size_t n_region,
+ double &lambda, double &logl_H0) {
+ if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+ func_name != 'l') {
+ cout << "func_name only takes 'R' or 'L': 'R' for "
+ << "log-restricted likelihood, 'L' for log-likelihood." << endl;
+ return;
+ }
- size_t n_cvt=UtW->size2, ni_test=UtW->size1;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+ size_t n_cvt = UtW->size2, ni_test = UtW->size1;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
+ // Scratch storage, released before returning.
+ gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index);
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
+ gsl_matrix_set_zero(Uab);
+ CalcUab(UtW, Uty, Uab);
- FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
+ FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0};
- CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
+ CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
- gsl_matrix_free(Uab);
- gsl_vector_free(ab);
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
- return;
+ return;
}
// Obtain REMLE estimate for PVE using lambda_remle.
-void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW,
- const gsl_vector *Uty, const double lambda,
- const double trace_G, double &pve, double &pve_se) {
- size_t n_cvt=UtW->size2, ni_test=UtW->size1;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+// Outputs:
+//   pve     = trace_G * lambda / (trace_G * lambda + 1)
+//   pve_se  = se(lambda) * trace_G / (trace_G * lambda + 1)^2, i.e. the
+//             standard error of lambda multiplied by d(pve)/d(lambda).
+// se(lambda) is computed as sqrt(-1 / LogRL_dev2(lambda)) in the model built
+// from UtW and Uty only (no genotype).
+void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_vector *Uty, const double lambda, const double trace_G,
+ double &pve, double &pve_se) {
+ size_t n_cvt = UtW->size2, ni_test = UtW->size1;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
+ // Scratch storage, released before returning.
+ gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index);
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
+ gsl_matrix_set_zero(Uab);
+ CalcUab(UtW, Uty, Uab);
- FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
+ FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0};
- double se=sqrt(-1.0/LogRL_dev2 (lambda, &param0));
+ // NOTE(review): yields NaN if LogRL_dev2 is not negative at lambda
+ // (presumably it is at an interior maximum - confirm for boundary values).
+ double se = sqrt(-1.0 / LogRL_dev2(lambda, &param0));
- pve=trace_G*lambda/(trace_G*lambda+1.0);
- pve_se=trace_G/((trace_G*lambda+1.0)*(trace_G*lambda+1.0))*se;
+ pve = trace_G * lambda / (trace_G * lambda + 1.0);
+ pve_se = trace_G / ((trace_G * lambda + 1.0) * (trace_G * lambda + 1.0)) * se;
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
- return;
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+ return;
}
// Obtain REML estimate for Vg and Ve using lambda_remle.
// Obtain beta and se(beta) for coefficients.
// ab is not used when e_mode==0.
-void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
- const gsl_vector *Uty, const double lambda,
- double &vg, double &ve, gsl_vector *beta,
- gsl_vector *se_beta) {
- size_t n_cvt=UtW->size2, ni_test=UtW->size1;
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
- gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
- gsl_vector *Hi_eval=gsl_vector_alloc(eval->size);
- gsl_vector *v_temp=gsl_vector_alloc(eval->size);
- gsl_matrix *HiW=gsl_matrix_alloc(eval->size, UtW->size2);
- gsl_matrix *WHiW=gsl_matrix_alloc(UtW->size2, UtW->size2);
- gsl_vector *WHiy=gsl_vector_alloc(UtW->size2);
- gsl_matrix *Vbeta=gsl_matrix_alloc(UtW->size2, UtW->size2);
-
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
-
- gsl_vector_memcpy (v_temp, eval);
- gsl_vector_scale (v_temp, lambda);
- gsl_vector_set_all (Hi_eval, 1.0);
- gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- // Calculate beta.
- gsl_matrix_memcpy (HiW, UtW);
- for (size_t i=0; i<UtW->size2; i++) {
- gsl_vector_view HiW_col=gsl_matrix_column(HiW, i);
- gsl_vector_mul(&HiW_col.vector, Hi_eval);
- }
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
- gsl_blas_dgemv (CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
-
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (UtW->size2);
- LUDecomp (WHiW, pmt, &sig);
- LUSolve (WHiW, pmt, WHiy, beta);
- LUInvert (WHiW, pmt, Vbeta);
-
- // Calculate vg and ve.
- CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
-
- ve=P_yy/(double)(ni_test-n_cvt);
- vg=ve*lambda;
-
- // With ve, calculate se(beta).
- gsl_matrix_scale(Vbeta, ve);
-
- // Obtain se_beta.
- for (size_t i=0; i<Vbeta->size1; i++) {
- gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta,i,i)));
- }
-
- gsl_matrix_free(Uab);
- gsl_matrix_free(Pab);
- gsl_vector_free(ab);
- gsl_vector_free(Hi_eval);
- gsl_vector_free(v_temp);
- gsl_matrix_free(HiW);
- gsl_matrix_free(WHiW);
- gsl_vector_free(WHiy);
- gsl_matrix_free(Vbeta);
-
- gsl_permutation_free(pmt);
- return;
+void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_vector *Uty, const double lambda, double &vg,
+ double &ve, gsl_vector *beta, gsl_vector *se_beta) {
+ size_t n_cvt = UtW->size2, ni_test = UtW->size1;
+ size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+ gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index);
+ gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_alloc(eval->size);
+ gsl_vector *v_temp = gsl_vector_alloc(eval->size);
+ gsl_matrix *HiW = gsl_matrix_alloc(eval->size, UtW->size2);
+ gsl_matrix *WHiW = gsl_matrix_alloc(UtW->size2, UtW->size2);
+ gsl_vector *WHiy = gsl_vector_alloc(UtW->size2);
+ gsl_matrix *Vbeta = gsl_matrix_alloc(UtW->size2, UtW->size2);
+
+ gsl_matrix_set_zero(Uab);
+ CalcUab(UtW, Uty, Uab);
+
+ gsl_vector_memcpy(v_temp, eval);
+ gsl_vector_scale(v_temp, lambda);
+ gsl_vector_set_all(Hi_eval, 1.0);
+ gsl_vector_add_constant(v_temp, 1.0);
+ gsl_vector_div(Hi_eval, v_temp);
+
+ // Calculate beta.
+ gsl_matrix_memcpy(HiW, UtW);
+ for (size_t i = 0; i < UtW->size2; i++) {
+ gsl_vector_view HiW_col = gsl_matrix_column(HiW, i);
+ gsl_vector_mul(&HiW_col.vector, Hi_eval);
+ }
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
+ gsl_blas_dgemv(CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
+
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(UtW->size2);
+ LUDecomp(WHiW, pmt, &sig);
+ LUSolve(WHiW, pmt, WHiy, beta);
+ LUInvert(WHiW, pmt, Vbeta);
+
+ // Calculate vg and ve.
+ CalcPab(n_cvt, 0, Hi_eval, Uab, ab, Pab);
+
+ size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
+ double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy);
+
+ ve = P_yy / (double)(ni_test - n_cvt);
+ vg = ve * lambda;
+
+ // With ve, calculate se(beta).
+ gsl_matrix_scale(Vbeta, ve);
+
+ // Obtain se_beta.
+ for (size_t i = 0; i < Vbeta->size1; i++) {
+ gsl_vector_set(se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i)));
+ }
+
+ gsl_matrix_free(Uab);
+ gsl_matrix_free(Pab);
+ gsl_vector_free(ab);
+ gsl_vector_free(Hi_eval);
+ gsl_vector_free(v_temp);
+ gsl_matrix_free(HiW);
+ gsl_matrix_free(WHiW);
+ gsl_vector_free(WHiy);
+ gsl_matrix_free(Vbeta);
+
+ gsl_permutation_free(pmt);
+ return;
}
-void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y,
- const gsl_vector *env) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return;
- }
-
- clock_t time_start=clock();
-
- string line;
- char *ch_ptr;
-
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
- double p_lrt=0, p_score=0;
- double logl_H1=0.0, logl_H0=0.0;
- int n_miss, c_phen;
- double geno, x_mean;
-
- // Calculate basic quantities.
- size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
-
- gsl_vector *x=gsl_vector_alloc (U->size1);
- gsl_vector *x_miss=gsl_vector_alloc (U->size1);
- gsl_vector *Utx=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
- gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
- gsl_matrix_view UtW_expand_mat=
- gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
- gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
- gsl_vector_view UtW_expand_env=
- gsl_matrix_column(UtW_expand, UtW->size2);
- gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
- gsl_vector_view UtW_expand_x=
- gsl_matrix_column(UtW_expand, UtW->size2+1);
-
- // Start reading genotypes and analyze.
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- }
- else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {
- gsl_vector_set(x, i, x_mean);
- }
- geno=gsl_vector_get(x, i);
- if (x_mean>1) {
- gsl_vector_set(x, i, 2-geno);
- }
- }
-
- // Calculate statistics.
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0,
- &UtW_expand_x.vector);
- gsl_vector_mul (x, env);
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW_expand, Uty, Uab);
-
- if (a_mode==2 || a_mode==4) {
- FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
- CalcLambda ('L', param0, l_min, l_max, n_region,
- lambda_mle, logl_H0);
- }
-
- CalcUab(UtW_expand, Uty, Utx, Uab);
-
- time_start=clock();
- FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- CalcRLScore (l_mle_null, param1, beta, se, p_score);
- }
-
- if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region,
- lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region,
- lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
- }
-
- if (x_mean>1) {beta*=-1;}
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
-
- gsl_vector_free (x);
- gsl_vector_free (x_miss);
- gsl_vector_free (Utx);
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
-
- gsl_matrix_free (UtW_expand);
-
- infile.close();
- infile.clear();
-
- return;
+void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const gsl_vector *env) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+
+ string line;
+ char *ch_ptr;
+
+ double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+ double p_lrt = 0, p_score = 0;
+ double logl_H1 = 0.0, logl_H0 = 0.0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ // Calculate basic quantities.
+ size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
+
+ gsl_vector *x = gsl_vector_alloc(U->size1);
+ gsl_vector *x_miss = gsl_vector_alloc(U->size1);
+ gsl_vector *Utx = gsl_vector_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index);
+
+ gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+ gsl_matrix_view UtW_expand_mat =
+ gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
+ gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+ gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
+
+ // Start reading genotypes and analyze.
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof();
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+
+ x_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean);
+ }
+ geno = gsl_vector_get(x, i);
+ if (x_mean > 1) {
+ gsl_vector_set(x, i, 2 - geno);
+ }
+ }
+
+ // Calculate statistics.
+ time_start = clock();
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_vector_mul(x, env);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ gsl_matrix_set_zero(Uab);
+ CalcUab(UtW_expand, Uty, Uab);
+
+ if (a_mode == 2 || a_mode == 4) {
+ FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+ CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ }
+
+ CalcUab(UtW_expand, Uty, Utx, Uab);
+
+ time_start = clock();
+ FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+
+ // 3 is before 1.
+ if (a_mode == 3 || a_mode == 4) {
+ CalcRLScore(l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
+ }
+
+ if (x_mean > 1) {
+ beta *= -1;
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+ gsl_vector_free(Utx);
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+
+ gsl_matrix_free(UtW_expand);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
-void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y,
- const gsl_vector *env) {
- string file_bed=file_bfile+".bed";
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
- clock_t time_start=clock();
-
- char ch[1];
- bitset<8> b;
-
- double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0;
- double p_lrt=0, p_score=0;
- double logl_H1=0.0, logl_H0=0.0;
- int n_bit, n_miss, ci_total, ci_test;
- double geno, x_mean;
-
- // Calculate basic quantities.
- size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
-
- gsl_vector *x=gsl_vector_alloc (U->size1);
- gsl_vector *Utx=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
- gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
- gsl_matrix_view UtW_expand_mat=
- gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
- gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
- gsl_vector_view UtW_expand_env=
- gsl_matrix_column(UtW_expand, UtW->size2);
- gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
- gsl_vector_view UtW_expand_x=
- gsl_matrix_column(UtW_expand, UtW->size2+1);
-
- // Calculate n_bit and c, the number of bit for each SNP.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {
- ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
- break;
- }
- if (indicator_idv[ci_total]==0) {
- ci_total++;
- continue;
- }
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(x, ci_test, 2);
- x_mean+=2.0;
- }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
- }
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- geno=gsl_vector_get(x,i);
- if (geno==-9) {
- gsl_vector_set(x, i, x_mean);
- geno=x_mean;
- }
- if (x_mean>1) {
- gsl_vector_set(x, i, 2-geno);
- }
- }
-
- // Calculate statistics.
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0,
- &UtW_expand_x.vector);
- gsl_vector_mul (x, env);
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Uab);
- CalcUab (UtW_expand, Uty, Uab);
-
- if (a_mode==2 || a_mode==4) {
- FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
- CalcLambda ('L', param0, l_min, l_max, n_region,
- lambda_mle, logl_H0);
- }
-
- CalcUab(UtW_expand, Uty, Utx, Uab);
-
- time_start=clock();
- FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
-
- // 3 is before 1, for beta.
- if (a_mode==3 || a_mode==4) {
- CalcRLScore (l_mle_null, param1, beta, se, p_score);
- }
-
- if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region,
- lambda_remle, logl_H1);
- CalcRLWald (lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode==2 || a_mode==4) {
- CalcLambda ('L', param1, l_min, l_max, n_region,
- lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
- }
-
- if (x_mean>1) {beta*=-1;}
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald,
- p_lrt, p_score};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
-
- gsl_vector_free (x);
- gsl_vector_free (Utx);
- gsl_matrix_free (Uab);
- gsl_vector_free (ab);
-
- gsl_matrix_free (UtW_expand);
-
- infile.close();
- infile.clear();
-
- return;
+void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const gsl_vector *env) {
+ string file_bed = file_bfile + ".bed";
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+
+ char ch[1];
+ bitset<8> b;
+
+ double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+ double p_lrt = 0, p_score = 0;
+ double logl_H1 = 0.0, logl_H0 = 0.0;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+
+ // Calculate basic quantities.
+ size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
+
+ gsl_vector *x = gsl_vector_alloc(U->size1);
+ gsl_vector *Utx = gsl_vector_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_alloc(n_index);
+
+ gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+ gsl_matrix_view UtW_expand_mat =
+ gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
+ gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+ gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
+
+ // Calculate n_bit and c, the number of bit for each SNP.
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Print the first three magic numbers.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+ if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+ ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // n_bit, and 3 is the number of magic numbers
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ x_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0;
+ ci_test = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total] == 0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean += 2.0;
+ } else {
+ gsl_vector_set(x, ci_test, 1);
+ x_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(x, ci_test, 0);
+ } else {
+ gsl_vector_set(x, ci_test, -9);
+ n_miss++;
+ }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ geno = gsl_vector_get(x, i);
+ if (geno == -9) {
+ gsl_vector_set(x, i, x_mean);
+ geno = x_mean;
+ }
+ if (x_mean > 1) {
+ gsl_vector_set(x, i, 2 - geno);
+ }
+ }
+
+ // Calculate statistics.
+ time_start = clock();
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_vector_mul(x, env);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ gsl_matrix_set_zero(Uab);
+ CalcUab(UtW_expand, Uty, Uab);
+
+ if (a_mode == 2 || a_mode == 4) {
+ FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+ CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ }
+
+ CalcUab(UtW_expand, Uty, Utx, Uab);
+
+ time_start = clock();
+ FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0};
+
+ // 3 is before 1, for beta.
+ if (a_mode == 3 || a_mode == 4) {
+ CalcRLScore(l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1);
+ }
+
+ if (x_mean > 1) {
+ beta *= -1;
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+ gsl_vector_free(Utx);
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+
+ gsl_matrix_free(UtW_expand);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
diff --git a/src/lmm.h b/src/lmm.h
index 9c3de9d..c393daf 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -19,120 +19,117 @@
#ifndef __LMM_H__
#define __LMM_H__
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
#include "io.h"
+#include "param.h"
using namespace std;
class FUNC_PARAM {
public:
- bool calc_null;
- size_t ni_test;
- size_t n_cvt;
- const gsl_vector *eval;
- const gsl_matrix *Uab;
- const gsl_vector *ab;
- size_t e_mode;
+ bool calc_null;
+ size_t ni_test;
+ size_t n_cvt;
+ const gsl_vector *eval;
+ const gsl_matrix *Uab;
+ const gsl_vector *ab;
+ size_t e_mode;
};
class LMM {
public:
- // IO-related parameters
- int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests.
- size_t d_pace; // Display pace.
-
- string file_bfile;
- string file_geno;
- string file_out;
- string path_out;
-
- string file_gene;
- // WJA added
- string file_oxford;
-
- // LMM related parameters
- double l_min;
- double l_max;
- size_t n_region;
- double l_mle_null;
- double logl_mle_H0;
-
- // Summary statistics
- size_t ni_total, ni_test; // Number of individuals.
- size_t ns_total, ns_test; // Number of SNPs.
- size_t ng_total, ng_test; // Number of genes.
- size_t n_cvt;
- double time_UtX; // Time spent on optimization iterations.
- double time_opt; // Time spent on optimization iterations.
-
- // Indicator for individuals (phenotypes): 0 missing, 1
- // available for analysis.
- vector<int> indicator_idv;
-
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
-
- vector<SNPINFO> snpInfo; // Record SNP information.
-
- // Not included in PARAM.
- vector<SUMSTAT> sumStat; // Output SNPSummary Data.
-
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
- void AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Utx,
- const gsl_matrix *W, const gsl_vector *x);
- void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y);
- // WJA added.
- void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y);
- void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y);
- void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y,
- const gsl_vector *env);
- void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y,
- const gsl_vector *env);
- void WriteFiles ();
-
- void CalcRLWald (const double &lambda, const FUNC_PARAM &params,
- double &beta, double &se, double &p_wald);
- void CalcRLScore (const double &l, const FUNC_PARAM &params,
- double &beta, double &se, double &p_score);
+ // IO-related parameters
+ int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests.
+ size_t d_pace; // Display pace.
+
+ string file_bfile;
+ string file_geno;
+ string file_out;
+ string path_out;
+
+ string file_gene;
+ // WJA added
+ string file_oxford;
+
+ // LMM related parameters
+ double l_min;
+ double l_max;
+ size_t n_region;
+ double l_mle_null;
+ double logl_mle_H0;
+
+ // Summary statistics
+ size_t ni_total, ni_test; // Number of individuals.
+ size_t ns_total, ns_test; // Number of SNPs.
+ size_t ng_total, ng_test; // Number of genes.
+ size_t n_cvt;
+ double time_UtX; // Time spent on optimization iterations.
+ double time_opt; // Time spent on optimization iterations.
+
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_idv;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
+
+ vector<SNPINFO> snpInfo; // Record SNP information.
+
+ // Not included in PARAM.
+ vector<SUMSTAT> sumStat; // Output SNPSummary Data.
+
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+ void AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Utx,
+ const gsl_matrix *W, const gsl_vector *x);
+ void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y);
+ // WJA added.
+ void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y);
+ void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y);
+ void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const gsl_vector *env);
+ void AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const gsl_vector *env);
+ void WriteFiles();
+
+ void CalcRLWald(const double &lambda, const FUNC_PARAM &params, double &beta,
+ double &se, double &p_wald);
+ void CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
+ double &se, double &p_score);
};
-void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const double l_min, const double l_max,
- const size_t n_region,
- vector<pair<size_t, double> > &pos_loglr);
-void CalcLambda (const char func_name, FUNC_PARAM &params,
- const double l_min, const double l_max,
- const size_t n_region, double &lambda, double &logf);
-void CalcLambda (const char func_name, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const double l_min, const double l_max,
- const size_t n_region, double &lambda, double &logl_H0);
-void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW,
- const gsl_vector *Uty, const double lambda,
- const double trace_G, double &pve, double &pve_se);
-void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
- const gsl_vector *Uty, const double lambda, double &vg,
- double &ve, gsl_vector *beta, gsl_vector *se_beta);
+void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ const double l_min, const double l_max, const size_t n_region,
+ vector<pair<size_t, double>> &pos_loglr);
+void CalcLambda(const char func_name, FUNC_PARAM &params, const double l_min,
+ const double l_max, const size_t n_region, double &lambda,
+ double &logf);
+void CalcLambda(const char func_name, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const double l_min, const double l_max, const size_t n_region,
+ double &lambda, double &logl_H0);
+void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_vector *Uty, const double lambda, const double trace_G,
+ double &pve, double &pve_se);
+void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_vector *Uty, const double lambda, double &vg,
+ double &ve, gsl_vector *beta, gsl_vector *se_beta);
#endif
-
-
diff --git a/src/logistic.cpp b/src/logistic.cpp
index f9edc68..2308de7 100644
--- a/src/logistic.cpp
+++ b/src/logistic.cpp
@@ -1,15 +1,15 @@
-#include <stdio.h>
-#include <math.h>
+#include "logistic.h"
+#include <gsl/gsl_linalg.h>
#include <gsl/gsl_matrix.h>
-#include <gsl/gsl_rng.h>
#include <gsl/gsl_multimin.h>
+#include <gsl/gsl_rng.h>
#include <gsl/gsl_sf.h>
-#include <gsl/gsl_linalg.h>
-#include "logistic.h"
+#include <math.h>
+#include <stdio.h>
// I need to bundle all the data that goes to the function to optimze
// together.
-typedef struct{
+typedef struct {
gsl_matrix_int *X;
gsl_vector_int *nlev;
gsl_vector *y;
@@ -18,13 +18,9 @@ typedef struct{
double lambdaL2;
} fix_parm_mixed_T;
-double fLogit_mixed(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_matrix *Xc,
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2) {
+double fLogit_mixed(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+ gsl_matrix *Xc, gsl_vector *y, double lambdaL1,
+ double lambdaL2) {
int n = y->size;
int npar = beta->size;
double total = 0;
@@ -33,57 +29,56 @@ double fLogit_mixed(gsl_vector *beta,
// Changed loop start at 1 instead of 0 to avoid regularization of
// beta_0*\/
// #pragma omp parallel for reduction (+:total)
- for(int i = 1; i < npar; ++i)
- total += beta->data[i]*beta->data[i];
- total = (-total*lambdaL2/2);
+ for (int i = 1; i < npar; ++i)
+ total += beta->data[i] * beta->data[i];
+ total = (-total * lambdaL2 / 2);
// #pragma omp parallel for reduction (+:aux)
- for(int i = 1; i < npar; ++i)
- aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
- total = total-aux*lambdaL1;
+ for (int i = 1; i < npar; ++i)
+ aux += (beta->data[i] > 0 ? beta->data[i] : -beta->data[i]);
+ total = total - aux * lambdaL1;
// #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
// #reduction (+:total)
- for(int i = 0; i < n; ++i) {
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < X->size2; ++k) {
- if(gsl_matrix_int_get(X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
- iParm+=nlev->data[k]-1;
+ for (int i = 0; i < n; ++i) {
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < X->size2; ++k) {
+ if (gsl_matrix_int_get(X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+ iParm += nlev->data[k] - 1;
}
- for(int k = 0; k < (Xc->size2); ++k)
- Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
- total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+ for (int k = 0; k < (Xc->size2); ++k)
+ Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+ total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
}
return -total;
}
void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
- // length = 1 + Sum_k(C_k -1)
- gsl_matrix_int *X, // Matrix Nobs x K
- gsl_vector_int *nlev, // Vector with number categories
- gsl_matrix *Xc, // Continuous covariates matrix:
- // obs x Kc (NULL if not used).
- gsl_vector *yhat){ // Vector of prob. predicted by
- // the logistic
- for(int i = 0; i < X->size1; ++i) {
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < X->size2; ++k) {
- if(gsl_matrix_int_get(X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
- iParm+=nlev->data[k]-1;
+ // length = 1 + Sum_k(C_k -1)
+ gsl_matrix_int *X, // Matrix Nobs x K
+ gsl_vector_int *nlev, // Vector with number categories
+ gsl_matrix *Xc, // Continuous covariates matrix:
+ // obs x Kc (NULL if not used).
+ gsl_vector *yhat) { // Vector of prob. predicted by
+ // the logistic
+ for (int i = 0; i < X->size1; ++i) {
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < X->size2; ++k) {
+ if (gsl_matrix_int_get(X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+ iParm += nlev->data[k] - 1;
}
// Adding the continuous.
- for(int k = 0; k < (Xc->size2); ++k)
- Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
- yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+ for (int k = 0; k < (Xc->size2); ++k)
+ Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+ yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
}
}
-
// The gradient of f, df = (df/dx, df/dy).
-void wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
- gsl_vector *out) {
+void wgsl_mixed_optim_df(const gsl_vector *beta, void *params,
+ gsl_vector *out) {
fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
int n = p->y->size;
int K = p->X->size2;
@@ -91,50 +86,49 @@ void wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
int npar = beta->size;
// Intitialize gradient out necessary?
- for(int i = 0; i < npar; ++i)
- out->data[i]= 0;
+ for (int i = 0; i < npar; ++i)
+ out->data[i] = 0;
// Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
- for(int i = 1; i < npar; ++i)
- out->data[i]= p->lambdaL2*beta->data[i];
- for(int i = 1; i < npar; ++i)
- out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
-
- for(int i = 0; i < n; ++i) {
- double pn=0;
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < K; ++k) {
- if(gsl_matrix_int_get(p->X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
- iParm+=p->nlev->data[k]-1;
+ for (int i = 1; i < npar; ++i)
+ out->data[i] = p->lambdaL2 * beta->data[i];
+ for (int i = 1; i < npar; ++i)
+ out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0));
+
+ for (int i = 0; i < n; ++i) {
+ double pn = 0;
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < K; ++k) {
+ if (gsl_matrix_int_get(p->X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm];
+ iParm += p->nlev->data[k] - 1;
}
// Adding the continuous.
- for(int k = 0; k < Kc; ++k)
- Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+ for (int k = 0; k < Kc; ++k)
+ Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm++];
- pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+ pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai)));
- out->data[0]+= pn;
- iParm=1;
- for(int k = 0; k < K; ++k) {
- if(gsl_matrix_int_get(p->X,i,k)>0)
- out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
- iParm+=p->nlev->data[k]-1;
+ out->data[0] += pn;
+ iParm = 1;
+ for (int k = 0; k < K; ++k) {
+ if (gsl_matrix_int_get(p->X, i, k) > 0)
+ out->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm] += pn;
+ iParm += p->nlev->data[k] - 1;
}
// Adding the continuous.
- for(int k = 0; k < Kc; ++k) {
- out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+ for (int k = 0; k < Kc; ++k) {
+ out->data[iParm++] += gsl_matrix_get(p->Xc, i, k) * pn;
}
}
-
}
// The Hessian of f.
-void wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params,
- gsl_matrix *out) {
+void wgsl_mixed_optim_hessian(const gsl_vector *beta, void *params,
+ gsl_matrix *out) {
fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
int n = p->y->size;
int K = p->X->size2;
@@ -146,120 +140,121 @@ void wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params,
gsl_matrix_set_zero(out);
/* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/
- for(int i = 1; i < npar; ++i)
- gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this.
+ for (int i = 1; i < npar; ++i)
+ gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this.
// L1 penalty not working yet, as not differentiable, I may need to
// do coordinate descent (as in glm_net)
- for(int i = 0; i < n; ++i) {
- double pn=0;
- double aux=0;
- double Xbetai=beta->data[0];
- int iParm1=1;
- for(int k = 0; k < K; ++k) {
- if(gsl_matrix_int_get(p->X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
- iParm1+=p->nlev->data[k]-1; //-1?
+ for (int i = 0; i < n; ++i) {
+ double pn = 0;
+ double aux = 0;
+ double Xbetai = beta->data[0];
+ int iParm1 = 1;
+ for (int k = 0; k < K; ++k) {
+ if (gsl_matrix_int_get(p->X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1];
+ iParm1 += p->nlev->data[k] - 1; //-1?
}
// Adding the continuous.
- for(int k = 0; k < Kc; ++k)
- Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+ for (int k = 0; k < Kc; ++k)
+ Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm1++];
- pn= 1/(1 + gsl_sf_exp(-Xbetai));
+ pn = 1 / (1 + gsl_sf_exp(-Xbetai));
// Add a protection for pn very close to 0 or 1?
- aux=pn*(1-pn);
+ aux = pn * (1 - pn);
// Calculate sub-gradient vector gn.
gsl_vector_set_zero(gn);
- gn->data[0]= 1;
- iParm1=1;
- for(int k = 0; k < K; ++k) {
- if(gsl_matrix_int_get(p->X,i,k)>0)
- gn->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1]=1;
- iParm1+=p->nlev->data[k]-1;
+ gn->data[0] = 1;
+ iParm1 = 1;
+ for (int k = 0; k < K; ++k) {
+ if (gsl_matrix_int_get(p->X, i, k) > 0)
+ gn->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1] = 1;
+ iParm1 += p->nlev->data[k] - 1;
}
// Adding the continuous.
- for(int k = 0; k < Kc; ++k) {
- gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+ for (int k = 0; k < Kc; ++k) {
+ gn->data[iParm1++] = gsl_matrix_get(p->Xc, i, k);
}
- for(int k1=0;k1<npar; ++k1)
- if(gn->data[k1]!=0)
- for(int k2=0;k2<npar; ++k2)
- if(gn->data[k2]!=0)
- *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+ for (int k1 = 0; k1 < npar; ++k1)
+ if (gn->data[k1] != 0)
+ for (int k2 = 0; k2 < npar; ++k2)
+ if (gn->data[k2] != 0)
+ *gsl_matrix_ptr(out, k1, k2) += (aux * gn->data[k1] * gn->data[k2]);
}
gsl_vector_free(gn);
}
double wgsl_mixed_optim_f(gsl_vector *v, void *params) {
- double mLogLik=0;
+ double mLogLik = 0;
fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
- mLogLik = fLogit_mixed(v,p->X,p->nlev,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+ mLogLik =
+ fLogit_mixed(v, p->X, p->nlev, p->Xc, p->y, p->lambdaL1, p->lambdaL2);
return mLogLik;
}
// Compute both f and df together.
-void
-wgsl_mixed_optim_fdf (gsl_vector *x, void *params, double *f, gsl_vector *df) {
+void wgsl_mixed_optim_fdf(gsl_vector *x, void *params, double *f,
+ gsl_vector *df) {
*f = wgsl_mixed_optim_f(x, params);
wgsl_mixed_optim_df(x, params, df);
}
// Xc is the matrix of continuous covariates, Nobs x Kc (NULL if not used).
int logistic_mixed_fit(gsl_vector *beta, gsl_matrix_int *X,
- gsl_vector_int *nlev, gsl_matrix *Xc,
- gsl_vector *y, double lambdaL1, double lambdaL2) {
- double mLogLik=0;
+ gsl_vector_int *nlev, gsl_matrix *Xc, gsl_vector *y,
+ double lambdaL1, double lambdaL2) {
+ double mLogLik = 0;
fix_parm_mixed_T p;
int npar = beta->size;
- int iter=0;
- double maxchange=0;
+ int iter = 0;
+ double maxchange = 0;
// Intializing fix parameters.
- p.X=X;
- p.Xc=Xc;
- p.nlev=nlev;
- p.y=y;
- p.lambdaL1=lambdaL1;
- p.lambdaL2=lambdaL2;
+ p.X = X;
+ p.Xc = Xc;
+ p.nlev = nlev;
+ p.y = y;
+ p.lambdaL1 = lambdaL1;
+ p.lambdaL2 = lambdaL2;
// Initial fit.
- mLogLik = wgsl_mixed_optim_f(beta,&p);
+ mLogLik = wgsl_mixed_optim_f(beta, &p);
- gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix.
- gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
+ gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
+ gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
- gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
- gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+ gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
+ gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
- for(iter=0;iter<100;iter++){
- wgsl_mixed_optim_hessian(beta,&p,myH); // Calculate Hessian.
- wgsl_mixed_optim_df(beta,&p,myG); // Calculate Gradient.
- gsl_linalg_QR_decomp(myH,tau); // Calculate next beta.
- gsl_linalg_QR_solve(myH,tau,myG,stBeta);
- gsl_vector_sub(beta,stBeta);
+ for (iter = 0; iter < 100; iter++) {
+ wgsl_mixed_optim_hessian(beta, &p, myH); // Calculate Hessian.
+ wgsl_mixed_optim_df(beta, &p, myG); // Calculate Gradient.
+ gsl_linalg_QR_decomp(myH, tau); // Calculate next beta.
+ gsl_linalg_QR_solve(myH, tau, myG, stBeta);
+ gsl_vector_sub(beta, stBeta);
// Monitor convergence.
- maxchange=0;
- for(int i=0;i<npar; i++)
- if(maxchange<fabs(stBeta->data[i]))
- maxchange=fabs(stBeta->data[i]);
+ maxchange = 0;
+ for (int i = 0; i < npar; i++)
+ if (maxchange < fabs(stBeta->data[i]))
+ maxchange = fabs(stBeta->data[i]);
- if(maxchange<1E-4)
+ if (maxchange < 1E-4)
break;
}
// Final fit.
- mLogLik = wgsl_mixed_optim_f(beta,&p);
+ mLogLik = wgsl_mixed_optim_f(beta, &p);
- gsl_vector_free (tau);
- gsl_vector_free (stBeta);
- gsl_vector_free (myG);
- gsl_matrix_free (myH);
+ gsl_vector_free(tau);
+ gsl_vector_free(stBeta);
+ gsl_vector_free(myG);
+ gsl_matrix_free(myH);
return 0;
}
@@ -278,8 +273,8 @@ typedef struct {
double lambdaL2;
} fix_parm_cat_T;
-double fLogit_cat (gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
- gsl_vector *y, double lambdaL1, double lambdaL2) {
+double fLogit_cat(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+ gsl_vector *y, double lambdaL1, double lambdaL2) {
int n = y->size;
int npar = beta->size;
double total = 0;
@@ -288,91 +283,90 @@ double fLogit_cat (gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
// omp_set_num_threads(ompthr); /\* Changed loop start at 1 instead
// of 0 to avoid regularization of beta 0*\/ /\*#pragma omp parallel
// for reduction (+:total)*\/
- for(int i = 1; i < npar; ++i)
- total += beta->data[i]*beta->data[i];
- total = (-total*lambdaL2/2);
+ for (int i = 1; i < npar; ++i)
+ total += beta->data[i] * beta->data[i];
+ total = (-total * lambdaL2 / 2);
// /\*#pragma omp parallel for reduction (+:aux)*\/
- for(int i = 1; i < npar; ++i)
- aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
- total = total-aux*lambdaL1;
+ for (int i = 1; i < npar; ++i)
+ aux += (beta->data[i] > 0 ? beta->data[i] : -beta->data[i]);
+ total = total - aux * lambdaL1;
// #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
// #reduction (+:total)
- for(int i = 0; i < n; ++i) {
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < X->size2; ++k) {
- if(gsl_matrix_int_get(X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
- iParm+=nlev->data[k]-1;
+ for (int i = 0; i < n; ++i) {
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < X->size2; ++k) {
+ if (gsl_matrix_int_get(X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+ iParm += nlev->data[k] - 1;
}
- total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+ total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
}
return -total;
}
-void logistic_cat_pred (gsl_vector *beta, // Vector of parameters
- // length = 1 + Sum_k(C_k-1).
- gsl_matrix_int *X, // Matrix Nobs x K
- gsl_vector_int *nlev, // Vector with #categories
- gsl_vector *yhat){ // Vector of prob. predicted by
- // the logistic.
- for(int i = 0; i < X->size1; ++i) {
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < X->size2; ++k) {
- if(gsl_matrix_int_get(X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm];
- iParm+=nlev->data[k]-1;
+void logistic_cat_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1 + Sum_k(C_k-1).
+ gsl_matrix_int *X, // Matrix Nobs x K
+ gsl_vector_int *nlev, // Vector with #categories
+ gsl_vector *yhat) { // Vector of prob. predicted by
+ // the logistic.
+ for (int i = 0; i < X->size1; ++i) {
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < X->size2; ++k) {
+ if (gsl_matrix_int_get(X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
+ iParm += nlev->data[k] - 1;
}
- yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+ yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
}
}
// The gradient of f, df = (df/dx, df/dy).
-void wgsl_cat_optim_df (const gsl_vector *beta, void *params,
- gsl_vector *out) {
+void wgsl_cat_optim_df(const gsl_vector *beta, void *params, gsl_vector *out) {
fix_parm_cat_T *p = (fix_parm_cat_T *)params;
int n = p->y->size;
int K = p->X->size2;
int npar = beta->size;
// Intitialize gradient out necessary?
- for(int i = 0; i < npar; ++i)
- out->data[i]= 0;
+ for (int i = 0; i < npar; ++i)
+ out->data[i] = 0;
// Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
- for(int i = 1; i < npar; ++i)
- out->data[i]= p->lambdaL2*beta->data[i];
- for(int i = 1; i < npar; ++i)
- out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
-
- for(int i = 0; i < n; ++i) {
- double pn=0;
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < K; ++k) {
- if(gsl_matrix_int_get(p->X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
- iParm+=p->nlev->data[k]-1;
+ for (int i = 1; i < npar; ++i)
+ out->data[i] = p->lambdaL2 * beta->data[i];
+ for (int i = 1; i < npar; ++i)
+ out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0));
+
+ for (int i = 0; i < n; ++i) {
+ double pn = 0;
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < K; ++k) {
+ if (gsl_matrix_int_get(p->X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm];
+ iParm += p->nlev->data[k] - 1;
}
- pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+ pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai)));
- out->data[0]+= pn;
- iParm=1;
- for(int k = 0; k < K; ++k) {
- if(gsl_matrix_int_get(p->X,i,k)>0)
- out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
- iParm+=p->nlev->data[k]-1;
+ out->data[0] += pn;
+ iParm = 1;
+ for (int k = 0; k < K; ++k) {
+ if (gsl_matrix_int_get(p->X, i, k) > 0)
+ out->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm] += pn;
+ iParm += p->nlev->data[k] - 1;
}
}
}
// The Hessian of f.
-void wgsl_cat_optim_hessian (const gsl_vector *beta, void *params,
- gsl_matrix *out) {
+void wgsl_cat_optim_hessian(const gsl_vector *beta, void *params,
+ gsl_matrix *out) {
fix_parm_cat_T *p = (fix_parm_cat_T *)params;
int n = p->y->size;
int K = p->X->size2;
@@ -382,123 +376,119 @@ void wgsl_cat_optim_hessian (const gsl_vector *beta, void *params,
gsl_matrix_set_zero(out);
// Changed loop start at 1 instead of 0 to avoid regularization of beta.
- for(int i = 1; i < npar; ++i)
- gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this.
+ for (int i = 1; i < npar; ++i)
+ gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this.
// L1 penalty not working yet, as not differentiable, I may need to
// do coordinate descent (as in glm_net).
- for(int i = 0; i < n; ++i) {
- double pn=0;
- double aux=0;
- double Xbetai=beta->data[0];
- int iParm2=1;
- int iParm1=1;
- for(int k = 0; k < K; ++k) {
- if(gsl_matrix_int_get(p->X,i,k)>0)
- Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1];
- iParm1+=p->nlev->data[k]-1; //-1?
+ for (int i = 0; i < n; ++i) {
+ double pn = 0;
+ double aux = 0;
+ double Xbetai = beta->data[0];
+ int iParm2 = 1;
+ int iParm1 = 1;
+ for (int k = 0; k < K; ++k) {
+ if (gsl_matrix_int_get(p->X, i, k) > 0)
+ Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1];
+ iParm1 += p->nlev->data[k] - 1; //-1?
}
- pn= 1/(1 + gsl_sf_exp(-Xbetai));
+ pn = 1 / (1 + gsl_sf_exp(-Xbetai));
// Add a protection for pn very close to 0 or 1?
- aux=pn*(1-pn);
- *gsl_matrix_ptr(out,0,0)+=aux;
- iParm2=1;
- for(int k2 = 0; k2 < K; ++k2) {
- if(gsl_matrix_int_get(p->X,i,k2)>0)
- *gsl_matrix_ptr(out,0,gsl_matrix_int_get(p->X,i,k2)-1+iParm2)+=aux;
- iParm2+=p->nlev->data[k2]-1; //-1?
+ aux = pn * (1 - pn);
+ *gsl_matrix_ptr(out, 0, 0) += aux;
+ iParm2 = 1;
+ for (int k2 = 0; k2 < K; ++k2) {
+ if (gsl_matrix_int_get(p->X, i, k2) > 0)
+ *gsl_matrix_ptr(out, 0, gsl_matrix_int_get(p->X, i, k2) - 1 + iParm2) +=
+ aux;
+ iParm2 += p->nlev->data[k2] - 1; //-1?
}
- iParm1=1;
- for(int k1 = 0; k1 < K; ++k1) {
- if(gsl_matrix_int_get(p->X,i,k1)>0)
- *gsl_matrix_ptr(out,gsl_matrix_int_get(p->X,i,k1)-1+iParm1,0)+=aux;
- iParm2=1;
- for(int k2 = 0; k2 < K; ++k2) {
- if((gsl_matrix_int_get(p->X,i,k1)>0) &&
- (gsl_matrix_int_get(p->X,i,k2)>0))
- *gsl_matrix_ptr(out
- ,gsl_matrix_int_get(p->X,i,k1)-1+iParm1
- ,gsl_matrix_int_get(p->X,i,k2)-1+iParm2
- )+=aux;
- iParm2+=p->nlev->data[k2]-1; //-1?
+ iParm1 = 1;
+ for (int k1 = 0; k1 < K; ++k1) {
+ if (gsl_matrix_int_get(p->X, i, k1) > 0)
+ *gsl_matrix_ptr(out, gsl_matrix_int_get(p->X, i, k1) - 1 + iParm1, 0) +=
+ aux;
+ iParm2 = 1;
+ for (int k2 = 0; k2 < K; ++k2) {
+ if ((gsl_matrix_int_get(p->X, i, k1) > 0) &&
+ (gsl_matrix_int_get(p->X, i, k2) > 0))
+ *gsl_matrix_ptr(out, gsl_matrix_int_get(p->X, i, k1) - 1 + iParm1,
+ gsl_matrix_int_get(p->X, i, k2) - 1 + iParm2) += aux;
+ iParm2 += p->nlev->data[k2] - 1; //-1?
}
- iParm1+=p->nlev->data[k1]-1; //-1?
+ iParm1 += p->nlev->data[k1] - 1; //-1?
}
}
}
double wgsl_cat_optim_f(gsl_vector *v, void *params) {
- double mLogLik=0;
+ double mLogLik = 0;
fix_parm_cat_T *p = (fix_parm_cat_T *)params;
- mLogLik = fLogit_cat(v,p->X,p->nlev,p->y,p->lambdaL1,p->lambdaL2);
+ mLogLik = fLogit_cat(v, p->X, p->nlev, p->y, p->lambdaL1, p->lambdaL2);
return mLogLik;
}
// Compute both f and df together.
-void wgsl_cat_optim_fdf (gsl_vector *x, void *params, double *f,
- gsl_vector *df) {
+void wgsl_cat_optim_fdf(gsl_vector *x, void *params, double *f,
+ gsl_vector *df) {
*f = wgsl_cat_optim_f(x, params);
wgsl_cat_optim_df(x, params, df);
}
-int logistic_cat_fit(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2) {
- double mLogLik=0;
+int logistic_cat_fit(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+ gsl_vector *y, double lambdaL1, double lambdaL2) {
+ double mLogLik = 0;
fix_parm_cat_T p;
int npar = beta->size;
- int iter=0;
- double maxchange=0;
+ int iter = 0;
+ double maxchange = 0;
// Intializing fix parameters.
- p.X=X;
- p.nlev=nlev;
- p.y=y;
- p.lambdaL1=lambdaL1;
- p.lambdaL2=lambdaL2;
+ p.X = X;
+ p.nlev = nlev;
+ p.y = y;
+ p.lambdaL1 = lambdaL1;
+ p.lambdaL2 = lambdaL2;
// Initial fit.
- mLogLik = wgsl_cat_optim_f(beta,&p);
+ mLogLik = wgsl_cat_optim_f(beta, &p);
- gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix.
- gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
+ gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
+ gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
- gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
- gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+ gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
+ gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
- for(iter=0;iter<100;iter++){
- wgsl_cat_optim_hessian(beta,&p,myH); // Calculate Hessian.
- wgsl_cat_optim_df(beta,&p,myG); // Calculate Gradient.
- gsl_linalg_QR_decomp(myH,tau); // Calculate next beta.
- gsl_linalg_QR_solve(myH,tau,myG,stBeta);
- gsl_vector_sub(beta,stBeta);
+ for (iter = 0; iter < 100; iter++) {
+ wgsl_cat_optim_hessian(beta, &p, myH); // Calculate Hessian.
+ wgsl_cat_optim_df(beta, &p, myG); // Calculate Gradient.
+ gsl_linalg_QR_decomp(myH, tau); // Calculate next beta.
+ gsl_linalg_QR_solve(myH, tau, myG, stBeta);
+ gsl_vector_sub(beta, stBeta);
// Monitor convergence.
- maxchange=0;
- for(int i=0;i<npar; i++)
- if(maxchange<fabs(stBeta->data[i]))
- maxchange=fabs(stBeta->data[i]);
+ maxchange = 0;
+ for (int i = 0; i < npar; i++)
+ if (maxchange < fabs(stBeta->data[i]))
+ maxchange = fabs(stBeta->data[i]);
#ifdef _RPR_DEBUG_
- mLogLik = wgsl_cat_optim_f(beta,&p);
+ mLogLik = wgsl_cat_optim_f(beta, &p);
#endif
- if(maxchange<1E-4)
+ if (maxchange < 1E-4)
break;
}
// Final fit.
- mLogLik = wgsl_cat_optim_f(beta,&p);
+ mLogLik = wgsl_cat_optim_f(beta, &p);
- gsl_vector_free (tau);
- gsl_vector_free (stBeta);
- gsl_vector_free (myG);
- gsl_matrix_free (myH);
+ gsl_vector_free(tau);
+ gsl_vector_free(stBeta);
+ gsl_vector_free(myG);
+ gsl_matrix_free(myH);
return 0;
}
@@ -509,15 +499,15 @@ int logistic_cat_fit(gsl_vector *beta,
// I need to bundle all the data that goes to the function to optimze
// together.
-typedef struct{
- gsl_matrix *Xc; // continuous covariates; Matrix Nobs x Kc
+typedef struct {
+ gsl_matrix *Xc; // continuous covariates; Matrix Nobs x Kc
gsl_vector *y;
double lambdaL1;
double lambdaL2;
-}fix_parm_cont_T;
+} fix_parm_cont_T;
double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y,
- double lambdaL1, double lambdaL2) {
+ double lambdaL1, double lambdaL2) {
int n = y->size;
int npar = beta->size;
double total = 0;
@@ -526,82 +516,81 @@ double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y,
// omp_set_num_threads(ompthr); /\* Changed loop start at 1 instead
// of 0 to avoid regularization of beta_0*\/ /\*#pragma omp parallel
// for reduction (+:total)*\/
- for(int i = 1; i < npar; ++i)
- total += beta->data[i]*beta->data[i];
- total = (-total*lambdaL2/2);
+ for (int i = 1; i < npar; ++i)
+ total += beta->data[i] * beta->data[i];
+ total = (-total * lambdaL2 / 2);
// /\*#pragma omp parallel for reduction (+:aux)*\/
- for(int i = 1; i < npar; ++i)
- aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
- total = total-aux*lambdaL1;
+ for (int i = 1; i < npar; ++i)
+ aux += (beta->data[i] > 0 ? beta->data[i] : -beta->data[i]);
+ total = total - aux * lambdaL1;
// #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
// #reduction (+:total)
- for(int i = 0; i < n; ++i) {
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < (Xc->size2); ++k)
- Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
- total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
+ for (int i = 0; i < n; ++i) {
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < (Xc->size2); ++k)
+ Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+ total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
}
return -total;
}
-void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
- // length = 1 + Sum_k(C_k-1).
- gsl_matrix *Xc, // Continuous covariates matrix,
- // Nobs x Kc (NULL if not used).
- gsl_vector *yhat) { // Vector of prob. predicted by
- // the logistic.
- for(int i = 0; i < Xc->size1; ++i) {
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < (Xc->size2); ++k)
- Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++];
- yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai));
+void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1 + Sum_k(C_k-1).
+ gsl_matrix *Xc, // Continuous covariates matrix,
+ // Nobs x Kc (NULL if not used).
+ gsl_vector *yhat) { // Vector of prob. predicted by
+ // the logistic.
+ for (int i = 0; i < Xc->size1; ++i) {
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < (Xc->size2); ++k)
+ Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
+ yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
}
}
// The gradient of f, df = (df/dx, df/dy).
-void wgsl_cont_optim_df (const gsl_vector *beta, void *params,
- gsl_vector *out) {
+void wgsl_cont_optim_df(const gsl_vector *beta, void *params, gsl_vector *out) {
fix_parm_cont_T *p = (fix_parm_cont_T *)params;
int n = p->y->size;
int Kc = p->Xc->size2;
int npar = beta->size;
// Intitialize gradient out necessary?
- for(int i = 0; i < npar; ++i)
- out->data[i]= 0;
+ for (int i = 0; i < npar; ++i)
+ out->data[i] = 0;
// Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
- for(int i = 1; i < npar; ++i)
- out->data[i]= p->lambdaL2*beta->data[i];
- for(int i = 1; i < npar; ++i)
- out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0));
+ for (int i = 1; i < npar; ++i)
+ out->data[i] = p->lambdaL2 * beta->data[i];
+ for (int i = 1; i < npar; ++i)
+ out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0));
- for(int i = 0; i < n; ++i) {
- double pn=0;
- double Xbetai=beta->data[0];
- int iParm=1;
- for(int k = 0; k < Kc; ++k)
- Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
+ for (int i = 0; i < n; ++i) {
+ double pn = 0;
+ double Xbetai = beta->data[0];
+ int iParm = 1;
+ for (int k = 0; k < Kc; ++k)
+ Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm++];
- pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) );
+ pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai)));
- out->data[0]+= pn;
- iParm=1;
+ out->data[0] += pn;
+ iParm = 1;
// Adding the continuous.
- for(int k = 0; k < Kc; ++k) {
- out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
+ for (int k = 0; k < Kc; ++k) {
+ out->data[iParm++] += gsl_matrix_get(p->Xc, i, k) * pn;
}
}
}
// The Hessian of f.
-void wgsl_cont_optim_hessian (const gsl_vector *beta, void *params,
- gsl_matrix *out) {
+void wgsl_cont_optim_hessian(const gsl_vector *beta, void *params,
+ gsl_matrix *out) {
fix_parm_cont_T *p = (fix_parm_cont_T *)params;
int n = p->y->size;
int Kc = p->Xc->size2;
@@ -614,111 +603,109 @@ void wgsl_cont_optim_hessian (const gsl_vector *beta, void *params,
// Changed loop start at 1 instead of 0 to avoid regularization of
// beta 0.
- for(int i = 1; i < npar; ++i)
- gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this.
+ for (int i = 1; i < npar; ++i)
+ gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this.
// L1 penalty not working yet, as not differentiable, I may need to
// do coordinate descent (as in glm_net).
- for(int i = 0; i < n; ++i) {
- double pn=0;
- double aux=0;
- double Xbetai=beta->data[0];
- int iParm1=1;
- for(int k = 0; k < Kc; ++k)
- Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++];
+ for (int i = 0; i < n; ++i) {
+ double pn = 0;
+ double aux = 0;
+ double Xbetai = beta->data[0];
+ int iParm1 = 1;
+ for (int k = 0; k < Kc; ++k)
+ Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm1++];
- pn= 1/(1 + gsl_sf_exp(-Xbetai));
+ pn = 1 / (1 + gsl_sf_exp(-Xbetai));
// Add a protection for pn very close to 0 or 1?
- aux=pn*(1-pn);
+ aux = pn * (1 - pn);
// Calculate sub-gradient vector gn.
gsl_vector_set_zero(gn);
- gn->data[0]= 1;
- iParm1=1;
- for(int k = 0; k < Kc; ++k) {
- gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k);
+ gn->data[0] = 1;
+ iParm1 = 1;
+ for (int k = 0; k < Kc; ++k) {
+ gn->data[iParm1++] = gsl_matrix_get(p->Xc, i, k);
}
- for(int k1=0;k1<npar; ++k1)
- if(gn->data[k1]!=0)
- for(int k2=0;k2<npar; ++k2)
- if(gn->data[k2]!=0)
- *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]);
+ for (int k1 = 0; k1 < npar; ++k1)
+ if (gn->data[k1] != 0)
+ for (int k2 = 0; k2 < npar; ++k2)
+ if (gn->data[k2] != 0)
+ *gsl_matrix_ptr(out, k1, k2) += (aux * gn->data[k1] * gn->data[k2]);
}
gsl_vector_free(gn);
}
double wgsl_cont_optim_f(gsl_vector *v, void *params) {
- double mLogLik=0;
+ double mLogLik = 0;
fix_parm_cont_T *p = (fix_parm_cont_T *)params;
- mLogLik = fLogit_cont(v,p->Xc,p->y,p->lambdaL1,p->lambdaL2);
+ mLogLik = fLogit_cont(v, p->Xc, p->y, p->lambdaL1, p->lambdaL2);
return mLogLik;
}
// Compute both f and df together.
-void wgsl_cont_optim_fdf (gsl_vector *x, void *params,
- double *f, gsl_vector *df) {
+void wgsl_cont_optim_fdf(gsl_vector *x, void *params, double *f,
+ gsl_vector *df) {
*f = wgsl_cont_optim_f(x, params);
wgsl_cont_optim_df(x, params, df);
}
-int logistic_cont_fit (gsl_vector *beta,
- gsl_matrix *Xc, // Continuous covariates matrix,
- // Nobs x Kc (NULL if not used).
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2) {
+int logistic_cont_fit(gsl_vector *beta,
+ gsl_matrix *Xc, // Continuous covariates matrix,
+ // Nobs x Kc (NULL if not used).
+ gsl_vector *y, double lambdaL1, double lambdaL2) {
- double mLogLik=0;
+ double mLogLik = 0;
fix_parm_cont_T p;
int npar = beta->size;
- int iter=0;
- double maxchange=0;
+ int iter = 0;
+ double maxchange = 0;
// Initializing fix parameters.
- p.Xc=Xc;
- p.y=y;
- p.lambdaL1=lambdaL1;
- p.lambdaL2=lambdaL2;
+ p.Xc = Xc;
+ p.y = y;
+ p.lambdaL1 = lambdaL1;
+ p.lambdaL2 = lambdaL2;
// Initial fit.
- mLogLik = wgsl_cont_optim_f(beta,&p);
+ mLogLik = wgsl_cont_optim_f(beta, &p);
- gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix.
- gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
+ gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
+ gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
- gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
- gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+ gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
+ gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
- for(iter=0;iter<100;iter++){
- wgsl_cont_optim_hessian(beta,&p,myH); // Calculate Hessian.
- wgsl_cont_optim_df(beta,&p,myG); // Calculate Gradient.
- gsl_linalg_QR_decomp(myH,tau); // Calculate next beta.
- gsl_linalg_QR_solve(myH,tau,myG,stBeta);
- gsl_vector_sub(beta,stBeta);
+ for (iter = 0; iter < 100; iter++) {
+ wgsl_cont_optim_hessian(beta, &p, myH); // Calculate Hessian.
+ wgsl_cont_optim_df(beta, &p, myG); // Calculate Gradient.
+ gsl_linalg_QR_decomp(myH, tau); // Calculate next beta.
+ gsl_linalg_QR_solve(myH, tau, myG, stBeta);
+ gsl_vector_sub(beta, stBeta);
// Monitor convergence.
- maxchange=0;
- for(int i=0;i<npar; i++)
- if(maxchange<fabs(stBeta->data[i]))
- maxchange=fabs(stBeta->data[i]);
+ maxchange = 0;
+ for (int i = 0; i < npar; i++)
+ if (maxchange < fabs(stBeta->data[i]))
+ maxchange = fabs(stBeta->data[i]);
#ifdef _RPR_DEBUG_
- mLogLik = wgsl_cont_optim_f(beta,&p);
+ mLogLik = wgsl_cont_optim_f(beta, &p);
#endif
- if(maxchange<1E-4)
+ if (maxchange < 1E-4)
break;
}
// Final fit.
- mLogLik = wgsl_cont_optim_f(beta,&p);
+ mLogLik = wgsl_cont_optim_f(beta, &p);
- gsl_vector_free (tau);
- gsl_vector_free (stBeta);
- gsl_vector_free (myG);
- gsl_matrix_free (myH);
+ gsl_vector_free(tau);
+ gsl_vector_free(stBeta);
+ gsl_vector_free(myG);
+ gsl_matrix_free(myH);
return 0;
}
diff --git a/src/logistic.h b/src/logistic.h
index b61ab14..bebcbf6 100644
--- a/src/logistic.h
+++ b/src/logistic.h
@@ -3,73 +3,63 @@
// Mixed interface.
void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with num. categories.
- gsl_matrix *Xc, // Continuous covariates matrix
- // Nobs x Kc
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with num. categories.
+ gsl_matrix *Xc, // Continuous covariates matrix
+ // Nobs x Kc
+ gsl_vector *yhat); // Vector of prob. predicted by
+ // the logistic.
int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_matrix *Xc, // Continuous covariates
- // matrix Nobs x Kc
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Reg. L1 0.0 if not used.
- double lambdaL2); // Reg. L2 0.0 if not used.
+ // length = 1+Sum_k(C_k-1)+Kc
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_matrix *Xc, // Continuous covariates
+ // matrix Nobs x Kc
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Reg. L1 0.0 if not used.
+ double lambdaL2); // Reg. L2 0.0 if not used.
-double fLogit_mixed(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
+double fLogit_mixed(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+ gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
+ gsl_vector *y, double lambdaL1, double lambdaL2);
// Categorical-only interface.
void logistic_cat_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_vector *yhat); // Vector of prob. predicted by
+ // the logistic.
int logistic_cat_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K .
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used
- double lambdaL2); // Regularization L2, 0 if not used
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K .
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Regularization L1, 0 if not used
+ double lambdaL2); // Regularization L2, 0 if not used
-double fLogit_cat(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
+double fLogit_cat(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
+ gsl_vector *y, double lambdaL1, double lambdaL2);
// Continuous-only interface.
-void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
- // length = 1 + Sum_k(C_k-1) + Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *yhat);// Vector of prob. predicted
- // by the logistic.
+void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1 + Sum_k(C_k-1) + Kc.
+ gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
+ gsl_vector *yhat); // Vector of prob. predicted
+ // by the logistic.
int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used.
- double lambdaL2); // Regularization L2, 0 if not used.
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Regularization L1, 0 if not used.
+ double lambdaL2); // Regularization L2, 0 if not used.
double fLogit_cont(gsl_vector *beta,
- gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
+ gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
+ gsl_vector *y, double lambdaL1, double lambdaL2);
#endif
diff --git a/src/main.cpp b/src/main.cpp
index c7f0573..833136c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -16,57 +16,67 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include "gemma.h"
#include <fstream>
+#include <iostream>
#include <sstream>
#include <sys/stat.h>
#include <sys/types.h>
-#include "gemma.h"
using namespace std;
-int main(int argc, char * argv[]) {
- GEMMA cGemma;
- PARAM cPar;
-
- if (argc <= 1) {
- cGemma.PrintHeader();
- return EXIT_SUCCESS;
- }
- if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'h') {
- cGemma.PrintHelp(0);
- return EXIT_SUCCESS;
- }
- if (argc==3 && argv[1][0] == '-' && argv[1][1] == 'h') {
- string str;
- str.assign(argv[2]);
- cGemma.PrintHelp(atoi(str.c_str()));
- return EXIT_SUCCESS;
- }
- if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'l') {
- cGemma.PrintLicense();
- return EXIT_SUCCESS;
- }
-
- cGemma.Assign(argc, argv, cPar);
-
- ifstream check_dir((cPar.path_out).c_str());
- if (!check_dir) {
- mkdir((cPar.path_out).c_str(), S_IRWXU|S_IRGRP|S_IROTH);
- }
-
- if (cPar.error==true) {return EXIT_FAILURE;}
-
- if (cPar.mode_silence) {stringstream ss; cout.rdbuf (ss.rdbuf());}
-
- cPar.CheckParam();
-
- if (cPar.error==true) {return EXIT_FAILURE;}
-
- cGemma.BatchRun(cPar);
-
- if (cPar.error==true) {return EXIT_FAILURE;}
-
- cGemma.WriteLog(argc, argv, cPar);
-
- return EXIT_SUCCESS; }
+int main(int argc, char *argv[]) {
+ GEMMA cGemma;
+ PARAM cPar;
+
+ if (argc <= 1) {
+ cGemma.PrintHeader();
+ return EXIT_SUCCESS;
+ }
+ if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'h') {
+ cGemma.PrintHelp(0);
+ return EXIT_SUCCESS;
+ }
+ if (argc == 3 && argv[1][0] == '-' && argv[1][1] == 'h') {
+ string str;
+ str.assign(argv[2]);
+ cGemma.PrintHelp(atoi(str.c_str()));
+ return EXIT_SUCCESS;
+ }
+ if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'l') {
+ cGemma.PrintLicense();
+ return EXIT_SUCCESS;
+ }
+
+ cGemma.Assign(argc, argv, cPar);
+
+ ifstream check_dir((cPar.path_out).c_str());
+ if (!check_dir) {
+ mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH);
+ }
+
+ if (cPar.error == true) {
+ return EXIT_FAILURE;
+ }
+
+ if (cPar.mode_silence) {
+ stringstream ss;
+ cout.rdbuf(ss.rdbuf());
+ }
+
+ cPar.CheckParam();
+
+ if (cPar.error == true) {
+ return EXIT_FAILURE;
+ }
+
+ cGemma.BatchRun(cPar);
+
+ if (cPar.error == true) {
+ return EXIT_FAILURE;
+ }
+
+ cGemma.WriteLog(argc, argv, cPar);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 709bdde..9e19bf1 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -16,394 +16,381 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include <bitset>
+#include <cmath>
+#include <cstring>
#include <fstream>
-#include <sstream>
-#include <string>
#include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
+#include <limits.h>
#include <map>
#include <set>
-#include <cstring>
-#include <cmath>
+#include <sstream>
#include <stdio.h>
#include <stdlib.h>
-#include <limits.h>
+#include <string>
+#include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
+#include "Eigen/Dense"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
-#include "Eigen/Dense"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
-#include "lapack.h"
#include "eigenlib.h"
+#include "lapack.h"
#include "mathfunc.h"
using namespace std;
using namespace Eigen;
-//calculate variance of a vector
-double VectorVar (const gsl_vector *v) {
- double d, m=0.0, m2=0.0;
- for (size_t i=0; i<v->size; ++i) {
- d=gsl_vector_get (v, i);
- m+=d;
- m2+=d*d;
- }
- m/=(double)v->size;
- m2/=(double)v->size;
- return m2-m*m;
+// calculate variance of a vector
+double VectorVar(const gsl_vector *v) {
+ double d, m = 0.0, m2 = 0.0;
+ for (size_t i = 0; i < v->size; ++i) {
+ d = gsl_vector_get(v, i);
+ m += d;
+ m2 += d * d;
+ }
+ m /= (double)v->size;
+ m2 /= (double)v->size;
+ return m2 - m * m;
}
// Center the matrix G.
-void CenterMatrix (gsl_matrix *G) {
- double d;
- gsl_vector *w=gsl_vector_alloc (G->size1);
- gsl_vector *Gw=gsl_vector_alloc (G->size1);
- gsl_vector_set_all (w, 1.0);
-
- gsl_blas_dgemv (CblasNoTrans, 1.0, G, w, 0.0, Gw);
- gsl_blas_dsyr2 (CblasUpper, -1.0/(double)G->size1, Gw, w, G);
- gsl_blas_ddot (w, Gw, &d);
- gsl_blas_dsyr (CblasUpper, d/((double)G->size1*(double)G->size1),
- w, G);
-
- for (size_t i=0; i<G->size1; ++i) {
- for (size_t j=0; j<i; ++j) {
- d=gsl_matrix_get (G, j, i);
- gsl_matrix_set (G, i, j, d);
- }
- }
-
- gsl_vector_free(w);
- gsl_vector_free(Gw);
-
- return;
+void CenterMatrix(gsl_matrix *G) {
+ double d;
+ gsl_vector *w = gsl_vector_alloc(G->size1);
+ gsl_vector *Gw = gsl_vector_alloc(G->size1);
+ gsl_vector_set_all(w, 1.0);
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
+ gsl_blas_dsyr2(CblasUpper, -1.0 / (double)G->size1, Gw, w, G);
+ gsl_blas_ddot(w, Gw, &d);
+ gsl_blas_dsyr(CblasUpper, d / ((double)G->size1 * (double)G->size1), w, G);
+
+ for (size_t i = 0; i < G->size1; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ d = gsl_matrix_get(G, j, i);
+ gsl_matrix_set(G, i, j, d);
+ }
+ }
+
+ gsl_vector_free(w);
+ gsl_vector_free(Gw);
+
+ return;
}
// Center the matrix G.
-void CenterMatrix (gsl_matrix *G, const gsl_vector *w) {
- double d, wtw;
- gsl_vector *Gw=gsl_vector_alloc (G->size1);
-
- gsl_blas_ddot (w, w, &wtw);
- gsl_blas_dgemv (CblasNoTrans, 1.0, G, w, 0.0, Gw);
- gsl_blas_dsyr2 (CblasUpper, -1.0/wtw, Gw, w, G);
- gsl_blas_ddot (w, Gw, &d);
- gsl_blas_dsyr (CblasUpper, d/(wtw*wtw), w, G);
-
- for (size_t i=0; i<G->size1; ++i) {
- for (size_t j=0; j<i; ++j) {
- d=gsl_matrix_get (G, j, i);
- gsl_matrix_set (G, i, j, d);
- }
- }
-
- gsl_vector_free(Gw);
-
- return;
+void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
+ double d, wtw;
+ gsl_vector *Gw = gsl_vector_alloc(G->size1);
+
+ gsl_blas_ddot(w, w, &wtw);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
+ gsl_blas_dsyr2(CblasUpper, -1.0 / wtw, Gw, w, G);
+ gsl_blas_ddot(w, Gw, &d);
+ gsl_blas_dsyr(CblasUpper, d / (wtw * wtw), w, G);
+
+ for (size_t i = 0; i < G->size1; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ d = gsl_matrix_get(G, j, i);
+ gsl_matrix_set(G, i, j, d);
+ }
+ }
+
+ gsl_vector_free(Gw);
+
+ return;
}
// Center the matrix G.
-void CenterMatrix (gsl_matrix *G, const gsl_matrix *W) {
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWiWt=gsl_matrix_alloc (W->size2, G->size1);
- gsl_matrix *GW=gsl_matrix_alloc (G->size1, W->size2);
- gsl_matrix *WtGW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *Gtmp=gsl_matrix_alloc (G->size1, G->size1);
-
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
- LUDecomp (WtW, pmt, &sig);
- LUInvert (WtW, pmt, WtWi);
-
- gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, WtWi, W, 0.0, WtWiWt);
- gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW);
- gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0,
- Gtmp);
-
- gsl_matrix_sub (G, Gtmp);
- gsl_matrix_transpose (Gtmp);
- gsl_matrix_sub (G, Gtmp);
-
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW);
- //GW is destroyed.
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW);
- gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0,
- Gtmp);
-
- gsl_matrix_add (G, Gtmp);
-
- gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
- gsl_matrix_free(WtWiWt);
- gsl_matrix_free(GW);
- gsl_matrix_free(WtGW);
- gsl_matrix_free(Gtmp);
-
- return;
+void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1);
+ gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2);
+ gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+ LUDecomp(WtW, pmt, &sig);
+ LUInvert(WtW, pmt, WtWi);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, WtWi, W, 0.0, WtWiWt);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
+
+ gsl_matrix_sub(G, Gtmp);
+ gsl_matrix_transpose(Gtmp);
+ gsl_matrix_sub(G, Gtmp);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW);
+ // GW is destroyed.
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp);
+
+ gsl_matrix_add(G, Gtmp);
+
+ gsl_matrix_free(WtW);
+ gsl_matrix_free(WtWi);
+ gsl_matrix_free(WtWiWt);
+ gsl_matrix_free(GW);
+ gsl_matrix_free(WtGW);
+ gsl_matrix_free(Gtmp);
+
+ return;
}
// "Standardize" the matrix G such that all diagonal elements = 1.
-void StandardizeMatrix (gsl_matrix *G) {
- double d=0.0;
- vector<double> vec_d;
-
- for (size_t i=0; i<G->size1; ++i) {
- vec_d.push_back(gsl_matrix_get(G, i, i));
- }
- for (size_t i=0; i<G->size1; ++i) {
- for (size_t j=i; j<G->size2; ++j) {
- if (j==i) {
- gsl_matrix_set(G, i, j, 1);
- } else {
- d=gsl_matrix_get(G, i, j);
- d/=sqrt(vec_d[i]*vec_d[j]);
- gsl_matrix_set(G, i, j, d);
- gsl_matrix_set(G, j, i, d);
- }
- }
- }
-
- return;
+void StandardizeMatrix(gsl_matrix *G) {
+ double d = 0.0;
+ vector<double> vec_d;
+
+ for (size_t i = 0; i < G->size1; ++i) {
+ vec_d.push_back(gsl_matrix_get(G, i, i));
+ }
+ for (size_t i = 0; i < G->size1; ++i) {
+ for (size_t j = i; j < G->size2; ++j) {
+ if (j == i) {
+ gsl_matrix_set(G, i, j, 1);
+ } else {
+ d = gsl_matrix_get(G, i, j);
+ d /= sqrt(vec_d[i] * vec_d[j]);
+ gsl_matrix_set(G, i, j, d);
+ gsl_matrix_set(G, j, i, d);
+ }
+ }
+ }
+
+ return;
}
// Scale the matrix G such that the mean diagonal = 1.
-double ScaleMatrix (gsl_matrix *G) {
- double d=0.0;
+double ScaleMatrix(gsl_matrix *G) {
+ double d = 0.0;
- for (size_t i=0; i<G->size1; ++i) {
- d+=gsl_matrix_get(G, i, i);
- }
- d/=(double)G->size1;
+ for (size_t i = 0; i < G->size1; ++i) {
+ d += gsl_matrix_get(G, i, i);
+ }
+ d /= (double)G->size1;
- if (d!=0) {
- gsl_matrix_scale (G, 1.0/d);
- }
+ if (d != 0) {
+ gsl_matrix_scale(G, 1.0 / d);
+ }
- return d;
+ return d;
}
// Center the vector y.
-double CenterVector (gsl_vector *y) {
- double d=0.0;
+double CenterVector(gsl_vector *y) {
+ double d = 0.0;
- for (size_t i=0; i<y->size; ++i) {
- d+=gsl_vector_get (y, i);
- }
- d/=(double)y->size;
+ for (size_t i = 0; i < y->size; ++i) {
+ d += gsl_vector_get(y, i);
+ }
+ d /= (double)y->size;
- gsl_vector_add_constant (y, -1.0*d);
+ gsl_vector_add_constant(y, -1.0 * d);
- return d;
+ return d;
}
// Center the vector y.
-void CenterVector (gsl_vector *y, const gsl_matrix *W) {
- gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_vector *Wty=gsl_vector_alloc (W->size2);
- gsl_vector *WtWiWty=gsl_vector_alloc (W->size2);
+void CenterVector(gsl_vector *y, const gsl_matrix *W) {
+ gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
+ gsl_vector *Wty = gsl_vector_alloc(W->size2);
+ gsl_vector *WtWiWty = gsl_vector_alloc(W->size2);
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
- LUDecomp (WtW, pmt, &sig);
- LUSolve (WtW, pmt, Wty, WtWiWty);
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
+ LUDecomp(WtW, pmt, &sig);
+ LUSolve(WtW, pmt, Wty, WtWiWty);
- gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
+ gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
- gsl_matrix_free(WtW);
- gsl_vector_free(Wty);
- gsl_vector_free(WtWiWty);
+ gsl_matrix_free(WtW);
+ gsl_vector_free(Wty);
+ gsl_vector_free(WtWiWty);
- return;
+ return;
}
// "Standardize" vector y to have mean 0 and y^ty/n=1.
-void StandardizeVector (gsl_vector *y) {
- double d=0.0, m=0.0, v=0.0;
+void StandardizeVector(gsl_vector *y) {
+ double d = 0.0, m = 0.0, v = 0.0;
- for (size_t i=0; i<y->size; ++i) {
- d=gsl_vector_get (y, i);
- m+=d;
- v+=d*d;
+ for (size_t i = 0; i < y->size; ++i) {
+ d = gsl_vector_get(y, i);
+ m += d;
+ v += d * d;
}
- m/=(double)y->size;
- v/=(double)y->size;
- v-=m*m;
+ m /= (double)y->size;
+ v /= (double)y->size;
+ v -= m * m;
- gsl_vector_add_constant (y, -1.0*m);
- gsl_vector_scale (y, 1.0/sqrt(v));
+ gsl_vector_add_constant(y, -1.0 * m);
+ gsl_vector_scale(y, 1.0 / sqrt(v));
return;
}
// Calculate UtX.
-void CalcUtX (const gsl_matrix *U, gsl_matrix *UtX) {
- gsl_matrix *X=gsl_matrix_alloc (UtX->size1, UtX->size2);
- gsl_matrix_memcpy (X, UtX);
- eigenlib_dgemm ("T", "N", 1.0, U, X, 0.0, UtX);
- gsl_matrix_free (X);
+void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
+ gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2);
+ gsl_matrix_memcpy(X, UtX);
+ eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+ gsl_matrix_free(X);
- return;
+ return;
}
-void CalcUtX (const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
- eigenlib_dgemm ("T", "N", 1.0, U, X, 0.0, UtX);
- return;
+void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
+ eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+ return;
}
-void CalcUtX (const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
- return;
+void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
+ return;
}
// Kronecker product.
void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
- for (size_t i=0; i<K->size1; i++) {
- for (size_t j=0; j<K->size2; j++) {
- gsl_matrix_view H_sub=
- gsl_matrix_submatrix (H, i*V->size1, j*V->size2,
- V->size1, V->size2);
- gsl_matrix_memcpy (&H_sub.matrix, V);
- gsl_matrix_scale (&H_sub.matrix,
- gsl_matrix_get (K, i, j));
- }
- }
- return;
+ for (size_t i = 0; i < K->size1; i++) {
+ for (size_t j = 0; j < K->size2; j++) {
+ gsl_matrix_view H_sub = gsl_matrix_submatrix(
+ H, i * V->size1, j * V->size2, V->size1, V->size2);
+ gsl_matrix_memcpy(&H_sub.matrix, V);
+ gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
+ }
+ }
+ return;
}
// Symmetric K matrix.
void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
- for (size_t i=0; i<K->size1; i++) {
- for (size_t j=i; j<K->size2; j++) {
- gsl_matrix_view H_sub=
- gsl_matrix_submatrix (H, i*V->size1, j*V->size2,
- V->size1, V->size2);
- gsl_matrix_memcpy (&H_sub.matrix, V);
- gsl_matrix_scale (&H_sub.matrix,
- gsl_matrix_get (K, i, j));
-
- if (i!=j) {
- gsl_matrix_view H_sub_sym=
- gsl_matrix_submatrix (H, j*V->size1,
- i*V->size2, V->size1,
- V->size2);
- gsl_matrix_memcpy (&H_sub_sym.matrix,
- &H_sub.matrix);
- }
- }
- }
- return;
+ for (size_t i = 0; i < K->size1; i++) {
+ for (size_t j = i; j < K->size2; j++) {
+ gsl_matrix_view H_sub = gsl_matrix_submatrix(
+ H, i * V->size1, j * V->size2, V->size1, V->size2);
+ gsl_matrix_memcpy(&H_sub.matrix, V);
+ gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
+
+ if (i != j) {
+ gsl_matrix_view H_sub_sym = gsl_matrix_submatrix(
+ H, j * V->size1, i * V->size2, V->size1, V->size2);
+ gsl_matrix_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
+ }
+ }
+ }
+ return;
}
// This function calculates HWE p value with methods described in
// Wigginton et al. (2005) AJHG; it is based on the code in plink 1.07.
-double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
- if ( (n_hom1+n_hom2+n_ab)==0 ) {return 1;}
-
- // "AA" is the rare allele.
- int n_aa=n_hom1 < n_hom2 ? n_hom1 : n_hom2;
- int n_bb=n_hom1 < n_hom2 ? n_hom2 : n_hom1;
-
- int rare_copies = 2 * n_aa + n_ab;
- int genotypes = n_ab + n_bb + n_aa;
-
- double * het_probs = (double *) malloc( (rare_copies + 1) *
- sizeof(double));
- if (het_probs == NULL)
- cout << "Internal error: SNP-HWE: Unable to allocate array" <<
- endl;
-
- int i;
- for (i = 0; i <= rare_copies; i++)
- het_probs[i] = 0.0;
-
- // Start at midpoint.
- // XZ modified to add (long int)
- int mid = ((long int)rare_copies *
- (2 * (long int)genotypes - (long int)rare_copies)) /
- (2 * (long int)genotypes);
-
- // Check to ensure that midpoint and rare alleles have same
- // parity.
- if ((rare_copies & 1) ^ (mid & 1))
- mid++;
-
- int curr_hets = mid;
- int curr_homr = (rare_copies - mid) / 2;
- int curr_homc = genotypes - curr_hets - curr_homr;
-
- het_probs[mid] = 1.0;
- double sum = het_probs[mid];
- for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) {
- het_probs[curr_hets - 2] = het_probs[curr_hets] *
- curr_hets * (curr_hets - 1.0)
- / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
- sum += het_probs[curr_hets - 2];
-
- // Two fewer heterozygotes for next iteration; add one
- // rare, one common homozygote.
- curr_homr++;
- curr_homc++;
- }
+double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
+ if ((n_hom1 + n_hom2 + n_ab) == 0) {
+ return 1;
+ }
- curr_hets = mid;
- curr_homr = (rare_copies - mid) / 2;
- curr_homc = genotypes - curr_hets - curr_homr;
- for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) {
- het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 *
- curr_homr * curr_homc /
- ((curr_hets + 2.0) * (curr_hets + 1.0));
- sum += het_probs[curr_hets + 2];
-
- // Add 2 heterozygotes for next iteration; subtract
- // one rare, one common homozygote.
- curr_homr--;
- curr_homc--;
- }
+ // "AA" is the rare allele.
+ int n_aa = n_hom1 < n_hom2 ? n_hom1 : n_hom2;
+ int n_bb = n_hom1 < n_hom2 ? n_hom2 : n_hom1;
+
+ int rare_copies = 2 * n_aa + n_ab;
+ int genotypes = n_ab + n_bb + n_aa;
+
+ double *het_probs = (double *)malloc((rare_copies + 1) * sizeof(double));
+ if (het_probs == NULL)
+ cout << "Internal error: SNP-HWE: Unable to allocate array" << endl;
+
+ int i;
+ for (i = 0; i <= rare_copies; i++)
+ het_probs[i] = 0.0;
+
+ // Start at midpoint.
+ // XZ modified to add (long int)
+ int mid = ((long int)rare_copies *
+ (2 * (long int)genotypes - (long int)rare_copies)) /
+ (2 * (long int)genotypes);
+
+ // Check to ensure that midpoint and rare alleles have same
+ // parity.
+ if ((rare_copies & 1) ^ (mid & 1))
+ mid++;
+
+ int curr_hets = mid;
+ int curr_homr = (rare_copies - mid) / 2;
+ int curr_homc = genotypes - curr_hets - curr_homr;
+
+ het_probs[mid] = 1.0;
+ double sum = het_probs[mid];
+ for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) {
+ het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets *
+ (curr_hets - 1.0) /
+ (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
+ sum += het_probs[curr_hets - 2];
+
+ // Two fewer heterozygotes for next iteration; add one
+ // rare, one common homozygote.
+ curr_homr++;
+ curr_homc++;
+ }
- for (i = 0; i <= rare_copies; i++)
- het_probs[i] /= sum;
+ curr_hets = mid;
+ curr_homr = (rare_copies - mid) / 2;
+ curr_homc = genotypes - curr_hets - curr_homr;
+ for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) {
+ het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr *
+ curr_homc /
+ ((curr_hets + 2.0) * (curr_hets + 1.0));
+ sum += het_probs[curr_hets + 2];
+
+ // Add 2 heterozygotes for next iteration; subtract
+ // one rare, one common homozygote.
+ curr_homr--;
+ curr_homc--;
+ }
+
+ for (i = 0; i <= rare_copies; i++)
+ het_probs[i] /= sum;
- double p_hwe = 0.0;
+ double p_hwe = 0.0;
- // p-value calculation for p_hwe.
- for (i = 0; i <= rare_copies; i++)
- {
- if (het_probs[i] > het_probs[n_ab])
- continue;
- p_hwe += het_probs[i];
- }
+ // p-value calculation for p_hwe.
+ for (i = 0; i <= rare_copies; i++) {
+ if (het_probs[i] > het_probs[n_ab])
+ continue;
+ p_hwe += het_probs[i];
+ }
- p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
+ p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
- free(het_probs);
+ free(het_probs);
- return p_hwe;
+ return p_hwe;
}
-double UcharToDouble02(const unsigned char c) {
- return (double)c*0.01;
-}
+double UcharToDouble02(const unsigned char c) { return (double)c * 0.01; }
unsigned char Double02ToUchar(const double dosage) {
- return (int) (dosage*100);
+ return (int)(dosage * 100);
}
-void uchar_matrix_get_row (const vector<vector<unsigned char> > &X,
- const size_t i_row, VectorXd &x_row) {
- if (i_row<X.size()) {
- for (size_t j=0; j<x_row.size(); j++) {
- x_row(j)=UcharToDouble02(X[i_row][j]);
+void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
+ const size_t i_row, VectorXd &x_row) {
+ if (i_row < X.size()) {
+ for (size_t j = 0; j < x_row.size(); j++) {
+ x_row(j) = UcharToDouble02(X[i_row][j]);
}
} else {
std::cerr << "Error return genotype vector...\n";
diff --git a/src/mathfunc.h b/src/mathfunc.h
index b24364b..29eafe4 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -19,32 +19,32 @@
#ifndef __MATHFUNC_H__
#define __MATHFUNC_H__
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
#include "Eigen/Dense"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
using namespace std;
using namespace Eigen;
-double VectorVar (const gsl_vector *v);
-void CenterMatrix (gsl_matrix *G);
-void CenterMatrix (gsl_matrix *G, const gsl_vector *w);
-void CenterMatrix (gsl_matrix *G, const gsl_matrix *W);
-void StandardizeMatrix (gsl_matrix *G);
-double ScaleMatrix (gsl_matrix *G);
-double CenterVector (gsl_vector *y);
-void CenterVector (gsl_vector *y, const gsl_matrix *W);
-void StandardizeVector (gsl_vector *y);
-void CalcUtX (const gsl_matrix *U, gsl_matrix *UtX);
-void CalcUtX (const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX);
-void CalcUtX (const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx);
-double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab);
+double VectorVar(const gsl_vector *v);
+void CenterMatrix(gsl_matrix *G);
+void CenterMatrix(gsl_matrix *G, const gsl_vector *w);
+void CenterMatrix(gsl_matrix *G, const gsl_matrix *W);
+void StandardizeMatrix(gsl_matrix *G);
+double ScaleMatrix(gsl_matrix *G);
+double CenterVector(gsl_vector *y);
+void CenterVector(gsl_vector *y, const gsl_matrix *W);
+void StandardizeVector(gsl_vector *y);
+void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX);
+void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX);
+void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx);
+double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab);
void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H);
void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H);
double UcharToDouble02(const unsigned char c);
unsigned char Double02ToUchar(const double dosage);
-void uchar_matrix_get_row (const vector<vector<unsigned char> > &X,
- const size_t i_row, VectorXd &x_row);
+void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
+ const size_t i_row, VectorXd &x_row);
#endif
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 78cd926..f1ab3fc 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -16,895 +16,914 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
-#include <iomanip>
+#include <assert.h>
+#include <bitset>
#include <cmath>
+#include <cstring>
+#include <iomanip>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
-#include <bitset>
-#include <cstring>
-#include <assert.h>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
-#include "gsl/gsl_roots.h"
-#include "gsl/gsl_min.h"
#include "gsl/gsl_integration.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_roots.h"
+#include "gsl/gsl_vector.h"
-#include "io.h"
-#include "lapack.h"
#include "eigenlib.h"
#include "gzstream.h"
+#include "io.h"
+#include "lapack.h"
#include "lmm.h"
#include "mvlmm.h"
using namespace std;
// In this file, X, Y are already transformed (i.e. UtX and UtY).
-void MVLMM::CopyFromParam (PARAM &cPar) {
- a_mode=cPar.a_mode;
- d_pace=cPar.d_pace;
-
- file_bfile=cPar.file_bfile;
- file_geno=cPar.file_geno;
- file_oxford=cPar.file_oxford;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
-
- l_min=cPar.l_min;
- l_max=cPar.l_max;
- n_region=cPar.n_region;
- p_nr=cPar.p_nr;
- em_iter=cPar.em_iter;
- nr_iter=cPar.nr_iter;
- em_prec=cPar.em_prec;
- nr_prec=cPar.nr_prec;
- crt=cPar.crt;
-
- Vg_remle_null=cPar.Vg_remle_null;
- Ve_remle_null=cPar.Ve_remle_null;
- Vg_mle_null=cPar.Vg_mle_null;
- Ve_mle_null=cPar.Ve_mle_null;
-
- time_UtX=0.0;
- time_opt=0.0;
-
- ni_total=cPar.ni_total;
- ns_total=cPar.ns_total;
- ni_test=cPar.ni_test;
- ns_test=cPar.ns_test;
- n_cvt=cPar.n_cvt;
-
- n_ph=cPar.n_ph;
-
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
- snpInfo=cPar.snpInfo;
-
- return;
+void MVLMM::CopyFromParam(PARAM &cPar) {
+ a_mode = cPar.a_mode;
+ d_pace = cPar.d_pace;
+
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_oxford = cPar.file_oxford;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
+
+ l_min = cPar.l_min;
+ l_max = cPar.l_max;
+ n_region = cPar.n_region;
+ p_nr = cPar.p_nr;
+ em_iter = cPar.em_iter;
+ nr_iter = cPar.nr_iter;
+ em_prec = cPar.em_prec;
+ nr_prec = cPar.nr_prec;
+ crt = cPar.crt;
+
+ Vg_remle_null = cPar.Vg_remle_null;
+ Ve_remle_null = cPar.Ve_remle_null;
+ Vg_mle_null = cPar.Vg_mle_null;
+ Ve_mle_null = cPar.Ve_mle_null;
+
+ time_UtX = 0.0;
+ time_opt = 0.0;
+
+ ni_total = cPar.ni_total;
+ ns_total = cPar.ns_total;
+ ni_test = cPar.ni_test;
+ ns_test = cPar.ns_test;
+ n_cvt = cPar.n_cvt;
+
+ n_ph = cPar.n_ph;
+
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
+
+ return;
}
-void MVLMM::CopyToParam (PARAM &cPar) {
- cPar.time_UtX=time_UtX;
- cPar.time_opt=time_opt;
+void MVLMM::CopyToParam(PARAM &cPar) {
+ cPar.time_UtX = time_UtX;
+ cPar.time_opt = time_opt;
- cPar.Vg_remle_null=Vg_remle_null;
- cPar.Ve_remle_null=Ve_remle_null;
- cPar.Vg_mle_null=Vg_mle_null;
- cPar.Ve_mle_null=Ve_mle_null;
+ cPar.Vg_remle_null = Vg_remle_null;
+ cPar.Ve_remle_null = Ve_remle_null;
+ cPar.Vg_mle_null = Vg_mle_null;
+ cPar.Ve_mle_null = Ve_mle_null;
- cPar.VVg_remle_null=VVg_remle_null;
- cPar.VVe_remle_null=VVe_remle_null;
- cPar.VVg_mle_null=VVg_mle_null;
- cPar.VVe_mle_null=VVe_mle_null;
+ cPar.VVg_remle_null = VVg_remle_null;
+ cPar.VVe_remle_null = VVe_remle_null;
+ cPar.VVg_mle_null = VVg_mle_null;
+ cPar.VVe_mle_null = VVe_mle_null;
- cPar.beta_remle_null=beta_remle_null;
- cPar.se_beta_remle_null=se_beta_remle_null;
- cPar.beta_mle_null=beta_mle_null;
- cPar.se_beta_mle_null=se_beta_mle_null;
+ cPar.beta_remle_null = beta_remle_null;
+ cPar.se_beta_remle_null = se_beta_remle_null;
+ cPar.beta_mle_null = beta_mle_null;
+ cPar.se_beta_mle_null = se_beta_mle_null;
- cPar.logl_remle_H0=logl_remle_H0;
- cPar.logl_mle_H0=logl_mle_H0;
- return;
+ cPar.logl_remle_H0 = logl_remle_H0;
+ cPar.logl_mle_H0 = logl_mle_H0;
+ return;
}
-void MVLMM::WriteFiles () {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".assoc.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"
- <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
- for (size_t i=0; i<n_ph; i++) {
- outfile<<"beta_"<<i+1<<"\t";
- }
- for (size_t i=0; i<n_ph; i++) {
- for (size_t j=i; j<n_ph; j++) {
- outfile<<"Vbeta_"<<i+1<<"_"<<j+1<<"\t";
- }
- }
-
- if (a_mode==1) {
- outfile<<"p_wald"<<endl;
- } else if (a_mode==2) {
- outfile<<"p_lrt"<<endl;
- } else if (a_mode==3) {
- outfile<<"p_score"<<endl;
- } else if (a_mode==4) {
- outfile<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
- } else {}
-
-
- size_t t=0, c=0;
- for (size_t i=0; i<snpInfo.size(); ++i) {
- if (indicator_snp[i]==0) {continue;}
-
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
- <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<
- "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<
- fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
- outfile<<scientific<<setprecision(6);
-
- for (size_t i=0; i<n_ph; i++) {
- outfile<<sumStat[t].v_beta[i]<<"\t";
- }
-
- c=0;
- for (size_t i=0; i<n_ph; i++) {
- for (size_t j=i; j<n_ph; j++) {
- outfile<<sumStat[t].v_Vbeta[c]<<"\t";
- c++;
- }
- }
-
- if (a_mode==1) {
- outfile<<sumStat[t].p_wald <<endl;
- } else if (a_mode==2) {
- outfile<<sumStat[t].p_lrt<<endl;
- } else if (a_mode==3) {
- outfile<<sumStat[t].p_score<<endl;
- } else if (a_mode==4) {
- outfile<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<
- "\t"<<sumStat[t].p_score<<endl;
- } else {}
-
- t++;
- }
-
- outfile.close();
- outfile.clear();
- return;
+void MVLMM::WriteFiles() {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".assoc.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ outfile << "chr"
+ << "\t"
+ << "rs"
+ << "\t"
+ << "ps"
+ << "\t"
+ << "n_miss"
+ << "\t"
+ << "allele1"
+ << "\t"
+ << "allele0"
+ << "\t"
+ << "af"
+ << "\t";
+
+ for (size_t i = 0; i < n_ph; i++) {
+ outfile << "beta_" << i + 1 << "\t";
+ }
+ for (size_t i = 0; i < n_ph; i++) {
+ for (size_t j = i; j < n_ph; j++) {
+ outfile << "Vbeta_" << i + 1 << "_" << j + 1 << "\t";
+ }
+ }
+
+ if (a_mode == 1) {
+ outfile << "p_wald" << endl;
+ } else if (a_mode == 2) {
+ outfile << "p_lrt" << endl;
+ } else if (a_mode == 3) {
+ outfile << "p_score" << endl;
+ } else if (a_mode == 4) {
+ outfile << "p_wald"
+ << "\t"
+ << "p_lrt"
+ << "\t"
+ << "p_score" << endl;
+ } else {
+ }
+
+ size_t t = 0, c = 0;
+ for (size_t i = 0; i < snpInfo.size(); ++i) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+ << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+ << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" << fixed
+ << setprecision(3) << snpInfo[i].maf << "\t";
+
+ outfile << scientific << setprecision(6);
+
+ for (size_t i = 0; i < n_ph; i++) {
+ outfile << sumStat[t].v_beta[i] << "\t";
+ }
+
+ c = 0;
+ for (size_t i = 0; i < n_ph; i++) {
+ for (size_t j = i; j < n_ph; j++) {
+ outfile << sumStat[t].v_Vbeta[c] << "\t";
+ c++;
+ }
+ }
+
+ if (a_mode == 1) {
+ outfile << sumStat[t].p_wald << endl;
+ } else if (a_mode == 2) {
+ outfile << sumStat[t].p_lrt << endl;
+ } else if (a_mode == 3) {
+ outfile << sumStat[t].p_score << endl;
+ } else if (a_mode == 4) {
+ outfile << sumStat[t].p_wald << "\t" << sumStat[t].p_lrt << "\t"
+ << sumStat[t].p_score << endl;
+ } else {
+ }
+
+ t++;
+ }
+
+ outfile.close();
+ outfile.clear();
+ return;
}
// Below are functions for EM algorithm.
-double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e,
- gsl_vector *D_l, gsl_matrix *UltVeh,
- gsl_matrix *UltVehi) {
- size_t d_size=V_g->size1;
- double d, logdet_Ve=0.0;
-
- // Eigen decomposition of V_e.
- gsl_matrix *Lambda=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_temp=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_h=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_hi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *VgVehi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *U_l=gsl_matrix_alloc (d_size, d_size);
-
- gsl_matrix_memcpy(V_e_temp, V_e);
- EigenDecomp(V_e_temp, U_l, D_l, 0);
-
- // Calculate V_e_h and V_e_hi.
- gsl_matrix_set_zero(V_e_h);
- gsl_matrix_set_zero(V_e_hi);
- for (size_t i=0; i<d_size; i++) {
- d=gsl_vector_get (D_l, i);
- if (d<=0) {continue;}
- logdet_Ve+=log(d);
-
- gsl_vector_view U_col=gsl_matrix_column(U_l, i);
- d=sqrt(d);
- gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_h);
- d=1.0/d;
- gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_hi);
- }
-
- // Copy the upper part to lower part.
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<i; j++) {
- gsl_matrix_set (V_e_h, i, j, gsl_matrix_get(V_e_h, j, i));
- gsl_matrix_set (V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
- }
- }
-
- // Calculate Lambda=V_ehi V_g V_ehi.
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_g,V_e_hi,0.0,VgVehi);
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_e_hi,VgVehi,0.0,Lambda);
-
- // Eigen decomposition of Lambda.
- EigenDecomp(Lambda, U_l, D_l, 0);
-
- for (size_t i=0; i<d_size; i++) {
- d=gsl_vector_get (D_l, i);
- if (d<0) {gsl_vector_set (D_l, i, 0);}
- }
-
- // Calculate UltVeh and UltVehi.
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_h,0.0,UltVeh);
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_hi,0.0,UltVehi);
-
- //free memory
- gsl_matrix_free (Lambda);
- gsl_matrix_free (V_e_temp);
- gsl_matrix_free (V_e_h);
- gsl_matrix_free (V_e_hi);
- gsl_matrix_free (VgVehi);
- gsl_matrix_free (U_l);
-
- return logdet_Ve;
+double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
+ gsl_matrix *UltVeh, gsl_matrix *UltVehi) {
+ size_t d_size = V_g->size1;
+ double d, logdet_Ve = 0.0;
+
+ // Eigen decomposition of V_e.
+ gsl_matrix *Lambda = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_temp = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_h = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_hi = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *VgVehi = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *U_l = gsl_matrix_alloc(d_size, d_size);
+
+ gsl_matrix_memcpy(V_e_temp, V_e);
+ EigenDecomp(V_e_temp, U_l, D_l, 0);
+
+ // Calculate V_e_h and V_e_hi.
+ gsl_matrix_set_zero(V_e_h);
+ gsl_matrix_set_zero(V_e_hi);
+ for (size_t i = 0; i < d_size; i++) {
+ d = gsl_vector_get(D_l, i);
+ if (d <= 0) {
+ continue;
+ }
+ logdet_Ve += log(d);
+
+ gsl_vector_view U_col = gsl_matrix_column(U_l, i);
+ d = sqrt(d);
+ gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_h);
+ d = 1.0 / d;
+ gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_hi);
+ }
+
+ // Copy the upper part to lower part.
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j < i; j++) {
+ gsl_matrix_set(V_e_h, i, j, gsl_matrix_get(V_e_h, j, i));
+ gsl_matrix_set(V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
+ }
+ }
+
+ // Calculate Lambda=V_ehi V_g V_ehi.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_g, V_e_hi, 0.0, VgVehi);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda);
+
+ // Eigen decomposition of Lambda.
+ EigenDecomp(Lambda, U_l, D_l, 0);
+
+ for (size_t i = 0; i < d_size; i++) {
+ d = gsl_vector_get(D_l, i);
+ if (d < 0) {
+ gsl_vector_set(D_l, i, 0);
+ }
+ }
+
+ // Calculate UltVeh and UltVehi.
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_hi, 0.0, UltVehi);
+
+ // free memory
+ gsl_matrix_free(Lambda);
+ gsl_matrix_free(V_e_temp);
+ gsl_matrix_free(V_e_h);
+ gsl_matrix_free(V_e_hi);
+ gsl_matrix_free(VgVehi);
+ gsl_matrix_free(U_l);
+
+ return logdet_Ve;
}
-//Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}.
-double CalcQi (const gsl_vector *eval, const gsl_vector *D_l,
- const gsl_matrix *X, gsl_matrix *Qi) {
- size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
- size_t c_size=dc_size/d_size;
-
- double delta, dl, d1, d2, d, logdet_Q;
-
- gsl_matrix *Q=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix_set_zero (Q);
-
- for (size_t i=0; i<c_size; i++) {
- for (size_t j=0; j<c_size; j++) {
- for (size_t l=0; l<d_size; l++) {
- dl=gsl_vector_get(D_l, l);
-
- if (j<i) {
- d=gsl_matrix_get (Q, j*d_size+l, i*d_size+l);
- } else {
- d=0.0;
- for (size_t k=0; k<n_size; k++) {
- d1=gsl_matrix_get(X, i, k);
- d2=gsl_matrix_get(X, j, k);
- delta=gsl_vector_get(eval, k);
- d+=d1*d2/(dl*delta+1.0);
- }
- }
-
- gsl_matrix_set (Q, i*d_size+l, j*d_size+l, d);
- }
- }
- }
-
- // Calculate LU decomposition of Q, and invert Q and calculate |Q|.
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (dc_size);
- LUDecomp (Q, pmt, &sig);
- LUInvert (Q, pmt, Qi);
-
- logdet_Q=LULndet (Q);
-
- gsl_matrix_free (Q);
- gsl_permutation_free (pmt);
-
- return logdet_Q;
+// Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}.
+double CalcQi(const gsl_vector *eval, const gsl_vector *D_l,
+ const gsl_matrix *X, gsl_matrix *Qi) {
+ size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
+ size_t c_size = dc_size / d_size;
+
+ double delta, dl, d1, d2, d, logdet_Q;
+
+ gsl_matrix *Q = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix_set_zero(Q);
+
+ for (size_t i = 0; i < c_size; i++) {
+ for (size_t j = 0; j < c_size; j++) {
+ for (size_t l = 0; l < d_size; l++) {
+ dl = gsl_vector_get(D_l, l);
+
+ if (j < i) {
+ d = gsl_matrix_get(Q, j * d_size + l, i * d_size + l);
+ } else {
+ d = 0.0;
+ for (size_t k = 0; k < n_size; k++) {
+ d1 = gsl_matrix_get(X, i, k);
+ d2 = gsl_matrix_get(X, j, k);
+ delta = gsl_vector_get(eval, k);
+ d += d1 * d2 / (dl * delta + 1.0);
+ }
+ }
+
+ gsl_matrix_set(Q, i * d_size + l, j * d_size + l, d);
+ }
+ }
+ }
+
+ // Calculate LU decomposition of Q, and invert Q and calculate |Q|.
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(dc_size);
+ LUDecomp(Q, pmt, &sig);
+ LUInvert(Q, pmt, Qi);
+
+ logdet_Q = LULndet(Q);
+
+ gsl_matrix_free(Q);
+ gsl_permutation_free(pmt);
+
+ return logdet_Q;
}
// xHiy=\sum_{k=1}^n x_k\otimes((delta_k*Dl+I)^{-1}Ul^TVe^{-1/2}y_k).
-void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l,
- const gsl_matrix *X, const gsl_matrix *UltVehiY,
- gsl_vector *xHiy) {
- size_t n_size=eval->size, c_size=X->size1, d_size=D_l->size;
-
- gsl_vector_set_zero (xHiy);
-
- double x, delta, dl, y, d;
- for (size_t i=0; i<d_size; i++) {
- dl=gsl_vector_get(D_l, i);
- for (size_t j=0; j<c_size; j++) {
- d=0.0;
- for (size_t k=0; k<n_size; k++) {
- x=gsl_matrix_get(X, j, k);
- y=gsl_matrix_get(UltVehiY, i, k);
- delta=gsl_vector_get(eval, k);
- d+=x*y/(delta*dl+1.0);
- }
- gsl_vector_set(xHiy, j*d_size+i, d);
- }
- }
-
- return;
-}
+void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l,
+ const gsl_matrix *X, const gsl_matrix *UltVehiY,
+ gsl_vector *xHiy) {
+ size_t n_size = eval->size, c_size = X->size1, d_size = D_l->size;
+
+ gsl_vector_set_zero(xHiy);
+
+ double x, delta, dl, y, d;
+ for (size_t i = 0; i < d_size; i++) {
+ dl = gsl_vector_get(D_l, i);
+ for (size_t j = 0; j < c_size; j++) {
+ d = 0.0;
+ for (size_t k = 0; k < n_size; k++) {
+ x = gsl_matrix_get(X, j, k);
+ y = gsl_matrix_get(UltVehiY, i, k);
+ delta = gsl_vector_get(eval, k);
+ d += x * y / (delta * dl + 1.0);
+ }
+ gsl_vector_set(xHiy, j * d_size + i, d);
+ }
+ }
+ return;
+}
// OmegaU=D_l(delta D_l+I)^{-1}
// OmegaE=delta D_l(delta D_l+I)^{-1}
-void CalcOmega (const gsl_vector *eval, const gsl_vector *D_l,
-gsl_matrix *OmegaU, gsl_matrix *OmegaE) {
- size_t n_size=eval->size, d_size=D_l->size;
- double delta, dl, d_u, d_e;
+void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l,
+ gsl_matrix *OmegaU, gsl_matrix *OmegaE) {
+ size_t n_size = eval->size, d_size = D_l->size;
+ double delta, dl, d_u, d_e;
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get(eval, k);
- for (size_t i=0; i<d_size; i++) {
- dl=gsl_vector_get(D_l, i);
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ for (size_t i = 0; i < d_size; i++) {
+ dl = gsl_vector_get(D_l, i);
- d_u=dl/(delta*dl+1.0);
- d_e=delta*d_u;
+ d_u = dl / (delta * dl + 1.0);
+ d_e = delta * d_u;
- gsl_matrix_set(OmegaU, i, k, d_u);
- gsl_matrix_set(OmegaE, i, k, d_e);
- }
- }
+ gsl_matrix_set(OmegaU, i, k, d_u);
+ gsl_matrix_set(OmegaE, i, k, d_e);
+ }
+ }
- return;
+ return;
}
-void UpdateU (const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY,
- const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) {
- gsl_matrix_memcpy (UltVehiU, UltVehiY);
- gsl_matrix_sub (UltVehiU, UltVehiBX);
+void UpdateU(const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY,
+ const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) {
+ gsl_matrix_memcpy(UltVehiU, UltVehiY);
+ gsl_matrix_sub(UltVehiU, UltVehiBX);
- gsl_matrix_mul_elements (UltVehiU, OmegaE);
- return;
+ gsl_matrix_mul_elements(UltVehiU, OmegaE);
+ return;
}
-void UpdateE (const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX,
- const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) {
- gsl_matrix_memcpy (UltVehiE, UltVehiY);
- gsl_matrix_sub (UltVehiE, UltVehiBX);
- gsl_matrix_sub (UltVehiE, UltVehiU);
+void UpdateE(const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX,
+ const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) {
+ gsl_matrix_memcpy(UltVehiE, UltVehiY);
+ gsl_matrix_sub(UltVehiE, UltVehiBX);
+ gsl_matrix_sub(UltVehiE, UltVehiU);
- return;
+ return;
}
-void UpdateL_B (const gsl_matrix *X, const gsl_matrix *XXti,
- const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU,
- gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) {
- size_t c_size=X->size1, d_size=UltVehiY->size1;
+void UpdateL_B(const gsl_matrix *X, const gsl_matrix *XXti,
+ const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU,
+ gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) {
+ size_t c_size = X->size1, d_size = UltVehiY->size1;
- gsl_matrix *YUX=gsl_matrix_alloc (d_size, c_size);
+ gsl_matrix *YUX = gsl_matrix_alloc(d_size, c_size);
- gsl_matrix_memcpy (UltVehiBX, UltVehiY);
- gsl_matrix_sub (UltVehiBX, UltVehiU);
+ gsl_matrix_memcpy(UltVehiBX, UltVehiY);
+ gsl_matrix_sub(UltVehiBX, UltVehiU);
- gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,UltVehiBX,X,0.0,YUX);
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,YUX,XXti,0.0,UltVehiB);
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UltVehiBX, X, 0.0, YUX);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, YUX, XXti, 0.0, UltVehiB);
- gsl_matrix_free(YUX);
+ gsl_matrix_free(YUX);
- return;
+ return;
}
-void UpdateRL_B (const gsl_vector *xHiy, const gsl_matrix *Qi,
- gsl_matrix *UltVehiB) {
- size_t d_size=UltVehiB->size1, c_size=UltVehiB->size2,
- dc_size=Qi->size1;
+void UpdateRL_B(const gsl_vector *xHiy, const gsl_matrix *Qi,
+ gsl_matrix *UltVehiB) {
+ size_t d_size = UltVehiB->size1, c_size = UltVehiB->size2,
+ dc_size = Qi->size1;
- gsl_vector *b=gsl_vector_alloc (dc_size);
+ gsl_vector *b = gsl_vector_alloc(dc_size);
- // Calculate b=Qiv.
- gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
+ // Calculate b=Qiv.
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
- // Copy b to UltVehiB.
- for (size_t i=0; i<c_size; i++) {
- gsl_vector_view UltVehiB_col=gsl_matrix_column (UltVehiB, i);
- gsl_vector_const_view b_subcol=
- gsl_vector_const_subvector (b, i*d_size, d_size);
- gsl_vector_memcpy (&UltVehiB_col.vector, &b_subcol.vector);
- }
+ // Copy b to UltVehiB.
+ for (size_t i = 0; i < c_size; i++) {
+ gsl_vector_view UltVehiB_col = gsl_matrix_column(UltVehiB, i);
+ gsl_vector_const_view b_subcol =
+ gsl_vector_const_subvector(b, i * d_size, d_size);
+ gsl_vector_memcpy(&UltVehiB_col.vector, &b_subcol.vector);
+ }
- gsl_vector_free(b);
+ gsl_vector_free(b);
- return;
+ return;
}
-void UpdateV (const gsl_vector *eval, const gsl_matrix *U,
- const gsl_matrix *E, const gsl_matrix *Sigma_uu,
- const gsl_matrix *Sigma_ee, gsl_matrix *V_g, gsl_matrix *V_e) {
- size_t n_size=eval->size, d_size=U->size1;
+void UpdateV(const gsl_vector *eval, const gsl_matrix *U, const gsl_matrix *E,
+ const gsl_matrix *Sigma_uu, const gsl_matrix *Sigma_ee,
+ gsl_matrix *V_g, gsl_matrix *V_e) {
+ size_t n_size = eval->size, d_size = U->size1;
- gsl_matrix_set_zero (V_g);
- gsl_matrix_set_zero (V_e);
+ gsl_matrix_set_zero(V_g);
+ gsl_matrix_set_zero(V_e);
- double delta;
+ double delta;
- // Calculate the first part: UD^{-1}U^T and EE^T.
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
- if (delta==0) {continue;}
+ // Calculate the first part: UD^{-1}U^T and EE^T.
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ if (delta == 0) {
+ continue;
+ }
- gsl_vector_const_view U_col=gsl_matrix_const_column (U, k);
- gsl_blas_dsyr (CblasUpper, 1.0/delta, &U_col.vector, V_g);
- }
+ gsl_vector_const_view U_col = gsl_matrix_const_column(U, k);
+ gsl_blas_dsyr(CblasUpper, 1.0 / delta, &U_col.vector, V_g);
+ }
- gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
+ gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
- // Copy the upper part to lower part.
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<i; j++) {
- gsl_matrix_set (V_g, i, j, gsl_matrix_get(V_g, j, i));
- gsl_matrix_set (V_e, i, j, gsl_matrix_get(V_e, j, i));
- }
- }
+ // Copy the upper part to lower part.
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j < i; j++) {
+ gsl_matrix_set(V_g, i, j, gsl_matrix_get(V_g, j, i));
+ gsl_matrix_set(V_e, i, j, gsl_matrix_get(V_e, j, i));
+ }
+ }
- // Add Sigma.
- gsl_matrix_add (V_g, Sigma_uu);
- gsl_matrix_add (V_e, Sigma_ee);
+ // Add Sigma.
+ gsl_matrix_add(V_g, Sigma_uu);
+ gsl_matrix_add(V_e, Sigma_ee);
- // Scale by 1/n.
- gsl_matrix_scale (V_g, 1.0/(double)n_size);
- gsl_matrix_scale (V_e, 1.0/(double)n_size);
+ // Scale by 1/n.
+ gsl_matrix_scale(V_g, 1.0 / (double)n_size);
+ gsl_matrix_scale(V_e, 1.0 / (double)n_size);
- return;
+ return;
}
-void CalcSigma (const char func_name, const gsl_vector *eval,
- const gsl_vector *D_l, const gsl_matrix *X,
- const gsl_matrix *OmegaU, const gsl_matrix *OmegaE,
- const gsl_matrix *UltVeh, const gsl_matrix *Qi,
- gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) {
- if (func_name!='R' && func_name!='L' && func_name!='r' &&
- func_name!='l') {
- cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
- "likelihood, 'L' for log-likelihood."<<endl;
- return;
- }
-
- size_t n_size=eval->size, c_size=X->size1;
- size_t d_size=D_l->size, dc_size=Qi->size1;
-
- gsl_matrix_set_zero(Sigma_uu);
- gsl_matrix_set_zero(Sigma_ee);
-
- double delta, dl, x, d;
-
- // Calculate the first diagonal term.
- gsl_vector_view Suu_diag=gsl_matrix_diagonal (Sigma_uu);
- gsl_vector_view See_diag=gsl_matrix_diagonal (Sigma_ee);
-
- for (size_t k=0; k<n_size; k++) {
- gsl_vector_const_view OmegaU_col=gsl_matrix_const_column (OmegaU, k);
- gsl_vector_const_view OmegaE_col=gsl_matrix_const_column (OmegaE, k);
-
- gsl_vector_add (&Suu_diag.vector, &OmegaU_col.vector);
- gsl_vector_add (&See_diag.vector, &OmegaE_col.vector);
- }
-
- // Calculate the second term for REML.
- if (func_name=='R' || func_name=='r') {
- gsl_matrix *M_u=gsl_matrix_alloc(dc_size, d_size);
- gsl_matrix *M_e=gsl_matrix_alloc(dc_size, d_size);
- gsl_matrix *QiM=gsl_matrix_alloc(dc_size, d_size);
-
- gsl_matrix_set_zero(M_u);
- gsl_matrix_set_zero(M_e);
-
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get(eval, k);
-
- for (size_t i=0; i<d_size; i++) {
- dl=gsl_vector_get(D_l, i);
- for (size_t j=0; j<c_size; j++) {
- x=gsl_matrix_get(X, j, k);
- d=x/(delta*dl+1.0);
- gsl_matrix_set(M_e, j*d_size+i, i, d);
- gsl_matrix_set(M_u, j*d_size+i, i, d*dl);
- }
- }
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_u,0.0,QiM);
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,delta,M_u,QiM,1.0,
- Sigma_uu);
-
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_e,0.0,QiM);
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,M_e,QiM,1.0,
- Sigma_ee);
- }
-
- gsl_matrix_free(M_u);
- gsl_matrix_free(M_e);
- gsl_matrix_free(QiM);
- }
-
- // Multiply both sides by VehUl.
- gsl_matrix *M=gsl_matrix_alloc (d_size, d_size);
-
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_uu,UltVeh,0.0,M);
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_uu);
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_ee,UltVeh,0.0,M);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_ee);
-
- gsl_matrix_free(M);
- return;
+void CalcSigma(const char func_name, const gsl_vector *eval,
+ const gsl_vector *D_l, const gsl_matrix *X,
+ const gsl_matrix *OmegaU, const gsl_matrix *OmegaE,
+ const gsl_matrix *UltVeh, const gsl_matrix *Qi,
+ gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) {
+ if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+ func_name != 'l') {
+ cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+ << "likelihood, 'L' for log-likelihood." << endl;
+ return;
+ }
+
+ size_t n_size = eval->size, c_size = X->size1;
+ size_t d_size = D_l->size, dc_size = Qi->size1;
+
+ gsl_matrix_set_zero(Sigma_uu);
+ gsl_matrix_set_zero(Sigma_ee);
+
+ double delta, dl, x, d;
+
+ // Calculate the first diagonal term.
+ gsl_vector_view Suu_diag = gsl_matrix_diagonal(Sigma_uu);
+ gsl_vector_view See_diag = gsl_matrix_diagonal(Sigma_ee);
+
+ for (size_t k = 0; k < n_size; k++) {
+ gsl_vector_const_view OmegaU_col = gsl_matrix_const_column(OmegaU, k);
+ gsl_vector_const_view OmegaE_col = gsl_matrix_const_column(OmegaE, k);
+
+ gsl_vector_add(&Suu_diag.vector, &OmegaU_col.vector);
+ gsl_vector_add(&See_diag.vector, &OmegaE_col.vector);
+ }
+
+ // Calculate the second term for REML.
+ if (func_name == 'R' || func_name == 'r') {
+ gsl_matrix *M_u = gsl_matrix_alloc(dc_size, d_size);
+ gsl_matrix *M_e = gsl_matrix_alloc(dc_size, d_size);
+ gsl_matrix *QiM = gsl_matrix_alloc(dc_size, d_size);
+
+ gsl_matrix_set_zero(M_u);
+ gsl_matrix_set_zero(M_e);
+
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+
+ for (size_t i = 0; i < d_size; i++) {
+ dl = gsl_vector_get(D_l, i);
+ for (size_t j = 0; j < c_size; j++) {
+ x = gsl_matrix_get(X, j, k);
+ d = x / (delta * dl + 1.0);
+ gsl_matrix_set(M_e, j * d_size + i, i, d);
+ gsl_matrix_set(M_u, j * d_size + i, i, d * dl);
+ }
+ }
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_u, 0.0, QiM);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, delta, M_u, QiM, 1.0, Sigma_uu);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_e, 0.0, QiM);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, M_e, QiM, 1.0, Sigma_ee);
+ }
+
+ gsl_matrix_free(M_u);
+ gsl_matrix_free(M_e);
+ gsl_matrix_free(QiM);
+ }
+
+ // Multiply both sides by VehUl.
+ gsl_matrix *M = gsl_matrix_alloc(d_size, d_size);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_uu, UltVeh, 0.0, M);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_uu);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_ee, UltVeh, 0.0, M);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_ee);
+
+ gsl_matrix_free(M);
+ return;
}
// 'R' for restricted likelihood and 'L' for likelihood.
// 'R' updates B and 'L' doesn't.
// Only calculates -0.5*\sum_{k=1}^n log|H_k|-0.5yPxy.
-double MphCalcLogL (const gsl_vector *eval, const gsl_vector *xHiy,
- const gsl_vector *D_l, const gsl_matrix *UltVehiY,
- const gsl_matrix *Qi) {
- size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
- double logl=0.0, delta, dl, y, d;
-
- // Calculate yHiy+log|H_k|.
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get(eval, k);
- for (size_t i=0; i<d_size; i++) {
- y=gsl_matrix_get(UltVehiY, i, k);
- dl=gsl_vector_get(D_l, i);
- d=delta*dl+1.0;
-
- logl+=y*y/d+log(d);
- }
- }
+double MphCalcLogL(const gsl_vector *eval, const gsl_vector *xHiy,
+ const gsl_vector *D_l, const gsl_matrix *UltVehiY,
+ const gsl_matrix *Qi) {
+ size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1;
+ double logl = 0.0, delta, dl, y, d;
+
+ // Calculate yHiy+log|H_k|.
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ for (size_t i = 0; i < d_size; i++) {
+ y = gsl_matrix_get(UltVehiY, i, k);
+ dl = gsl_vector_get(D_l, i);
+ d = delta * dl + 1.0;
+
+ logl += y * y / d + log(d);
+ }
+ }
- // Calculate the rest of yPxy.
- gsl_vector *Qiv=gsl_vector_alloc(dc_size);
+ // Calculate the rest of yPxy.
+ gsl_vector *Qiv = gsl_vector_alloc(dc_size);
- gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
- gsl_blas_ddot(xHiy, Qiv, &d);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
+ gsl_blas_ddot(xHiy, Qiv, &d);
- logl-=d;
+ logl -= d;
- gsl_vector_free(Qiv);
+ gsl_vector_free(Qiv);
- return -0.5*logl;
+ return -0.5 * logl;
}
// Y is a dxn matrix, X is a cxn matrix, B is a dxc matrix, V_g is a
// dxd matrix, V_e is a dxd matrix, and eval is a size-n vector.
// 'R' for restricted likelihood and 'L' for likelihood.
-double MphEM (const char func_name, const size_t max_iter,
- const double max_prec, const gsl_vector *eval,
- const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *U_hat,
- gsl_matrix *E_hat, gsl_matrix *OmegaU, gsl_matrix *OmegaE,
- gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX,
- gsl_matrix *UltVehiU, gsl_matrix *UltVehiE,
- gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B) {
- if (func_name!='R' && func_name!='L' &&
- func_name!='r' && func_name!='l') {
- cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
- "likelihood, 'L' for log-likelihood."<<endl;
- return 0.0;
- }
-
- size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
- size_t dc_size=d_size*c_size;
-
- gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
- gsl_matrix *XXti=gsl_matrix_alloc (c_size, c_size);
- gsl_vector *D_l=gsl_vector_alloc (d_size);
- gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *UltVehiB=gsl_matrix_alloc (d_size, c_size);
- gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *Sigma_uu=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *Sigma_ee=gsl_matrix_alloc (d_size, d_size);
- gsl_vector *xHiy=gsl_vector_alloc (dc_size);
- gsl_permutation * pmt=gsl_permutation_alloc (c_size);
-
- double logl_const=0.0, logl_old=0.0, logl_new=0.0;
- double logdet_Q, logdet_Ve;
- int sig;
-
- // Calculate |XXt| and (XXt)^{-1}.
- gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
- for (size_t i=0; i<c_size; ++i) {
- for (size_t j=0; j<i; ++j) {
- gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i));
- }
- }
-
- LUDecomp (XXt, pmt, &sig);
- LUInvert (XXt, pmt, XXti);
-
- // Calculate the constant for logl.
- if (func_name=='R' || func_name=='r') {
- logl_const=-0.5*(double)(n_size-c_size)*
- (double)d_size*log(2.0*M_PI)+0.5*(double)d_size*LULndet (XXt);
- } else {
- logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
- }
-
- // Start EM.
- for (size_t t=0; t<max_iter; t++) {
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
- logdet_Q=CalcQi (eval, D_l, X, Qi);
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi,
- Y, 0.0, UltVehiY);
- CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
-
- // Calculate log likelihood/restricted likelihood value, and
- // terminate if change is small.
- logl_new=logl_const+MphCalcLogL (eval, xHiy, D_l, UltVehiY, Qi) -
- 0.5*(double)n_size*logdet_Ve;
- if (func_name=='R' || func_name=='r') {
- logl_new+=-0.5*(logdet_Q-(double)c_size*logdet_Ve);
- }
- if (t!=0 && abs(logl_new-logl_old)<max_prec) {break;}
- logl_old=logl_new;
-
- CalcOmega (eval, D_l, OmegaU, OmegaE);
-
- // Update UltVehiB, UltVehiU.
- if (func_name=='R' || func_name=='r') {
- UpdateRL_B(xHiy, Qi, UltVehiB);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X,
- 0.0, UltVehiBX);
- } else if (t==0) {
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B,
- 0.0, UltVehiB);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X,
- 0.0, UltVehiBX);
- }
-
- UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
-
- if (func_name=='L' || func_name=='l') {
-
- // UltVehiBX is destroyed here.
- UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X,
- 0.0, UltVehiBX);
- }
-
- UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
-
- // Calculate U_hat, E_hat and B.
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiU,
- 0.0,U_hat);
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiE,
- 0.0,E_hat);
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiB,0.0,B);
-
- // Calculate Sigma_uu and Sigma_ee.
- CalcSigma (func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh,
- Qi, Sigma_uu, Sigma_ee);
-
- // Update V_g and V_e.
- UpdateV (eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
- }
-
- gsl_matrix_free(XXt);
- gsl_matrix_free(XXti);
- gsl_vector_free(D_l);
- gsl_matrix_free(UltVeh);
- gsl_matrix_free(UltVehi);
- gsl_matrix_free(UltVehiB);
- gsl_matrix_free(Qi);
- gsl_matrix_free(Sigma_uu);
- gsl_matrix_free(Sigma_ee);
- gsl_vector_free(xHiy);
- gsl_permutation_free(pmt);
-
- return logl_new;
+double MphEM(const char func_name, const size_t max_iter, const double max_prec,
+ const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
+ gsl_matrix *U_hat, gsl_matrix *E_hat, gsl_matrix *OmegaU,
+ gsl_matrix *OmegaE, gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX,
+ gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, gsl_matrix *V_g,
+ gsl_matrix *V_e, gsl_matrix *B) {
+ if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+ func_name != 'l') {
+ cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+ << "likelihood, 'L' for log-likelihood." << endl;
+ return 0.0;
+ }
+
+ size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+ size_t dc_size = d_size * c_size;
+
+ gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
+ gsl_matrix *XXti = gsl_matrix_alloc(c_size, c_size);
+ gsl_vector *D_l = gsl_vector_alloc(d_size);
+ gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *UltVehiB = gsl_matrix_alloc(d_size, c_size);
+ gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *Sigma_uu = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *Sigma_ee = gsl_matrix_alloc(d_size, d_size);
+ gsl_vector *xHiy = gsl_vector_alloc(dc_size);
+ gsl_permutation *pmt = gsl_permutation_alloc(c_size);
+
+ double logl_const = 0.0, logl_old = 0.0, logl_new = 0.0;
+ double logdet_Q, logdet_Ve;
+ int sig;
+
+ // Calculate |XXt| and (XXt)^{-1}.
+ gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
+ for (size_t i = 0; i < c_size; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
+ }
+ }
+
+ LUDecomp(XXt, pmt, &sig);
+ LUInvert(XXt, pmt, XXti);
+
+ // Calculate the constant for logl.
+ if (func_name == 'R' || func_name == 'r') {
+ logl_const =
+ -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) +
+ 0.5 * (double)d_size * LULndet(XXt);
+ } else {
+ logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI);
+ }
+
+ // Start EM.
+ for (size_t t = 0; t < max_iter; t++) {
+ logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+ logdet_Q = CalcQi(eval, D_l, X, Qi);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+ CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
+
+ // Calculate log likelihood/restricted likelihood value, and
+ // terminate if change is small.
+ logl_new = logl_const + MphCalcLogL(eval, xHiy, D_l, UltVehiY, Qi) -
+ 0.5 * (double)n_size * logdet_Ve;
+ if (func_name == 'R' || func_name == 'r') {
+ logl_new += -0.5 * (logdet_Q - (double)c_size * logdet_Ve);
+ }
+ if (t != 0 && abs(logl_new - logl_old) < max_prec) {
+ break;
+ }
+ logl_old = logl_new;
+
+ CalcOmega(eval, D_l, OmegaU, OmegaE);
+
+ // Update UltVehiB, UltVehiU.
+ if (func_name == 'R' || func_name == 'r') {
+ UpdateRL_B(xHiy, Qi, UltVehiB);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+ UltVehiBX);
+ } else if (t == 0) {
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 0.0,
+ UltVehiB);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+ UltVehiBX);
+ }
+
+ UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
+
+ if (func_name == 'L' || func_name == 'l') {
+
+ // UltVehiBX is destroyed here.
+ UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0,
+ UltVehiBX);
+ }
+
+ UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
+
+ // Calculate U_hat, E_hat and B.
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiU, 0.0, U_hat);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiE, 0.0, E_hat);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiB, 0.0, B);
+
+ // Calculate Sigma_uu and Sigma_ee.
+ CalcSigma(func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, Qi, Sigma_uu,
+ Sigma_ee);
+
+ // Update V_g and V_e.
+ UpdateV(eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
+ }
+
+ gsl_matrix_free(XXt);
+ gsl_matrix_free(XXti);
+ gsl_vector_free(D_l);
+ gsl_matrix_free(UltVeh);
+ gsl_matrix_free(UltVehi);
+ gsl_matrix_free(UltVehiB);
+ gsl_matrix_free(Qi);
+ gsl_matrix_free(Sigma_uu);
+ gsl_matrix_free(Sigma_ee);
+ gsl_vector_free(xHiy);
+ gsl_permutation_free(pmt);
+
+ return logl_new;
}
// Calculate p-value, beta (d by 1 vector) and V(beta).
-double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec,
- const gsl_matrix *W, const gsl_matrix *Y,
- const gsl_matrix *V_g, const gsl_matrix *V_e,
- gsl_matrix *UltVehiY, gsl_vector *beta, gsl_matrix *Vbeta) {
- size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
- size_t dc_size=d_size*c_size;
- double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
-
- gsl_vector *D_l=gsl_vector_alloc (d_size);
- gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *WHix=gsl_matrix_alloc (dc_size, d_size);
- gsl_matrix *QiWHix=gsl_matrix_alloc(dc_size, d_size);
-
- gsl_matrix *xPx=gsl_matrix_alloc (d_size, d_size);
- gsl_vector *xPy=gsl_vector_alloc (d_size);
- gsl_vector *WHiy=gsl_vector_alloc (dc_size);
-
- gsl_matrix_set_zero (xPx);
- gsl_matrix_set_zero (WHix);
- gsl_vector_set_zero (xPy);
- gsl_vector_set_zero (WHiy);
-
- // Eigen decomposition and calculate log|Ve|.
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
- // Calculate Qi and log|Q|.
- logdet_Q=CalcQi (eval, D_l, W, Qi);
-
- // Calculate UltVehiY.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y,
- 0.0, UltVehiY);
-
- // Calculate WHix, WHiy, xHiy, xHix.
- for (size_t i=0; i<d_size; i++) {
- dl=gsl_vector_get(D_l, i);
-
- d1=0.0; d2=0.0;
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get(eval, k);
- dx=gsl_vector_get(x_vec, k);
- dy=gsl_matrix_get(UltVehiY, i, k);
-
- d1+=dx*dy/(delta*dl+1.0);
- d2+=dx*dx/(delta*dl+1.0);
- }
- gsl_vector_set (xPy, i, d1);
- gsl_matrix_set (xPx, i, i, d2);
-
- for (size_t j=0; j<c_size; j++) {
- d1=0.0; d2=0.0;
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get(eval, k);
- dx=gsl_vector_get(x_vec, k);
- dw=gsl_matrix_get(W, j, k);
- dy=gsl_matrix_get(UltVehiY, i, k);
-
- d1+=dx*dw/(delta*dl+1.0);
- d2+=dy*dw/(delta*dl+1.0);
- }
- gsl_matrix_set(WHix, j*d_size+i, i, d1);
- gsl_vector_set(WHiy, j*d_size+i, d2);
- }
- }
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
- gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
-
- // Calculate V(beta) and beta.
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (d_size);
- LUDecomp (xPx, pmt, &sig);
- LUSolve (xPx, pmt, xPy, D_l);
- LUInvert (xPx, pmt, Vbeta);
-
- // Need to multiply UltVehi on both sides or one side.
- gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh,
- 0.0, xPx);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
-
- // Calculate test statistic and p value.
- gsl_blas_ddot(D_l, xPy, &d);
-
- p_value=gsl_cdf_chisq_Q (d, (double)d_size);
-
- gsl_vector_free(D_l);
- gsl_matrix_free(UltVeh);
- gsl_matrix_free(UltVehi);
- gsl_matrix_free(Qi);
- gsl_matrix_free(WHix);
- gsl_matrix_free(QiWHix);
-
- gsl_matrix_free(xPx);
- gsl_vector_free(xPy);
- gsl_vector_free(WHiy);
-
- gsl_permutation_free(pmt);
-
- return p_value;
+double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec,
+ const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g,
+ const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_vector *beta,
+ gsl_matrix *Vbeta) {
+ size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
+ size_t dc_size = d_size * c_size;
+ double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
+
+ gsl_vector *D_l = gsl_vector_alloc(d_size);
+ gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *WHix = gsl_matrix_alloc(dc_size, d_size);
+ gsl_matrix *QiWHix = gsl_matrix_alloc(dc_size, d_size);
+
+ gsl_matrix *xPx = gsl_matrix_alloc(d_size, d_size);
+ gsl_vector *xPy = gsl_vector_alloc(d_size);
+ gsl_vector *WHiy = gsl_vector_alloc(dc_size);
+
+ gsl_matrix_set_zero(xPx);
+ gsl_matrix_set_zero(WHix);
+ gsl_vector_set_zero(xPy);
+ gsl_vector_set_zero(WHiy);
+
+ // Eigen decomposition and calculate log|Ve|.
+ logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+ // Calculate Qi and log|Q|.
+ logdet_Q = CalcQi(eval, D_l, W, Qi);
+
+ // Calculate UltVehiY.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+ // Calculate WHix, WHiy, xHiy (stored in xPy) and xHix (stored in xPx).
+ for (size_t i = 0; i < d_size; i++) {
+ dl = gsl_vector_get(D_l, i);
+
+ d1 = 0.0;
+ d2 = 0.0;
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ dx = gsl_vector_get(x_vec, k);
+ dy = gsl_matrix_get(UltVehiY, i, k);
+
+ d1 += dx * dy / (delta * dl + 1.0);
+ d2 += dx * dx / (delta * dl + 1.0);
+ }
+ gsl_vector_set(xPy, i, d1);
+ gsl_matrix_set(xPx, i, i, d2);
+
+ for (size_t j = 0; j < c_size; j++) {
+ d1 = 0.0;
+ d2 = 0.0;
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ dx = gsl_vector_get(x_vec, k);
+ dw = gsl_matrix_get(W, j, k);
+ dy = gsl_matrix_get(UltVehiY, i, k);
+
+ d1 += dx * dw / (delta * dl + 1.0);
+ d2 += dy * dw / (delta * dl + 1.0);
+ }
+ gsl_matrix_set(WHix, j * d_size + i, i, d1);
+ gsl_vector_set(WHiy, j * d_size + i, d2);
+ }
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
+ gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
+
+ // Calculate V(beta) and beta.
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(d_size);
+ LUDecomp(xPx, pmt, &sig);
+ LUSolve(xPx, pmt, xPy, D_l);
+ LUInvert(xPx, pmt, Vbeta);
+
+ // Transform back with UltVeh: one side for beta, both sides for Vbeta.
+ gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 0.0, xPx);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
+
+ // Calculate test statistic and p value.
+ gsl_blas_ddot(D_l, xPy, &d);
+
+ p_value = gsl_cdf_chisq_Q(d, (double)d_size);
+
+ gsl_vector_free(D_l);
+ gsl_matrix_free(UltVeh);
+ gsl_matrix_free(UltVehi);
+ gsl_matrix_free(Qi);
+ gsl_matrix_free(WHix);
+ gsl_matrix_free(QiWHix);
+
+ gsl_matrix_free(xPx);
+ gsl_vector_free(xPy);
+ gsl_vector_free(WHiy);
+
+ gsl_permutation_free(pmt);
+
+ return p_value;
}
// Calculate B and its standard error (which is a matrix of the same
// dimension as B).
-void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W,
- const gsl_matrix *Y, const gsl_matrix *V_g,
- const gsl_matrix *V_e, gsl_matrix *UltVehiY,
- gsl_matrix *B, gsl_matrix *se_B) {
- size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
- size_t dc_size=d_size*c_size;
- double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
-
- gsl_vector *D_l=gsl_vector_alloc (d_size);
- gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *Qi_temp=gsl_matrix_alloc (dc_size, dc_size);
- gsl_vector *WHiy=gsl_vector_alloc (dc_size);
- gsl_vector *QiWHiy=gsl_vector_alloc (dc_size);
- gsl_vector *beta=gsl_vector_alloc (dc_size);
- gsl_matrix *Vbeta=gsl_matrix_alloc (dc_size, dc_size);
-
- gsl_vector_set_zero (WHiy);
-
- // Eigen decomposition and calculate log|Ve|.
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
- // Calculate Qi and log|Q|.
- logdet_Q=CalcQi (eval, D_l, W, Qi);
-
- // Calculate UltVehiY.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y,
- 0.0, UltVehiY);
-
- // Calculate WHiy.
- for (size_t i=0; i<d_size; i++) {
- dl=gsl_vector_get(D_l, i);
-
- for (size_t j=0; j<c_size; j++) {
- d=0.0;
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get(eval, k);
- dw=gsl_matrix_get(W, j, k);
- dy=gsl_matrix_get(UltVehiY, i, k);
-
- d+=dy*dw/(delta*dl+1.0);
- }
- gsl_vector_set(WHiy, j*d_size+i, d);
- }
- }
-
- gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
-
- // Need to multiply I_c\otimes UltVehi on both sides or one side.
- for (size_t i=0; i<c_size; i++) {
- gsl_vector_view QiWHiy_sub=
- gsl_vector_subvector(QiWHiy, i*d_size, d_size);
- gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size);
- gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0,
- &beta_sub.vector);
-
- for (size_t j=0; j<c_size; j++) {
- gsl_matrix_view Qi_sub=
- gsl_matrix_submatrix (Qi, i*d_size, j*d_size, d_size, d_size);
- gsl_matrix_view Qitemp_sub=
- gsl_matrix_submatrix (Qi_temp,i*d_size,j*d_size,d_size,d_size);
- gsl_matrix_view Vbeta_sub=
- gsl_matrix_submatrix (Vbeta, i*d_size, j*d_size, d_size, d_size);
-
- if (j<i) {
- gsl_matrix_view Vbeta_sym=
- gsl_matrix_submatrix(Vbeta,j*d_size,i*d_size,d_size,d_size);
- gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix,&Vbeta_sym.matrix);
- } else {
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix,
- UltVeh, 0.0, &Qitemp_sub.matrix);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
- &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
- }
- }
- }
-
- // Copy beta to B, and Vbeta to se_B.
- for (size_t j=0; j<B->size2; j++) {
- for (size_t i=0; i<B->size1; i++) {
- gsl_matrix_set(B, i, j, gsl_vector_get(beta, j*d_size+i));
- gsl_matrix_set(se_B, i, j,
- sqrt(gsl_matrix_get(Vbeta,j*d_size+i,j*d_size+i)));
- }
- }
-
- // Free matrices.
- gsl_vector_free(D_l);
- gsl_matrix_free(UltVeh);
- gsl_matrix_free(UltVehi);
- gsl_matrix_free(Qi);
- gsl_matrix_free(Qi_temp);
- gsl_vector_free(WHiy);
- gsl_vector_free(QiWHiy);
- gsl_vector_free(beta);
- gsl_matrix_free(Vbeta);
-
- return;
+void MphCalcBeta(const gsl_vector *eval, const gsl_matrix *W,
+ const gsl_matrix *Y, const gsl_matrix *V_g,
+ const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_matrix *B,
+ gsl_matrix *se_B) {
+ size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
+ size_t dc_size = d_size * c_size;
+ double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
+
+ gsl_vector *D_l = gsl_vector_alloc(d_size);
+ gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *Qi_temp = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_vector *WHiy = gsl_vector_alloc(dc_size);
+ gsl_vector *QiWHiy = gsl_vector_alloc(dc_size);
+ gsl_vector *beta = gsl_vector_alloc(dc_size);
+ gsl_matrix *Vbeta = gsl_matrix_alloc(dc_size, dc_size);
+
+ gsl_vector_set_zero(WHiy);
+
+ // Eigen decomposition and calculate log|Ve|.
+ logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+ // Calculate Qi and log|Q|.
+ logdet_Q = CalcQi(eval, D_l, W, Qi);
+
+ // Calculate UltVehiY.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+ // Calculate WHiy.
+ for (size_t i = 0; i < d_size; i++) {
+ dl = gsl_vector_get(D_l, i);
+
+ for (size_t j = 0; j < c_size; j++) {
+ d = 0.0;
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ dw = gsl_matrix_get(W, j, k);
+ dy = gsl_matrix_get(UltVehiY, i, k);
+
+ d += dy * dw / (delta * dl + 1.0);
+ }
+ gsl_vector_set(WHiy, j * d_size + i, d);
+ }
+ }
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
+
+ // Transform back with I_c\otimes UltVeh: one side for beta, both for Vbeta.
+ for (size_t i = 0; i < c_size; i++) {
+ gsl_vector_view QiWHiy_sub =
+ gsl_vector_subvector(QiWHiy, i * d_size, d_size);
+ gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
+ gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0,
+ &beta_sub.vector);
+
+ for (size_t j = 0; j < c_size; j++) {
+ gsl_matrix_view Qi_sub =
+ gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
+ gsl_matrix_view Qitemp_sub =
+ gsl_matrix_submatrix(Qi_temp, i * d_size, j * d_size, d_size, d_size);
+ gsl_matrix_view Vbeta_sub =
+ gsl_matrix_submatrix(Vbeta, i * d_size, j * d_size, d_size, d_size);
+
+ if (j < i) {
+ gsl_matrix_view Vbeta_sym =
+ gsl_matrix_submatrix(Vbeta, j * d_size, i * d_size, d_size, d_size);
+ gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix, &Vbeta_sym.matrix);
+ } else {
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
+ 0.0, &Qitemp_sub.matrix);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
+ &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
+ }
+ }
+ }
+
+ // Copy beta to B, and the square roots of Vbeta's diagonal to se_B.
+ for (size_t j = 0; j < B->size2; j++) {
+ for (size_t i = 0; i < B->size1; i++) {
+ gsl_matrix_set(B, i, j, gsl_vector_get(beta, j * d_size + i));
+ gsl_matrix_set(se_B, i, j, sqrt(gsl_matrix_get(Vbeta, j * d_size + i,
+ j * d_size + i)));
+ }
+ }
+
+ // Free matrices.
+ gsl_vector_free(D_l);
+ gsl_matrix_free(UltVeh);
+ gsl_matrix_free(UltVehi);
+ gsl_matrix_free(Qi);
+ gsl_matrix_free(Qi_temp);
+ gsl_vector_free(WHiy);
+ gsl_vector_free(QiWHiy);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ return;
}
// Below are functions for Newton-Raphson's algorithm.
@@ -912,996 +931,962 @@ void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W,
// Calculate all Hi and return logdet_H=\sum_{k=1}^{n}log|H_k|
// and calculate Qi and return logdet_Q
// and calculate yPy.
-void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X,
- const gsl_matrix *V_g, const gsl_matrix *V_e,
- gsl_matrix *Hi_all, gsl_matrix *Qi, double &logdet_H,
- double &logdet_Q) {
- gsl_matrix_set_zero (Hi_all);
- gsl_matrix_set_zero (Qi);
- logdet_H=0.0; logdet_Q=0.0;
-
- size_t n_size=eval->size, c_size=X->size1, d_size=V_g->size1;
- double logdet_Ve=0.0, delta, dl, d;
-
- gsl_matrix *mat_dd=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
- gsl_vector *D_l=gsl_vector_alloc (d_size);
-
- // Calculate D_l, UltVeh and UltVehi.
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
- // Calculate each Hi and log|H_k|.
- logdet_H=(double)n_size*logdet_Ve;
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
-
- gsl_matrix_memcpy (mat_dd, UltVehi);
- for (size_t i=0; i<d_size; i++) {
- dl=gsl_vector_get(D_l, i);
- d=delta*dl+1.0;
-
- gsl_vector_view mat_row=gsl_matrix_row (mat_dd, i);
- gsl_vector_scale (&mat_row.vector, 1.0/d);
-
- logdet_H+=log(d);
- }
-
- gsl_matrix_view Hi_k=
- gsl_matrix_submatrix(Hi_all, 0, k*d_size, d_size, d_size);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi,
- mat_dd, 0.0, &Hi_k.matrix);
- }
-
- // Calculate Qi, and multiply I\o times UtVeh on both side and
- // calculate logdet_Q, don't forget to substract
- // c_size*logdet_Ve.
- logdet_Q=CalcQi (eval, D_l, X, Qi)-(double)c_size*logdet_Ve;
-
- for (size_t i=0; i<c_size; i++) {
- for (size_t j=0; j<c_size; j++) {
- gsl_matrix_view Qi_sub=
- gsl_matrix_submatrix(Qi,i*d_size,j*d_size,d_size,d_size);
- if (j<i) {
- gsl_matrix_view Qi_sym=
- gsl_matrix_submatrix(Qi,j*d_size,i*d_size,d_size,d_size);
- gsl_matrix_transpose_memcpy(&Qi_sub.matrix,&Qi_sym.matrix);
- } else {
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &Qi_sub.matrix, UltVeh, 0.0, mat_dd);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh,
- mat_dd, 0.0, &Qi_sub.matrix);
- }
- }
- }
-
- // Free memory.
- gsl_matrix_free(mat_dd);
- gsl_matrix_free(UltVeh);
- gsl_matrix_free(UltVehi);
- gsl_vector_free(D_l);
-
- return;
+void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X,
+ const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *Hi_all,
+ gsl_matrix *Qi, double &logdet_H, double &logdet_Q) {
+ gsl_matrix_set_zero(Hi_all);
+ gsl_matrix_set_zero(Qi);
+ logdet_H = 0.0;
+ logdet_Q = 0.0;
+
+ size_t n_size = eval->size, c_size = X->size1, d_size = V_g->size1;
+ double logdet_Ve = 0.0, delta, dl, d;
+
+ gsl_matrix *mat_dd = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+ gsl_vector *D_l = gsl_vector_alloc(d_size);
+
+ // Calculate D_l, UltVeh and UltVehi.
+ logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+ // Calculate each Hi and log|H_k|.
+ logdet_H = (double)n_size * logdet_Ve;
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+
+ gsl_matrix_memcpy(mat_dd, UltVehi);
+ for (size_t i = 0; i < d_size; i++) {
+ dl = gsl_vector_get(D_l, i);
+ d = delta * dl + 1.0;
+
+ gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i);
+ gsl_vector_scale(&mat_row.vector, 1.0 / d);
+
+ logdet_H += log(d);
+ }
+
+ gsl_matrix_view Hi_k =
+ gsl_matrix_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi, mat_dd, 0.0,
+ &Hi_k.matrix);
+ }
+
+ // Calculate Qi, multiply it by (I \otimes UltVeh) on both sides, and
+ // calculate logdet_Q; don't forget to subtract
+ // c_size*logdet_Ve.
+ logdet_Q = CalcQi(eval, D_l, X, Qi) - (double)c_size * logdet_Ve;
+
+ for (size_t i = 0; i < c_size; i++) {
+ for (size_t j = 0; j < c_size; j++) {
+ gsl_matrix_view Qi_sub =
+ gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size);
+ if (j < i) {
+ gsl_matrix_view Qi_sym =
+ gsl_matrix_submatrix(Qi, j * d_size, i * d_size, d_size, d_size);
+ gsl_matrix_transpose_memcpy(&Qi_sub.matrix, &Qi_sym.matrix);
+ } else {
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh,
+ 0.0, mat_dd);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, mat_dd, 0.0,
+ &Qi_sub.matrix);
+ }
+ }
+ }
+
+ // Free memory.
+ gsl_matrix_free(mat_dd);
+ gsl_matrix_free(UltVeh);
+ gsl_matrix_free(UltVehi);
+ gsl_vector_free(D_l);
+
+ return;
}
// Calculate all Hiy.
-void Calc_Hiy_all (const gsl_matrix *Y, const gsl_matrix *Hi_all,
- gsl_matrix *Hiy_all) {
- gsl_matrix_set_zero (Hiy_all);
+void Calc_Hiy_all(const gsl_matrix *Y, const gsl_matrix *Hi_all,
+ gsl_matrix *Hiy_all) {
+ gsl_matrix_set_zero(Hiy_all);
- size_t n_size=Y->size2, d_size=Y->size1;
+ size_t n_size = Y->size2, d_size = Y->size1;
- for (size_t k=0; k<n_size; k++) {
- gsl_matrix_const_view Hi_k=
- gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size);
- gsl_vector_const_view y_k=gsl_matrix_const_column(Y,k);
- gsl_vector_view Hiy_k=gsl_matrix_column(Hiy_all, k);
+ for (size_t k = 0; k < n_size; k++) {
+ gsl_matrix_const_view Hi_k =
+ gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
+ gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
+ gsl_vector_view Hiy_k = gsl_matrix_column(Hiy_all, k);
- gsl_blas_dgemv(CblasNoTrans,1.0,&Hi_k.matrix,&y_k.vector,
- 0.0,&Hiy_k.vector);
- }
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &Hi_k.matrix, &y_k.vector, 0.0,
+ &Hiy_k.vector);
+ }
- return;
+ return;
}
// Calculate all xHi.
-void Calc_xHi_all (const gsl_matrix *X, const gsl_matrix *Hi_all,
- gsl_matrix *xHi_all) {
- gsl_matrix_set_zero (xHi_all);
+void Calc_xHi_all(const gsl_matrix *X, const gsl_matrix *Hi_all,
+ gsl_matrix *xHi_all) {
+ gsl_matrix_set_zero(xHi_all);
- size_t n_size=X->size2, c_size=X->size1, d_size=Hi_all->size1;
+ size_t n_size = X->size2, c_size = X->size1, d_size = Hi_all->size1;
- double d;
+ double d;
- for (size_t k=0; k<n_size; k++) {
- gsl_matrix_const_view Hi_k=
- gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size);
+ for (size_t k = 0; k < n_size; k++) {
+ gsl_matrix_const_view Hi_k =
+ gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size);
- for (size_t i=0; i<c_size; i++) {
- d=gsl_matrix_get (X, i, k);
- gsl_matrix_view xHi_sub=
- gsl_matrix_submatrix(xHi_all,i*d_size,k*d_size,
- d_size,d_size);
- gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix);
- gsl_matrix_scale(&xHi_sub.matrix, d);
- }
- }
+ for (size_t i = 0; i < c_size; i++) {
+ d = gsl_matrix_get(X, i, k);
+ gsl_matrix_view xHi_sub =
+ gsl_matrix_submatrix(xHi_all, i * d_size, k * d_size, d_size, d_size);
+ gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix);
+ gsl_matrix_scale(&xHi_sub.matrix, d);
+ }
+ }
- return;
+ return;
}
// Calculate scalar yHiy.
-double Calc_yHiy (const gsl_matrix *Y, const gsl_matrix *Hiy_all) {
- double yHiy=0.0, d;
- size_t n_size=Y->size2;
+double Calc_yHiy(const gsl_matrix *Y, const gsl_matrix *Hiy_all) {
+ double yHiy = 0.0, d;
+ size_t n_size = Y->size2;
- for (size_t k=0; k<n_size; k++) {
- gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
- gsl_vector_const_view Hiy_k=gsl_matrix_const_column(Hiy_all, k);
+ for (size_t k = 0; k < n_size; k++) {
+ gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
+ gsl_vector_const_view Hiy_k = gsl_matrix_const_column(Hiy_all, k);
- gsl_blas_ddot (&Hiy_k.vector, &y_k.vector, &d);
- yHiy+=d;
- }
+ gsl_blas_ddot(&Hiy_k.vector, &y_k.vector, &d);
+ yHiy += d;
+ }
- return yHiy;
+ return yHiy;
}
// Calculate the vector xHiy.
-void Calc_xHiy (const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) {
- gsl_vector_set_zero (xHiy);
+void Calc_xHiy(const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) {
+ gsl_vector_set_zero(xHiy);
- size_t n_size=Y->size2, d_size=Y->size1, dc_size=xHi->size1;
+ size_t n_size = Y->size2, d_size = Y->size1, dc_size = xHi->size1;
- for (size_t k=0; k<n_size; k++) {
- gsl_matrix_const_view xHi_k=
- gsl_matrix_const_submatrix(xHi,0,k*d_size,dc_size,d_size);
- gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
+ for (size_t k = 0; k < n_size; k++) {
+ gsl_matrix_const_view xHi_k =
+ gsl_matrix_const_submatrix(xHi, 0, k * d_size, dc_size, d_size);
+ gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k);
- gsl_blas_dgemv(CblasNoTrans,1.0,&xHi_k.matrix,&y_k.vector,
- 1.0,xHiy);
- }
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &xHi_k.matrix, &y_k.vector, 1.0, xHiy);
+ }
- return;
+ return;
}
// 0<=i,j<d_size
-size_t GetIndex (const size_t i, const size_t j, const size_t d_size) {
- if (i>=d_size || j>=d_size) {
- cout<<"error in GetIndex."<<endl;
- return 0;
- }
-
- size_t s, l;
- if (j<i) {
- s=j;
- l=i;
- } else {
- s=i;
- l=j;
- }
-
- return (2*d_size-s+1)*s/2+l-s;
+size_t GetIndex(const size_t i, const size_t j, const size_t d_size) {
+ if (i >= d_size || j >= d_size) {
+ cout << "error in GetIndex." << endl;
+ return 0;
+ }
+
+ size_t s, l;
+ if (j < i) {
+ s = j;
+ l = i;
+ } else {
+ s = i;
+ l = j;
+ }
+
+ return (2 * d_size - s + 1) * s / 2 + l - s;
}
-void Calc_yHiDHiy (const gsl_vector *eval, const gsl_matrix *Hiy,
- const size_t i, const size_t j, double &yHiDHiy_g,
- double &yHiDHiy_e) {
- yHiDHiy_g=0.0;
- yHiDHiy_e=0.0;
+void Calc_yHiDHiy(const gsl_vector *eval, const gsl_matrix *Hiy, const size_t i,
+ const size_t j, double &yHiDHiy_g, double &yHiDHiy_e) {
+ yHiDHiy_g = 0.0;
+ yHiDHiy_e = 0.0;
- size_t n_size=eval->size;
+ size_t n_size = eval->size;
- double delta, d1, d2;
+ double delta, d1, d2;
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
- d1=gsl_matrix_get (Hiy, i, k);
- d2=gsl_matrix_get (Hiy, j, k);
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ d1 = gsl_matrix_get(Hiy, i, k);
+ d2 = gsl_matrix_get(Hiy, j, k);
- if (i==j) {
- yHiDHiy_g+=delta*d1*d2;
- yHiDHiy_e+=d1*d2;
- } else {
- yHiDHiy_g+=delta*d1*d2*2.0;
- yHiDHiy_e+=d1*d2*2.0;
- }
- }
+ if (i == j) {
+ yHiDHiy_g += delta * d1 * d2;
+ yHiDHiy_e += d1 * d2;
+ } else {
+ yHiDHiy_g += delta * d1 * d2 * 2.0;
+ yHiDHiy_e += d1 * d2 * 2.0;
+ }
+ }
- return;
+ return;
}
-void Calc_xHiDHiy (const gsl_vector *eval, const gsl_matrix *xHi,
- const gsl_matrix *Hiy, const size_t i, const size_t j,
- gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) {
- gsl_vector_set_zero(xHiDHiy_g);
- gsl_vector_set_zero(xHiDHiy_e);
+void Calc_xHiDHiy(const gsl_vector *eval, const gsl_matrix *xHi,
+ const gsl_matrix *Hiy, const size_t i, const size_t j,
+ gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) {
+ gsl_vector_set_zero(xHiDHiy_g);
+ gsl_vector_set_zero(xHiDHiy_e);
+
+ size_t n_size = eval->size, d_size = Hiy->size1;
- size_t n_size=eval->size, d_size=Hiy->size1;
+ double delta, d;
- double delta, d;
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
+ gsl_vector_const_view xHi_col_i =
+ gsl_matrix_const_column(xHi, k * d_size + i);
+ d = gsl_matrix_get(Hiy, j, k);
- gsl_vector_const_view xHi_col_i=
- gsl_matrix_const_column (xHi, k*d_size+i);
- d=gsl_matrix_get (Hiy, j, k);
+ gsl_blas_daxpy(d * delta, &xHi_col_i.vector, xHiDHiy_g);
+ gsl_blas_daxpy(d, &xHi_col_i.vector, xHiDHiy_e);
- gsl_blas_daxpy (d*delta, &xHi_col_i.vector, xHiDHiy_g);
- gsl_blas_daxpy (d, &xHi_col_i.vector, xHiDHiy_e);
+ if (i != j) {
+ gsl_vector_const_view xHi_col_j =
+ gsl_matrix_const_column(xHi, k * d_size + j);
+ d = gsl_matrix_get(Hiy, i, k);
- if (i!=j) {
- gsl_vector_const_view xHi_col_j=
- gsl_matrix_const_column (xHi, k*d_size+j);
- d=gsl_matrix_get (Hiy, i, k);
-
- gsl_blas_daxpy (d*delta, &xHi_col_j.vector, xHiDHiy_g);
- gsl_blas_daxpy (d, &xHi_col_j.vector, xHiDHiy_e);
- }
- }
+ gsl_blas_daxpy(d * delta, &xHi_col_j.vector, xHiDHiy_g);
+ gsl_blas_daxpy(d, &xHi_col_j.vector, xHiDHiy_e);
+ }
+ }
- return;
+ return;
}
-void Calc_xHiDHix (const gsl_vector *eval, const gsl_matrix *xHi,
- const size_t i, const size_t j, gsl_matrix *xHiDHix_g,
- gsl_matrix *xHiDHix_e) {
- gsl_matrix_set_zero(xHiDHix_g);
- gsl_matrix_set_zero(xHiDHix_e);
+void Calc_xHiDHix(const gsl_vector *eval, const gsl_matrix *xHi, const size_t i,
+ const size_t j, gsl_matrix *xHiDHix_g,
+ gsl_matrix *xHiDHix_e) {
+ gsl_matrix_set_zero(xHiDHix_g);
+ gsl_matrix_set_zero(xHiDHix_e);
- size_t n_size=eval->size, dc_size=xHi->size1;
- size_t d_size=xHi->size2/n_size;
+ size_t n_size = eval->size, dc_size = xHi->size1;
+ size_t d_size = xHi->size2 / n_size;
- double delta;
+ double delta;
- gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *mat_dcdc_t=gsl_matrix_alloc (dc_size, dc_size);
+ gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *mat_dcdc_t = gsl_matrix_alloc(dc_size, dc_size);
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
- gsl_vector_const_view xHi_col_i=
- gsl_matrix_const_column (xHi, k*d_size+i);
- gsl_vector_const_view xHi_col_j=
- gsl_matrix_const_column (xHi, k*d_size+j);
+ gsl_vector_const_view xHi_col_i =
+ gsl_matrix_const_column(xHi, k * d_size + i);
+ gsl_vector_const_view xHi_col_j =
+ gsl_matrix_const_column(xHi, k * d_size + j);
- gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger(1.0,&xHi_col_i.vector,&xHi_col_j.vector,
- mat_dcdc);
+ gsl_matrix_set_zero(mat_dcdc);
+ gsl_blas_dger(1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc);
- gsl_matrix_transpose_memcpy (mat_dcdc_t, mat_dcdc);
+ gsl_matrix_transpose_memcpy(mat_dcdc_t, mat_dcdc);
- gsl_matrix_add (xHiDHix_e, mat_dcdc);
+ gsl_matrix_add(xHiDHix_e, mat_dcdc);
- gsl_matrix_scale (mat_dcdc, delta);
- gsl_matrix_add (xHiDHix_g, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHix_g, mat_dcdc);
- if (i!=j) {
- gsl_matrix_add (xHiDHix_e, mat_dcdc_t);
+ if (i != j) {
+ gsl_matrix_add(xHiDHix_e, mat_dcdc_t);
- gsl_matrix_scale (mat_dcdc_t, delta);
- gsl_matrix_add (xHiDHix_g, mat_dcdc_t);
- }
- }
+ gsl_matrix_scale(mat_dcdc_t, delta);
+ gsl_matrix_add(xHiDHix_g, mat_dcdc_t);
+ }
+ }
- gsl_matrix_free(mat_dcdc);
- gsl_matrix_free(mat_dcdc_t);
+ gsl_matrix_free(mat_dcdc);
+ gsl_matrix_free(mat_dcdc_t);
- return;
+ return;
}
-void Calc_yHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi,
- const gsl_matrix *Hiy, const size_t i1,
- const size_t j1, const size_t i2, const size_t j2,
- double &yHiDHiDHiy_gg, double &yHiDHiDHiy_ee,
- double &yHiDHiDHiy_ge) {
- yHiDHiDHiy_gg=0.0;
- yHiDHiDHiy_ee=0.0;
- yHiDHiDHiy_ge=0.0;
-
- size_t n_size=eval->size, d_size=Hiy->size1;
-
- double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2;
- double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
-
- d_Hiy_i1=gsl_matrix_get (Hiy, i1, k);
- d_Hiy_j1=gsl_matrix_get (Hiy, j1, k);
- d_Hiy_i2=gsl_matrix_get (Hiy, i2, k);
- d_Hiy_j2=gsl_matrix_get (Hiy, j2, k);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
- d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
- if (i1==j1) {
- yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
- yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
- yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-
- if (i2!=j2) {
- yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
- yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
- yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
- }
- } else {
- yHiDHiDHiy_gg+=
- delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 +
- d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
- yHiDHiDHiy_ee+=
- (d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2+d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
- yHiDHiDHiy_ge+=
- delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 +
- d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-
- if (i2!=j2) {
- yHiDHiDHiy_gg+=
- delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
- d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
- yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
- d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
- yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 +
- d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
- }
- }
- }
-
- return;
+void Calc_yHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
+ const gsl_matrix *Hiy, const size_t i1, const size_t j1,
+ const size_t i2, const size_t j2, double &yHiDHiDHiy_gg,
+ double &yHiDHiDHiy_ee, double &yHiDHiDHiy_ge) {
+ yHiDHiDHiy_gg = 0.0;
+ yHiDHiDHiy_ee = 0.0;
+ yHiDHiDHiy_ge = 0.0;
+
+ size_t n_size = eval->size, d_size = Hiy->size1;
+
+ double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2;
+ double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+
+ d_Hiy_i1 = gsl_matrix_get(Hiy, i1, k);
+ d_Hiy_j1 = gsl_matrix_get(Hiy, j1, k);
+ d_Hiy_i2 = gsl_matrix_get(Hiy, i2, k);
+ d_Hiy_j2 = gsl_matrix_get(Hiy, j2, k);
+
+ d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+ d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+ d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+ d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+ if (i1 == j1) {
+ yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+ yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+ yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2);
+
+ if (i2 != j2) {
+ yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+ yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+ yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2);
+ }
+ } else {
+ yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
+ d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+ yHiDHiDHiy_ee +=
+ (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 + d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+ yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 +
+ d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2);
+
+ if (i2 != j2) {
+ yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
+ d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+ yHiDHiDHiy_ee +=
+ (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 + d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+ yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 +
+ d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2);
+ }
+ }
+ }
+
+ return;
}
-void Calc_xHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi,
- const gsl_matrix *xHi, const gsl_matrix *Hiy,
- const size_t i1, const size_t j1, const size_t i2,
- const size_t j2, gsl_vector *xHiDHiDHiy_gg,
- gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) {
- gsl_vector_set_zero(xHiDHiDHiy_gg);
- gsl_vector_set_zero(xHiDHiDHiy_ee);
- gsl_vector_set_zero(xHiDHiDHiy_ge);
-
- size_t n_size=eval->size, d_size=Hiy->size1;
-
- double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2;
- double d_Hi_j1i2, d_Hi_j1j2;
-
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
-
- gsl_vector_const_view xHi_col_i=
- gsl_matrix_const_column (xHi, k*d_size+i1);
- gsl_vector_const_view xHi_col_j=
- gsl_matrix_const_column (xHi, k*d_size+j1);
-
- d_Hiy_i=gsl_matrix_get (Hiy, i2, k);
- d_Hiy_j=gsl_matrix_get (Hiy, j2, k);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
- d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
- if (i1==j1) {
- gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
- xHiDHiDHiy_gg);
- gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
- xHiDHiDHiy_ee);
- gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
- xHiDHiDHiy_ge);
-
- if (i2!=j2) {
- gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i,
- &xHi_col_i.vector, xHiDHiDHiy_gg);
- gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
- xHiDHiDHiy_ee);
- gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
- xHiDHiDHiy_ge);
- }
- } else {
- gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
- xHiDHiDHiy_gg);
- gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
- xHiDHiDHiy_ee);
- gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector,
- xHiDHiDHiy_ge);
-
- gsl_blas_daxpy (delta*delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
- xHiDHiDHiy_gg);
- gsl_blas_daxpy (d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
- xHiDHiDHiy_ee);
- gsl_blas_daxpy (delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector,
- xHiDHiDHiy_ge);
-
- if (i2!=j2) {
- gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i,
- &xHi_col_i.vector, xHiDHiDHiy_gg);
- gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
- xHiDHiDHiy_ee);
- gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector,
- xHiDHiDHiy_ge);
-
- gsl_blas_daxpy (delta*delta*d_Hi_i1j2*d_Hiy_i,
- &xHi_col_j.vector, xHiDHiDHiy_gg);
- gsl_blas_daxpy (d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector,
- xHiDHiDHiy_ee);
- gsl_blas_daxpy (delta*d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector,
- xHiDHiDHiy_ge);
- }
- }
- }
-
- return;
+void Calc_xHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi,
+ const gsl_matrix *xHi, const gsl_matrix *Hiy,
+ const size_t i1, const size_t j1, const size_t i2,
+ const size_t j2, gsl_vector *xHiDHiDHiy_gg,
+ gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) {
+ gsl_vector_set_zero(xHiDHiDHiy_gg);
+ gsl_vector_set_zero(xHiDHiDHiy_ee);
+ gsl_vector_set_zero(xHiDHiDHiy_ge);
+
+ size_t n_size = eval->size, d_size = Hiy->size1;
+
+ double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2;
+ double d_Hi_j1i2, d_Hi_j1j2;
+
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+
+ gsl_vector_const_view xHi_col_i =
+ gsl_matrix_const_column(xHi, k * d_size + i1);
+ gsl_vector_const_view xHi_col_j =
+ gsl_matrix_const_column(xHi, k * d_size + j1);
+
+ d_Hiy_i = gsl_matrix_get(Hiy, i2, k);
+ d_Hiy_j = gsl_matrix_get(Hiy, j2, k);
+
+ d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+ d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+ d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+ d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+ if (i1 == j1) {
+ gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+ xHiDHiDHiy_gg);
+ gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
+ gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+ xHiDHiDHiy_ge);
+
+ if (i2 != j2) {
+ gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+ xHiDHiDHiy_gg);
+ gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
+ gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+ xHiDHiDHiy_ge);
+ }
+ } else {
+ gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+ xHiDHiDHiy_gg);
+ gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
+ gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector,
+ xHiDHiDHiy_ge);
+
+ gsl_blas_daxpy(delta * delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
+ xHiDHiDHiy_gg);
+ gsl_blas_daxpy(d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_ee);
+ gsl_blas_daxpy(delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector,
+ xHiDHiDHiy_ge);
+
+ if (i2 != j2) {
+ gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+ xHiDHiDHiy_gg);
+ gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
+ gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector,
+ xHiDHiDHiy_ge);
+
+ gsl_blas_daxpy(delta * delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
+ xHiDHiDHiy_gg);
+ gsl_blas_daxpy(d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_ee);
+ gsl_blas_daxpy(delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector,
+ xHiDHiDHiy_ge);
+ }
+ }
+ }
+
+ return;
}
+void Calc_xHiDHiDHix(const gsl_vector *eval, const gsl_matrix *Hi,
+ const gsl_matrix *xHi, const size_t i1, const size_t j1,
+ const size_t i2, const size_t j2,
+ gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee,
+ gsl_matrix *xHiDHiDHix_ge) {
+ gsl_matrix_set_zero(xHiDHiDHix_gg);
+ gsl_matrix_set_zero(xHiDHiDHix_ee);
+ gsl_matrix_set_zero(xHiDHiDHix_ge);
+
+ size_t n_size = eval->size, d_size = Hi->size1, dc_size = xHi->size1;
+
+ double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+ gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+
+ gsl_vector_const_view xHi_col_i1 =
+ gsl_matrix_const_column(xHi, k * d_size + i1);
+ gsl_vector_const_view xHi_col_j1 =
+ gsl_matrix_const_column(xHi, k * d_size + j1);
+ gsl_vector_const_view xHi_col_i2 =
+ gsl_matrix_const_column(xHi, k * d_size + i2);
+ gsl_vector_const_view xHi_col_j2 =
+ gsl_matrix_const_column(xHi, k * d_size + j2);
+
+ d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+ d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+ d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+ d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+ if (i1 == j1) {
+ gsl_matrix_set_zero(mat_dcdc);
+ gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
+ mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+ if (i2 != j2) {
+ gsl_matrix_set_zero(mat_dcdc);
+ gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
+ mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+ }
+ } else {
+ gsl_matrix_set_zero(mat_dcdc);
+ gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector,
+ mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+ gsl_matrix_set_zero(mat_dcdc);
+ gsl_blas_dger(d_Hi_i1i2, &xHi_col_j1.vector, &xHi_col_j2.vector,
+ mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+ if (i2 != j2) {
+ gsl_matrix_set_zero(mat_dcdc);
+ gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector,
+ mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+
+ gsl_matrix_set_zero(mat_dcdc);
+ gsl_blas_dger(d_Hi_i1j2, &xHi_col_j1.vector, &xHi_col_i2.vector,
+ mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_scale(mat_dcdc, delta);
+ gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
+ }
+ }
+ }
+
+ gsl_matrix_free(mat_dcdc);
-void Calc_xHiDHiDHix (const gsl_vector *eval, const gsl_matrix *Hi,
- const gsl_matrix *xHi, const size_t i1, const size_t j1,
- const size_t i2, const size_t j2,
- gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee,
- gsl_matrix *xHiDHiDHix_ge) {
- gsl_matrix_set_zero(xHiDHiDHix_gg);
- gsl_matrix_set_zero(xHiDHiDHix_ee);
- gsl_matrix_set_zero(xHiDHiDHix_ge);
-
- size_t n_size=eval->size, d_size=Hi->size1, dc_size=xHi->size1;
-
- double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
- gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
-
- gsl_vector_const_view xHi_col_i1=
- gsl_matrix_const_column (xHi, k*d_size+i1);
- gsl_vector_const_view xHi_col_j1=
- gsl_matrix_const_column (xHi, k*d_size+j1);
- gsl_vector_const_view xHi_col_i2=
- gsl_matrix_const_column (xHi, k*d_size+i2);
- gsl_vector_const_view xHi_col_j2=
- gsl_matrix_const_column (xHi, k*d_size+j2);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
- d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
- if (i1==j1) {
- gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector,
- &xHi_col_j2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
- if (i2!=j2) {
- gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector,
- &xHi_col_i2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
- }
- } else {
- gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector,
- &xHi_col_j2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
- gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger (d_Hi_i1i2, &xHi_col_j1.vector,
- &xHi_col_j2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
- if (i2!=j2) {
- gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector,
- &xHi_col_i2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
- gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger (d_Hi_i1j2, &xHi_col_j1.vector,
- &xHi_col_i2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
- gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
- }
- }
- }
-
- gsl_matrix_free(mat_dcdc);
-
- return;
+ return;
}
-void Calc_traceHiD (const gsl_vector *eval, const gsl_matrix *Hi,
- const size_t i, const size_t j, double &tHiD_g,
- double &tHiD_e) {
- tHiD_g=0.0;
- tHiD_e=0.0;
-
- size_t n_size=eval->size, d_size=Hi->size1;
- double delta, d;
-
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
- d=gsl_matrix_get (Hi, j, k*d_size+i);
-
- if (i==j) {
- tHiD_g+=delta*d;
- tHiD_e+=d;
- } else {
- tHiD_g+=delta*d*2.0;
- tHiD_e+=d*2.0;
- }
- }
-
- return;
+void Calc_traceHiD(const gsl_vector *eval, const gsl_matrix *Hi, const size_t i,
+ const size_t j, double &tHiD_g, double &tHiD_e) {
+ tHiD_g = 0.0;
+ tHiD_e = 0.0;
+
+ size_t n_size = eval->size, d_size = Hi->size1;
+ double delta, d;
+
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ d = gsl_matrix_get(Hi, j, k * d_size + i);
+
+ if (i == j) {
+ tHiD_g += delta * d;
+ tHiD_e += d;
+ } else {
+ tHiD_g += delta * d * 2.0;
+ tHiD_e += d * 2.0;
+ }
+ }
+
+ return;
}
-void Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi,
- const size_t i1, const size_t j1, const size_t i2,
- const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee,
- double &tHiDHiD_ge) {
- tHiDHiD_gg=0.0;
- tHiDHiD_ee=0.0;
- tHiDHiD_ge=0.0;
-
- size_t n_size=eval->size, d_size=Hi->size1;
- double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get (eval, k);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
- d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
- if (i1==j1) {
- tHiDHiD_gg+=delta*delta*d_Hi_i1j2*d_Hi_j1i2;
- tHiDHiD_ee+=d_Hi_i1j2*d_Hi_j1i2;
- tHiDHiD_ge+=delta*d_Hi_i1j2*d_Hi_j1i2;
-
- if (i2!=j2) {
- tHiDHiD_gg+=delta*delta*d_Hi_i1i2*d_Hi_j1j2;
- tHiDHiD_ee+=d_Hi_i1i2*d_Hi_j1j2;
- tHiDHiD_ge+=delta*d_Hi_i1i2*d_Hi_j1j2;
- }
- } else {
- tHiDHiD_gg+=delta*delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*
- d_Hi_i1i2);
- tHiDHiD_ee+=(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
- tHiDHiD_ge+=delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
-
- if (i2!=j2) {
- tHiDHiD_gg+=delta*delta*(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*
- d_Hi_i1j2);
- tHiDHiD_ee+=(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*d_Hi_i1j2);
- tHiDHiD_ge+=delta*(d_Hi_i1i2*d_Hi_j1j2 +
- d_Hi_j1i2*d_Hi_i1j2);
- }
- }
- }
-
- return;
+void Calc_traceHiDHiD(const gsl_vector *eval, const gsl_matrix *Hi,
+ const size_t i1, const size_t j1, const size_t i2,
+ const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee,
+ double &tHiDHiD_ge) {
+ tHiDHiD_gg = 0.0;
+ tHiDHiD_ee = 0.0;
+ tHiDHiD_ge = 0.0;
+
+ size_t n_size = eval->size, d_size = Hi->size1;
+ double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
+
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+
+ d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2);
+ d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2);
+ d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2);
+ d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2);
+
+ if (i1 == j1) {
+ tHiDHiD_gg += delta * delta * d_Hi_i1j2 * d_Hi_j1i2;
+ tHiDHiD_ee += d_Hi_i1j2 * d_Hi_j1i2;
+ tHiDHiD_ge += delta * d_Hi_i1j2 * d_Hi_j1i2;
+
+ if (i2 != j2) {
+ tHiDHiD_gg += delta * delta * d_Hi_i1i2 * d_Hi_j1j2;
+ tHiDHiD_ee += d_Hi_i1i2 * d_Hi_j1j2;
+ tHiDHiD_ge += delta * d_Hi_i1i2 * d_Hi_j1j2;
+ }
+ } else {
+ tHiDHiD_gg +=
+ delta * delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+ tHiDHiD_ee += (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+ tHiDHiD_ge += delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2);
+
+ if (i2 != j2) {
+ tHiDHiD_gg +=
+ delta * delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+ tHiDHiD_ee += (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+ tHiDHiD_ge += delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2);
+ }
+ }
+ }
+
+ return;
}
// trace(PD) = trace((Hi-HixQixHi)D)=trace(HiD) - trace(HixQixHiD)
-void Calc_tracePD (const gsl_vector *eval, const gsl_matrix *Qi,
- const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g,
- const gsl_matrix *xHiDHix_all_e, const size_t i,
- const size_t j, double &tPD_g, double &tPD_e) {
- size_t dc_size=Qi->size1, d_size=Hi->size1;
- size_t v=GetIndex(i, j, d_size);
-
- double d;
-
- // Calculate the first part: trace(HiD).
- Calc_traceHiD (eval, Hi, i, j, tPD_g, tPD_e);
-
- // Calculate the second part: -trace(HixQixHiD).
- for (size_t k=0; k<dc_size; k++) {
- gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, k);
- gsl_vector_const_view xHiDHix_g_col =
- gsl_matrix_const_column (xHiDHix_all_g, v*dc_size+k);
- gsl_vector_const_view xHiDHix_e_col =
- gsl_matrix_const_column (xHiDHix_all_e, v*dc_size+k);
-
- gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
- tPD_g-=d;
- gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
- tPD_e-=d;
- }
-
- return;
+void Calc_tracePD(const gsl_vector *eval, const gsl_matrix *Qi,
+ const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g,
+ const gsl_matrix *xHiDHix_all_e, const size_t i,
+ const size_t j, double &tPD_g, double &tPD_e) {
+ size_t dc_size = Qi->size1, d_size = Hi->size1;
+ size_t v = GetIndex(i, j, d_size);
+
+ double d;
+
+ // Calculate the first part: trace(HiD).
+ Calc_traceHiD(eval, Hi, i, j, tPD_g, tPD_e);
+
+ // Calculate the second part: -trace(HixQixHiD).
+ for (size_t k = 0; k < dc_size; k++) {
+ gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, k);
+ gsl_vector_const_view xHiDHix_g_col =
+ gsl_matrix_const_column(xHiDHix_all_g, v * dc_size + k);
+ gsl_vector_const_view xHiDHix_e_col =
+ gsl_matrix_const_column(xHiDHix_all_e, v * dc_size + k);
+
+ gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
+ tPD_g -= d;
+ gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
+ tPD_e -= d;
+ }
+
+ return;
}
// trace(PDPD) = trace((Hi-HixQixHi)D(Hi-HixQixHi)D)
// = trace(HiDHiD) - trace(HixQixHiDHiD)
// - trace(HiDHixQixHiD) + trace(HixQixHiDHixQixHiD)
-void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi,
- const gsl_matrix *Hi, const gsl_matrix *xHi,
- const gsl_matrix *QixHiDHix_all_g,
- const gsl_matrix *QixHiDHix_all_e,
- const gsl_matrix *xHiDHiDHix_all_gg,
- const gsl_matrix *xHiDHiDHix_all_ee,
- const gsl_matrix *xHiDHiDHix_all_ge,
- const size_t i1, const size_t j1,
- const size_t i2, const size_t j2,
- double &tPDPD_gg, double &tPDPD_ee,
- double &tPDPD_ge) {
- size_t dc_size=Qi->size1, d_size=Hi->size1;
- size_t v_size=d_size*(d_size+1)/2;
- size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
-
- double d;
-
- // Calculate the first part: trace(HiDHiD).
- Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee,
- tPDPD_ge);
-
- // Calculate the second and third parts:
- // -trace(HixQixHiDHiD) - trace(HiDHixQixHiD)
- for (size_t i=0; i<dc_size; i++) {
- gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, i);
- gsl_vector_const_view xHiDHiDHix_gg_col=
- gsl_matrix_const_column(xHiDHiDHix_all_gg,
- (v1*v_size+v2)*dc_size+i);
- gsl_vector_const_view xHiDHiDHix_ee_col =
- gsl_matrix_const_column(xHiDHiDHix_all_ee,
- (v1*v_size+v2)*dc_size+i);
- gsl_vector_const_view xHiDHiDHix_ge_col =
- gsl_matrix_const_column(xHiDHiDHix_all_ge,
- (v1*v_size+v2)*dc_size+i);
-
- gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d);
- tPDPD_gg-=d*2.0;
- gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d);
- tPDPD_ee-=d*2.0;
- gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d);
- tPDPD_ge-=d*2.0;
- }
-
- // Calculate the fourth part: trace(HixQixHiDHixQixHiD).
- for (size_t i=0; i<dc_size; i++) {
-
- gsl_vector_const_view QixHiDHix_g_fullrow1 =
- gsl_matrix_const_row (QixHiDHix_all_g, i);
- gsl_vector_const_view QixHiDHix_e_fullrow1 =
- gsl_matrix_const_row (QixHiDHix_all_e, i);
- gsl_vector_const_view QixHiDHix_g_row1 =
- gsl_vector_const_subvector (&QixHiDHix_g_fullrow1.vector,
- v1*dc_size, dc_size);
- gsl_vector_const_view QixHiDHix_e_row1 =
- gsl_vector_const_subvector (&QixHiDHix_e_fullrow1.vector,
- v1*dc_size, dc_size);
-
- gsl_vector_const_view QixHiDHix_g_col2 =
- gsl_matrix_const_column (QixHiDHix_all_g, v2*dc_size+i);
- gsl_vector_const_view QixHiDHix_e_col2 =
- gsl_matrix_const_column (QixHiDHix_all_e, v2*dc_size+i);
-
- gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_g_col2.vector,&d);
- tPDPD_gg+=d;
- gsl_blas_ddot(&QixHiDHix_e_row1.vector,&QixHiDHix_e_col2.vector,&d);
- tPDPD_ee+=d;
- gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_e_col2.vector,&d);
- tPDPD_ge+=d;
- }
-
- return;
+void Calc_tracePDPD(const gsl_vector *eval, const gsl_matrix *Qi,
+ const gsl_matrix *Hi, const gsl_matrix *xHi,
+ const gsl_matrix *QixHiDHix_all_g,
+ const gsl_matrix *QixHiDHix_all_e,
+ const gsl_matrix *xHiDHiDHix_all_gg,
+ const gsl_matrix *xHiDHiDHix_all_ee,
+ const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1,
+ const size_t j1, const size_t i2, const size_t j2,
+ double &tPDPD_gg, double &tPDPD_ee, double &tPDPD_ge) {
+ size_t dc_size = Qi->size1, d_size = Hi->size1;
+ size_t v_size = d_size * (d_size + 1) / 2;
+ size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
+
+ double d;
+
+ // Calculate the first part: trace(HiDHiD).
+ Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
+
+ // Calculate the second and third parts:
+ // -trace(HixQixHiDHiD) - trace(HiDHixQixHiD)
+ for (size_t i = 0; i < dc_size; i++) {
+ gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, i);
+ gsl_vector_const_view xHiDHiDHix_gg_col = gsl_matrix_const_column(
+ xHiDHiDHix_all_gg, (v1 * v_size + v2) * dc_size + i);
+ gsl_vector_const_view xHiDHiDHix_ee_col = gsl_matrix_const_column(
+ xHiDHiDHix_all_ee, (v1 * v_size + v2) * dc_size + i);
+ gsl_vector_const_view xHiDHiDHix_ge_col = gsl_matrix_const_column(
+ xHiDHiDHix_all_ge, (v1 * v_size + v2) * dc_size + i);
+
+ gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d);
+ tPDPD_gg -= d * 2.0;
+ gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d);
+ tPDPD_ee -= d * 2.0;
+ gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d);
+ tPDPD_ge -= d * 2.0;
+ }
+
+ // Calculate the fourth part: trace(HixQixHiDHixQixHiD).
+ for (size_t i = 0; i < dc_size; i++) {
+
+ gsl_vector_const_view QixHiDHix_g_fullrow1 =
+ gsl_matrix_const_row(QixHiDHix_all_g, i);
+ gsl_vector_const_view QixHiDHix_e_fullrow1 =
+ gsl_matrix_const_row(QixHiDHix_all_e, i);
+ gsl_vector_const_view QixHiDHix_g_row1 = gsl_vector_const_subvector(
+ &QixHiDHix_g_fullrow1.vector, v1 * dc_size, dc_size);
+ gsl_vector_const_view QixHiDHix_e_row1 = gsl_vector_const_subvector(
+ &QixHiDHix_e_fullrow1.vector, v1 * dc_size, dc_size);
+
+ gsl_vector_const_view QixHiDHix_g_col2 =
+ gsl_matrix_const_column(QixHiDHix_all_g, v2 * dc_size + i);
+ gsl_vector_const_view QixHiDHix_e_col2 =
+ gsl_matrix_const_column(QixHiDHix_all_e, v2 * dc_size + i);
+
+ gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_g_col2.vector, &d);
+ tPDPD_gg += d;
+ gsl_blas_ddot(&QixHiDHix_e_row1.vector, &QixHiDHix_e_col2.vector, &d);
+ tPDPD_ee += d;
+ gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_e_col2.vector, &d);
+ tPDPD_ge += d;
+ }
+
+ return;
}
// Calculate (xHiDHiy) for every pair (i,j).
-void Calc_xHiDHiy_all (const gsl_vector *eval, const gsl_matrix *xHi,
- const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g,
- gsl_matrix *xHiDHiy_all_e) {
- gsl_matrix_set_zero(xHiDHiy_all_g);
- gsl_matrix_set_zero(xHiDHiy_all_e);
-
- size_t d_size=Hiy->size1;
- size_t v;
-
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<d_size; j++) {
- if (j<i) {continue;}
- v=GetIndex(i, j, d_size);
-
- gsl_vector_view xHiDHiy_g=gsl_matrix_column (xHiDHiy_all_g, v);
- gsl_vector_view xHiDHiy_e=gsl_matrix_column (xHiDHiy_all_e, v);
-
- Calc_xHiDHiy (eval, xHi, Hiy, i, j, &xHiDHiy_g.vector,
- &xHiDHiy_e.vector);
- }
- }
- return;
+void Calc_xHiDHiy_all(const gsl_vector *eval, const gsl_matrix *xHi,
+ const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g,
+ gsl_matrix *xHiDHiy_all_e) {
+ gsl_matrix_set_zero(xHiDHiy_all_g);
+ gsl_matrix_set_zero(xHiDHiy_all_e);
+
+ size_t d_size = Hiy->size1;
+ size_t v;
+
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j < d_size; j++) {
+ if (j < i) {
+ continue;
+ }
+ v = GetIndex(i, j, d_size);
+
+ gsl_vector_view xHiDHiy_g = gsl_matrix_column(xHiDHiy_all_g, v);
+ gsl_vector_view xHiDHiy_e = gsl_matrix_column(xHiDHiy_all_e, v);
+
+ Calc_xHiDHiy(eval, xHi, Hiy, i, j, &xHiDHiy_g.vector, &xHiDHiy_e.vector);
+ }
+ }
+ return;
}
// Calculate (xHiDHix) for every pair (i,j).
-void Calc_xHiDHix_all (const gsl_vector *eval, const gsl_matrix *xHi,
- gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) {
+void Calc_xHiDHix_all(const gsl_vector *eval, const gsl_matrix *xHi,
+ gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) {
gsl_matrix_set_zero(xHiDHix_all_g);
gsl_matrix_set_zero(xHiDHix_all_e);
-
- size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
+
+ size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1;
size_t v;
-
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<d_size; j++) {
- if (j<i) {continue;}
- v=GetIndex(i, j, d_size);
-
+
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j < d_size; j++) {
+ if (j < i) {
+ continue;
+ }
+ v = GetIndex(i, j, d_size);
+
gsl_matrix_view xHiDHix_g =
- gsl_matrix_submatrix (xHiDHix_all_g, 0, v*dc_size, dc_size, dc_size);
+ gsl_matrix_submatrix(xHiDHix_all_g, 0, v * dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHix_e =
- gsl_matrix_submatrix (xHiDHix_all_e, 0, v*dc_size, dc_size, dc_size);
-
- Calc_xHiDHix (eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix);
+ gsl_matrix_submatrix(xHiDHix_all_e, 0, v * dc_size, dc_size, dc_size);
+
+ Calc_xHiDHix(eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix);
}
}
return;
}
// Calculate (xHiDHiy) for every pair (i,j).
-void Calc_xHiDHiDHiy_all (const size_t v_size, const gsl_vector *eval,
- const gsl_matrix *Hi, const gsl_matrix *xHi,
- const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg,
- gsl_matrix *xHiDHiDHiy_all_ee,
- gsl_matrix *xHiDHiDHiy_all_ge) {
- gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
- gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
- gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
-
- size_t d_size=Hiy->size1;
- size_t v1, v2;
-
- for (size_t i1=0; i1<d_size; i1++) {
- for (size_t j1=0; j1<d_size; j1++) {
- if (j1<i1) {continue;}
- v1=GetIndex(i1, j1, d_size);
-
- for (size_t i2=0; i2<d_size; i2++) {
- for (size_t j2=0; j2<d_size; j2++) {
- if (j2<i2) {continue;}
- v2=GetIndex(i2, j2, d_size);
-
- gsl_vector_view xHiDHiDHiy_gg =
- gsl_matrix_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
- gsl_vector_view xHiDHiDHiy_ee =
- gsl_matrix_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
- gsl_vector_view xHiDHiDHiy_ge =
- gsl_matrix_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-
- Calc_xHiDHiDHiy (eval, Hi, xHi, Hiy, i1, j1, i2, j2, &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector, &xHiDHiDHiy_ge.vector);
- }
- }
- }
- }
- return;
+void Calc_xHiDHiDHiy_all(const size_t v_size, const gsl_vector *eval,
+ const gsl_matrix *Hi, const gsl_matrix *xHi,
+ const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg,
+ gsl_matrix *xHiDHiDHiy_all_ee,
+ gsl_matrix *xHiDHiDHiy_all_ge) {
+ gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
+ gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
+ gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
+
+ size_t d_size = Hiy->size1;
+ size_t v1, v2;
+
+ for (size_t i1 = 0; i1 < d_size; i1++) {
+ for (size_t j1 = 0; j1 < d_size; j1++) {
+ if (j1 < i1) {
+ continue;
+ }
+ v1 = GetIndex(i1, j1, d_size);
+
+ for (size_t i2 = 0; i2 < d_size; i2++) {
+ for (size_t j2 = 0; j2 < d_size; j2++) {
+ if (j2 < i2) {
+ continue;
+ }
+ v2 = GetIndex(i2, j2, d_size);
+
+ gsl_vector_view xHiDHiDHiy_gg =
+ gsl_matrix_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
+ gsl_vector_view xHiDHiDHiy_ee =
+ gsl_matrix_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
+ gsl_vector_view xHiDHiDHiy_ge =
+ gsl_matrix_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
+
+ Calc_xHiDHiDHiy(eval, Hi, xHi, Hiy, i1, j1, i2, j2,
+ &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector,
+ &xHiDHiDHiy_ge.vector);
+ }
+ }
+ }
+ }
+ return;
}
// Calculate (xHiDHix) for every pair (i,j).
-void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval,
- const gsl_matrix *Hi, const gsl_matrix *xHi,
- gsl_matrix *xHiDHiDHix_all_gg,
- gsl_matrix *xHiDHiDHix_all_ee,
- gsl_matrix *xHiDHiDHix_all_ge) {
- gsl_matrix_set_zero(xHiDHiDHix_all_gg);
- gsl_matrix_set_zero(xHiDHiDHix_all_ee);
- gsl_matrix_set_zero(xHiDHiDHix_all_ge);
-
- size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
- size_t v1, v2;
-
- for (size_t i1=0; i1<d_size; i1++) {
- for (size_t j1=0; j1<d_size; j1++) {
- if (j1<i1) {continue;}
- v1=GetIndex(i1, j1, d_size);
-
- for (size_t i2=0; i2<d_size; i2++) {
- for (size_t j2=0; j2<d_size; j2++) {
- if (j2<i2) {continue;}
- v2=GetIndex(i2, j2, d_size);
-
- if (v2<v1) {continue;}
-
- gsl_matrix_view xHiDHiDHix_gg1 =
- gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0,
- (v1*v_size+v2)*dc_size,
- dc_size, dc_size);
- gsl_matrix_view xHiDHiDHix_ee1 =
- gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0,
- (v1*v_size+v2)*dc_size,
- dc_size, dc_size);
- gsl_matrix_view xHiDHiDHix_ge1 =
- gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0,
- (v1*v_size+v2)*dc_size,
- dc_size, dc_size);
-
- Calc_xHiDHiDHix (eval, Hi, xHi, i1, j1, i2, j2,
- &xHiDHiDHix_gg1.matrix,
- &xHiDHiDHix_ee1.matrix,
- &xHiDHiDHix_ge1.matrix);
-
- if (v2!=v1) {
- gsl_matrix_view xHiDHiDHix_gg2 =
- gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0,
- (v2*v_size+v1)*dc_size,
- dc_size, dc_size);
- gsl_matrix_view xHiDHiDHix_ee2 =
- gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0,
- (v2*v_size+v1)*dc_size,
- dc_size, dc_size);
- gsl_matrix_view xHiDHiDHix_ge2 =
- gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0,
- (v2*v_size+v1)*dc_size,
- dc_size, dc_size);
-
- gsl_matrix_memcpy (&xHiDHiDHix_gg2.matrix,
- &xHiDHiDHix_gg1.matrix);
- gsl_matrix_memcpy (&xHiDHiDHix_ee2.matrix,
- &xHiDHiDHix_ee1.matrix);
- gsl_matrix_memcpy (&xHiDHiDHix_ge2.matrix,
- &xHiDHiDHix_ge1.matrix);
- }
- }
- }
- }
- }
-
- return;
+void Calc_xHiDHiDHix_all(const size_t v_size, const gsl_vector *eval,
+ const gsl_matrix *Hi, const gsl_matrix *xHi,
+ gsl_matrix *xHiDHiDHix_all_gg,
+ gsl_matrix *xHiDHiDHix_all_ee,
+ gsl_matrix *xHiDHiDHix_all_ge) {
+ gsl_matrix_set_zero(xHiDHiDHix_all_gg);
+ gsl_matrix_set_zero(xHiDHiDHix_all_ee);
+ gsl_matrix_set_zero(xHiDHiDHix_all_ge);
+
+ size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1;
+ size_t v1, v2;
+
+ for (size_t i1 = 0; i1 < d_size; i1++) {
+ for (size_t j1 = 0; j1 < d_size; j1++) {
+ if (j1 < i1) {
+ continue;
+ }
+ v1 = GetIndex(i1, j1, d_size);
+
+ for (size_t i2 = 0; i2 < d_size; i2++) {
+ for (size_t j2 = 0; j2 < d_size; j2++) {
+ if (j2 < i2) {
+ continue;
+ }
+ v2 = GetIndex(i2, j2, d_size);
+
+ if (v2 < v1) {
+ continue;
+ }
+
+ gsl_matrix_view xHiDHiDHix_gg1 = gsl_matrix_submatrix(
+ xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size,
+ dc_size);
+ gsl_matrix_view xHiDHiDHix_ee1 = gsl_matrix_submatrix(
+ xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size,
+ dc_size);
+ gsl_matrix_view xHiDHiDHix_ge1 = gsl_matrix_submatrix(
+ xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size,
+ dc_size);
+
+ Calc_xHiDHiDHix(eval, Hi, xHi, i1, j1, i2, j2, &xHiDHiDHix_gg1.matrix,
+ &xHiDHiDHix_ee1.matrix, &xHiDHiDHix_ge1.matrix);
+
+ if (v2 != v1) {
+ gsl_matrix_view xHiDHiDHix_gg2 = gsl_matrix_submatrix(
+ xHiDHiDHix_all_gg, 0, (v2 * v_size + v1) * dc_size, dc_size,
+ dc_size);
+ gsl_matrix_view xHiDHiDHix_ee2 = gsl_matrix_submatrix(
+ xHiDHiDHix_all_ee, 0, (v2 * v_size + v1) * dc_size, dc_size,
+ dc_size);
+ gsl_matrix_view xHiDHiDHix_ge2 = gsl_matrix_submatrix(
+ xHiDHiDHix_all_ge, 0, (v2 * v_size + v1) * dc_size, dc_size,
+ dc_size);
+
+ gsl_matrix_memcpy(&xHiDHiDHix_gg2.matrix, &xHiDHiDHix_gg1.matrix);
+ gsl_matrix_memcpy(&xHiDHiDHix_ee2.matrix, &xHiDHiDHix_ee1.matrix);
+ gsl_matrix_memcpy(&xHiDHiDHix_ge2.matrix, &xHiDHiDHix_ge1.matrix);
+ }
+ }
+ }
+ }
+ }
+
+ return;
}
// Calculate (xHiDHix)Qi(xHiy) for every pair (i,j).
-void Calc_xHiDHixQixHiy_all (const gsl_matrix *xHiDHix_all_g,
- const gsl_matrix *xHiDHix_all_e,
- const gsl_vector *QixHiy,
- gsl_matrix *xHiDHixQixHiy_all_g,
- gsl_matrix *xHiDHixQixHiy_all_e) {
- size_t dc_size=xHiDHix_all_g->size1;
- size_t v_size=xHiDHix_all_g->size2/dc_size;
-
- for (size_t i=0; i<v_size; i++) {
- gsl_matrix_const_view xHiDHix_g =
- gsl_matrix_const_submatrix (xHiDHix_all_g, 0, i*dc_size,
- dc_size, dc_size);
- gsl_matrix_const_view xHiDHix_e =
- gsl_matrix_const_submatrix (xHiDHix_all_e, 0, i*dc_size,
- dc_size, dc_size);
-
- gsl_vector_view xHiDHixQixHiy_g =
- gsl_matrix_column (xHiDHixQixHiy_all_g, i);
- gsl_vector_view xHiDHixQixHiy_e =
- gsl_matrix_column (xHiDHixQixHiy_all_e, i);
-
- gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_g.matrix,
- QixHiy, 0.0, &xHiDHixQixHiy_g.vector);
- gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_e.matrix,
- QixHiy, 0.0, &xHiDHixQixHiy_e.vector);
- }
-
- return;
+void Calc_xHiDHixQixHiy_all(const gsl_matrix *xHiDHix_all_g,
+ const gsl_matrix *xHiDHix_all_e,
+ const gsl_vector *QixHiy,
+ gsl_matrix *xHiDHixQixHiy_all_g,
+ gsl_matrix *xHiDHixQixHiy_all_e) {
+ size_t dc_size = xHiDHix_all_g->size1;
+ size_t v_size = xHiDHix_all_g->size2 / dc_size;
+
+ for (size_t i = 0; i < v_size; i++) {
+ gsl_matrix_const_view xHiDHix_g = gsl_matrix_const_submatrix(
+ xHiDHix_all_g, 0, i * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view xHiDHix_e = gsl_matrix_const_submatrix(
+ xHiDHix_all_e, 0, i * dc_size, dc_size, dc_size);
+
+ gsl_vector_view xHiDHixQixHiy_g = gsl_matrix_column(xHiDHixQixHiy_all_g, i);
+ gsl_vector_view xHiDHixQixHiy_e = gsl_matrix_column(xHiDHixQixHiy_all_e, i);
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_g.matrix, QixHiy, 0.0,
+ &xHiDHixQixHiy_g.vector);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_e.matrix, QixHiy, 0.0,
+ &xHiDHixQixHiy_e.vector);
+ }
+
+ return;
}
// Calculate Qi(xHiDHiy) and Qi(xHiDHix)Qi(xHiy) for each pair of i,j (i<=j).
-void Calc_QiVec_all (const gsl_matrix *Qi, const gsl_matrix *vec_all_g,
- const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g,
- gsl_matrix *Qivec_all_e) {
- for (size_t i=0; i<vec_all_g->size2; i++) {
- gsl_vector_const_view vec_g=gsl_matrix_const_column (vec_all_g, i);
- gsl_vector_const_view vec_e=gsl_matrix_const_column (vec_all_e, i);
-
- gsl_vector_view Qivec_g=gsl_matrix_column (Qivec_all_g, i);
- gsl_vector_view Qivec_e=gsl_matrix_column (Qivec_all_e, i);
-
- gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_g.vector,0.0,
- &Qivec_g.vector);
- gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_e.vector,0.0,
- &Qivec_e.vector);
- }
-
- return;
+void Calc_QiVec_all(const gsl_matrix *Qi, const gsl_matrix *vec_all_g,
+ const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g,
+ gsl_matrix *Qivec_all_e) {
+ for (size_t i = 0; i < vec_all_g->size2; i++) {
+ gsl_vector_const_view vec_g = gsl_matrix_const_column(vec_all_g, i);
+ gsl_vector_const_view vec_e = gsl_matrix_const_column(vec_all_e, i);
+
+ gsl_vector_view Qivec_g = gsl_matrix_column(Qivec_all_g, i);
+ gsl_vector_view Qivec_e = gsl_matrix_column(Qivec_all_e, i);
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_g.vector, 0.0, &Qivec_g.vector);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_e.vector, 0.0, &Qivec_e.vector);
+ }
+
+ return;
}
// Calculate Qi(xHiDHix) for each pair of i,j (i<=j).
-void Calc_QiMat_all (const gsl_matrix *Qi, const gsl_matrix *mat_all_g,
- const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g,
- gsl_matrix *Qimat_all_e) {
- size_t dc_size=Qi->size1;
- size_t v_size=mat_all_g->size2/mat_all_g->size1;
-
- for (size_t i=0; i<v_size; i++) {
- gsl_matrix_const_view mat_g =
- gsl_matrix_const_submatrix (mat_all_g, 0, i*dc_size,
- dc_size, dc_size);
- gsl_matrix_const_view mat_e =
- gsl_matrix_const_submatrix (mat_all_e, 0, i*dc_size,
- dc_size, dc_size);
-
- gsl_matrix_view Qimat_g =
- gsl_matrix_submatrix (Qimat_all_g, 0, i*dc_size, dc_size,
- dc_size);
- gsl_matrix_view Qimat_e =
- gsl_matrix_submatrix (Qimat_all_e, 0, i*dc_size, dc_size,
- dc_size);
-
- gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi,
- &mat_g.matrix, 0.0, &Qimat_g.matrix);
- gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi,
- &mat_e.matrix, 0.0, &Qimat_e.matrix);
- }
-
- return;
+void Calc_QiMat_all(const gsl_matrix *Qi, const gsl_matrix *mat_all_g,
+ const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g,
+ gsl_matrix *Qimat_all_e) {
+ size_t dc_size = Qi->size1;
+ size_t v_size = mat_all_g->size2 / mat_all_g->size1;
+
+ for (size_t i = 0; i < v_size; i++) {
+ gsl_matrix_const_view mat_g =
+ gsl_matrix_const_submatrix(mat_all_g, 0, i * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view mat_e =
+ gsl_matrix_const_submatrix(mat_all_e, 0, i * dc_size, dc_size, dc_size);
+
+ gsl_matrix_view Qimat_g =
+ gsl_matrix_submatrix(Qimat_all_g, 0, i * dc_size, dc_size, dc_size);
+ gsl_matrix_view Qimat_e =
+ gsl_matrix_submatrix(Qimat_all_e, 0, i * dc_size, dc_size, dc_size);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_g.matrix, 0.0,
+ &Qimat_g.matrix);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_e.matrix, 0.0,
+ &Qimat_e.matrix);
+ }
+
+ return;
}
// Calculate yPDPy
// yPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)y
// = ytHiDHiy - (yHix)Qi(xHiDHiy) - (yHiDHix)Qi(xHiy)
// + (yHix)Qi(xHiDHix)Qi(xtHiy)
-void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy,
- const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
- const gsl_matrix *xHiDHiy_all_e,
- const gsl_matrix *xHiDHixQixHiy_all_g,
- const gsl_matrix *xHiDHixQixHiy_all_e,
- const size_t i, const size_t j,
- double &yPDPy_g, double &yPDPy_e) {
- size_t d_size=Hiy->size1;
- size_t v=GetIndex(i, j, d_size);
-
- double d;
-
- // First part: ytHiDHiy.
- Calc_yHiDHiy (eval, Hiy, i, j, yPDPy_g, yPDPy_e);
-
- // Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
- gsl_vector_const_view xHiDHiy_g =
- gsl_matrix_const_column (xHiDHiy_all_g, v);
- gsl_vector_const_view xHiDHiy_e =
- gsl_matrix_const_column (xHiDHiy_all_e, v);
-
- gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
- yPDPy_g-=d*2.0;
- gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
- yPDPy_e-=d*2.0;
-
- // Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy).
- gsl_vector_const_view xHiDHixQixHiy_g =
- gsl_matrix_const_column (xHiDHixQixHiy_all_g, v);
- gsl_vector_const_view xHiDHixQixHiy_e =
- gsl_matrix_const_column (xHiDHixQixHiy_all_e, v);
-
- gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
- yPDPy_g+=d;
- gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
- yPDPy_e+=d;
-
- return;
+void Calc_yPDPy(const gsl_vector *eval, const gsl_matrix *Hiy,
+ const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
+ const gsl_matrix *xHiDHiy_all_e,
+ const gsl_matrix *xHiDHixQixHiy_all_g,
+ const gsl_matrix *xHiDHixQixHiy_all_e, const size_t i,
+ const size_t j, double &yPDPy_g, double &yPDPy_e) {
+ size_t d_size = Hiy->size1;
+ size_t v = GetIndex(i, j, d_size);
+
+ double d;
+
+ // First part: ytHiDHiy.
+ Calc_yHiDHiy(eval, Hiy, i, j, yPDPy_g, yPDPy_e);
+
+ // Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
+ gsl_vector_const_view xHiDHiy_g = gsl_matrix_const_column(xHiDHiy_all_g, v);
+ gsl_vector_const_view xHiDHiy_e = gsl_matrix_const_column(xHiDHiy_all_e, v);
+
+ gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
+ yPDPy_g -= d * 2.0;
+ gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
+ yPDPy_e -= d * 2.0;
+
+ // Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy).
+ gsl_vector_const_view xHiDHixQixHiy_g =
+ gsl_matrix_const_column(xHiDHixQixHiy_all_g, v);
+ gsl_vector_const_view xHiDHixQixHiy_e =
+ gsl_matrix_const_column(xHiDHixQixHiy_all_e, v);
+
+ gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
+ yPDPy_g += d;
+ gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
+ yPDPy_e += d;
+
+ return;
}
// calculate yPDPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)D(Hi-HixQixHi)y
@@ -1912,3445 +1897,3503 @@ void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy,
// + (yHiDHix)Qi(xHiDHix)Qi(xHiy)
// + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
// - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy)
-void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi,
- const gsl_matrix *xHi, const gsl_matrix *Hiy,
- const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g,
- const gsl_matrix *xHiDHiy_all_e,
- const gsl_matrix *QixHiDHiy_all_g,
- const gsl_matrix *QixHiDHiy_all_e,
- const gsl_matrix *xHiDHixQixHiy_all_g,
- const gsl_matrix *xHiDHixQixHiy_all_e,
- const gsl_matrix *QixHiDHixQixHiy_all_g,
- const gsl_matrix *QixHiDHixQixHiy_all_e,
- const gsl_matrix *xHiDHiDHiy_all_gg,
- const gsl_matrix *xHiDHiDHiy_all_ee,
- const gsl_matrix *xHiDHiDHiy_all_ge,
- const gsl_matrix *xHiDHiDHix_all_gg,
- const gsl_matrix *xHiDHiDHix_all_ee,
- const gsl_matrix *xHiDHiDHix_all_ge,
- const size_t i1, const size_t j1, const size_t i2,
- const size_t j2, double &yPDPDPy_gg, double &yPDPDPy_ee,
- double &yPDPDPy_ge) {
- size_t d_size=Hi->size1, dc_size=xHi->size1;
- size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
- size_t v_size=d_size*(d_size+1)/2;
-
- double d;
-
- gsl_vector *xHiDHiDHixQixHiy=gsl_vector_alloc (dc_size);
-
- // First part: yHiDHiDHiy.
- Calc_yHiDHiDHiy (eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg,
- yPDPDPy_ee, yPDPDPy_ge);
-
- // Second and third parts:
- // -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy).
- gsl_vector_const_view xHiDHiDHiy_gg1 =
- gsl_matrix_const_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
- gsl_vector_const_view xHiDHiDHiy_ee1 =
- gsl_matrix_const_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
- gsl_vector_const_view xHiDHiDHiy_ge1 =
- gsl_matrix_const_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-
- gsl_vector_const_view xHiDHiDHiy_gg2 =
- gsl_matrix_const_column (xHiDHiDHiy_all_gg, v2*v_size+v1);
- gsl_vector_const_view xHiDHiDHiy_ee2 =
- gsl_matrix_const_column (xHiDHiDHiy_all_ee, v2*v_size+v1);
- gsl_vector_const_view xHiDHiDHiy_ge2 =
- gsl_matrix_const_column (xHiDHiDHiy_all_ge, v2*v_size+v1);
-
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
- yPDPDPy_gg-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
- yPDPDPy_ee-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
- yPDPDPy_ge-=d;
-
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
- yPDPDPy_gg-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
- yPDPDPy_ee-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
- yPDPDPy_ge-=d;
-
- // Fourth part: - (yHiDHix)Qi(xHiDHiy).
- gsl_vector_const_view xHiDHiy_g1 =
- gsl_matrix_const_column (xHiDHiy_all_g, v1);
- gsl_vector_const_view xHiDHiy_e1 =
- gsl_matrix_const_column (xHiDHiy_all_e, v1);
- gsl_vector_const_view QixHiDHiy_g2 =
- gsl_matrix_const_column (QixHiDHiy_all_g, v2);
- gsl_vector_const_view QixHiDHiy_e2 =
- gsl_matrix_const_column (QixHiDHiy_all_e, v2);
-
- gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
- yPDPDPy_gg-=d;
- gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
- yPDPDPy_ee-=d;
- gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
- yPDPDPy_ge-=d;
-
- // Fifth and sixth parts:
- // + (yHix)Qi(xHiDHix)Qi(xHiDHiy) +
- // (yHiDHix)Qi(xHiDHix)Qi(xHiy)
- gsl_vector_const_view QixHiDHiy_g1 =
- gsl_matrix_const_column (QixHiDHiy_all_g, v1);
- gsl_vector_const_view QixHiDHiy_e1 =
- gsl_matrix_const_column (QixHiDHiy_all_e, v1);
-
- gsl_vector_const_view xHiDHixQixHiy_g1 =
- gsl_matrix_const_column (xHiDHixQixHiy_all_g, v1);
- gsl_vector_const_view xHiDHixQixHiy_e1 =
- gsl_matrix_const_column (xHiDHixQixHiy_all_e, v1);
- gsl_vector_const_view xHiDHixQixHiy_g2 =
- gsl_matrix_const_column (xHiDHixQixHiy_all_g, v2);
- gsl_vector_const_view xHiDHixQixHiy_e2 =
- gsl_matrix_const_column (xHiDHixQixHiy_all_e, v2);
-
- gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
- yPDPDPy_gg+=d;
- gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
- yPDPDPy_gg+=d;
-
- gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
- yPDPDPy_ee+=d;
- gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
- yPDPDPy_ee+=d;
-
- gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
- yPDPDPy_ge+=d;
- gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
- yPDPDPy_ge+=d;
-
- // Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
- gsl_matrix_const_view xHiDHiDHix_gg =
- gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0,
- (v1*v_size+v2)*dc_size,
- dc_size, dc_size);
- gsl_matrix_const_view xHiDHiDHix_ee =
- gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0,
- (v1*v_size+v2)*dc_size,
- dc_size, dc_size);
- gsl_matrix_const_view xHiDHiDHix_ge =
- gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0,
- (v1*v_size+v2)*dc_size,
- dc_size, dc_size);
-
- gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix,
- QixHiy, 0.0, xHiDHiDHixQixHiy);
- gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
- yPDPDPy_gg+=d;
- gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix,
- QixHiy, 0.0, xHiDHiDHixQixHiy);
- gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
- yPDPDPy_ee+=d;
- gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix,
- QixHiy, 0.0, xHiDHiDHixQixHiy);
- gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
- yPDPDPy_ge+=d;
-
- // Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy).
- gsl_vector_const_view QixHiDHixQixHiy_g1 =
- gsl_matrix_const_column (QixHiDHixQixHiy_all_g, v1);
- gsl_vector_const_view QixHiDHixQixHiy_e1 =
- gsl_matrix_const_column (QixHiDHixQixHiy_all_e, v1);
-
- gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_g2.vector,&d);
- yPDPDPy_gg-=d;
- gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector,&xHiDHixQixHiy_e2.vector,&d);
- yPDPDPy_ee-=d;
- gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_e2.vector,&d);
- yPDPDPy_ge-=d;
-
- // Free memory.
- gsl_vector_free(xHiDHiDHixQixHiy);
-
- return;
+void Calc_yPDPDPy(
+ const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matrix *xHi,
+ const gsl_matrix *Hiy, const gsl_vector *QixHiy,
+ const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e,
+ const gsl_matrix *QixHiDHiy_all_g, const gsl_matrix *QixHiDHiy_all_e,
+ const gsl_matrix *xHiDHixQixHiy_all_g,
+ const gsl_matrix *xHiDHixQixHiy_all_e,
+ const gsl_matrix *QixHiDHixQixHiy_all_g,
+ const gsl_matrix *QixHiDHixQixHiy_all_e,
+ const gsl_matrix *xHiDHiDHiy_all_gg, const gsl_matrix *xHiDHiDHiy_all_ee,
+ const gsl_matrix *xHiDHiDHiy_all_ge, const gsl_matrix *xHiDHiDHix_all_gg,
+ const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge,
+ const size_t i1, const size_t j1, const size_t i2, const size_t j2,
+ double &yPDPDPy_gg, double &yPDPDPy_ee, double &yPDPDPy_ge) {
+ size_t d_size = Hi->size1, dc_size = xHi->size1;
+ size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size);
+ size_t v_size = d_size * (d_size + 1) / 2;
+
+ double d;
+
+ gsl_vector *xHiDHiDHixQixHiy = gsl_vector_alloc(dc_size);
+
+ // First part: yHiDHiDHiy.
+ Calc_yHiDHiDHiy(eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee,
+ yPDPDPy_ge);
+
+ // Second and third parts:
+ // -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy).
+ gsl_vector_const_view xHiDHiDHiy_gg1 =
+ gsl_matrix_const_column(xHiDHiDHiy_all_gg, v1 * v_size + v2);
+ gsl_vector_const_view xHiDHiDHiy_ee1 =
+ gsl_matrix_const_column(xHiDHiDHiy_all_ee, v1 * v_size + v2);
+ gsl_vector_const_view xHiDHiDHiy_ge1 =
+ gsl_matrix_const_column(xHiDHiDHiy_all_ge, v1 * v_size + v2);
+
+ gsl_vector_const_view xHiDHiDHiy_gg2 =
+ gsl_matrix_const_column(xHiDHiDHiy_all_gg, v2 * v_size + v1);
+ gsl_vector_const_view xHiDHiDHiy_ee2 =
+ gsl_matrix_const_column(xHiDHiDHiy_all_ee, v2 * v_size + v1);
+ gsl_vector_const_view xHiDHiDHiy_ge2 =
+ gsl_matrix_const_column(xHiDHiDHiy_all_ge, v2 * v_size + v1);
+
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
+ yPDPDPy_gg -= d;
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
+ yPDPDPy_ee -= d;
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
+ yPDPDPy_ge -= d;
+
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
+ yPDPDPy_gg -= d;
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
+ yPDPDPy_ee -= d;
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
+ yPDPDPy_ge -= d;
+
+ // Fourth part: - (yHiDHix)Qi(xHiDHiy).
+ gsl_vector_const_view xHiDHiy_g1 = gsl_matrix_const_column(xHiDHiy_all_g, v1);
+ gsl_vector_const_view xHiDHiy_e1 = gsl_matrix_const_column(xHiDHiy_all_e, v1);
+ gsl_vector_const_view QixHiDHiy_g2 =
+ gsl_matrix_const_column(QixHiDHiy_all_g, v2);
+ gsl_vector_const_view QixHiDHiy_e2 =
+ gsl_matrix_const_column(QixHiDHiy_all_e, v2);
+
+ gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
+ yPDPDPy_gg -= d;
+ gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
+ yPDPDPy_ee -= d;
+ gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
+ yPDPDPy_ge -= d;
+
+ // Fifth and sixth parts:
+ // + (yHix)Qi(xHiDHix)Qi(xHiDHiy) +
+ // (yHiDHix)Qi(xHiDHix)Qi(xHiy)
+ gsl_vector_const_view QixHiDHiy_g1 =
+ gsl_matrix_const_column(QixHiDHiy_all_g, v1);
+ gsl_vector_const_view QixHiDHiy_e1 =
+ gsl_matrix_const_column(QixHiDHiy_all_e, v1);
+
+ gsl_vector_const_view xHiDHixQixHiy_g1 =
+ gsl_matrix_const_column(xHiDHixQixHiy_all_g, v1);
+ gsl_vector_const_view xHiDHixQixHiy_e1 =
+ gsl_matrix_const_column(xHiDHixQixHiy_all_e, v1);
+ gsl_vector_const_view xHiDHixQixHiy_g2 =
+ gsl_matrix_const_column(xHiDHixQixHiy_all_g, v2);
+ gsl_vector_const_view xHiDHixQixHiy_e2 =
+ gsl_matrix_const_column(xHiDHixQixHiy_all_e, v2);
+
+ gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
+ yPDPDPy_gg += d;
+ gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
+ yPDPDPy_gg += d;
+
+ gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
+ yPDPDPy_ee += d;
+ gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
+ yPDPDPy_ee += d;
+
+ gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
+ yPDPDPy_ge += d;
+ gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
+ yPDPDPy_ge += d;
+
+ // Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy)
+ gsl_matrix_const_view xHiDHiDHix_gg = gsl_matrix_const_submatrix(
+ xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view xHiDHiDHix_ee = gsl_matrix_const_submatrix(
+ xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view xHiDHiDHix_ge = gsl_matrix_const_submatrix(
+ xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, QixHiy, 0.0,
+ xHiDHiDHixQixHiy);
+ gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+ yPDPDPy_gg += d;
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix, QixHiy, 0.0,
+ xHiDHiDHixQixHiy);
+ gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+ yPDPDPy_ee += d;
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, QixHiy, 0.0,
+ xHiDHiDHixQixHiy);
+ gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
+ yPDPDPy_ge += d;
+
+ // Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy).
+ gsl_vector_const_view QixHiDHixQixHiy_g1 =
+ gsl_matrix_const_column(QixHiDHixQixHiy_all_g, v1);
+ gsl_vector_const_view QixHiDHixQixHiy_e1 =
+ gsl_matrix_const_column(QixHiDHixQixHiy_all_e, v1);
+
+ gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_g2.vector, &d);
+ yPDPDPy_gg -= d;
+ gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector, &xHiDHixQixHiy_e2.vector, &d);
+ yPDPDPy_ee -= d;
+ gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_e2.vector, &d);
+ yPDPDPy_ge -= d;
+
+ // Free memory.
+ gsl_vector_free(xHiDHiDHixQixHiy);
+
+ return;
}
// Calculate Edgeworth correction factors for small samples. Notation
// and method follow Thomas J. Rothenberg, Econometrica 1984; 52 (4).
// M=xHiDHix
-void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi,
- const gsl_matrix *QixHiDHix_all_g,
- const gsl_matrix *QixHiDHix_all_e,
- const gsl_matrix *xHiDHiDHix_all_gg,
- const gsl_matrix *xHiDHiDHix_all_ee,
- const gsl_matrix *xHiDHiDHix_all_ge,
- const size_t d_size, double &crt_a,
- double &crt_b, double &crt_c) {
- crt_a=0.0; crt_b=0.0; crt_c=0.0;
-
- size_t dc_size=Qi->size1, v_size=Hessian_inv->size1/2;
- size_t c_size=dc_size/d_size;
- double h_gg, h_ge, h_ee, d, B=0.0, C=0.0, D=0.0;
- double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee;
- double trCC_gg, trCC_ge, trCC_ee, trD_gg=0.0, trD_ge=0.0, trD_ee=0.0;
-
- gsl_matrix *QiMQi_g1=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *QiMQi_e1=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *QiMQi_g2=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *QiMQi_e2=gsl_matrix_alloc (dc_size, dc_size);
-
- gsl_matrix *QiMQisQisi_g1=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *QiMQisQisi_e1=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *QiMQisQisi_g2=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *QiMQisQisi_e2=gsl_matrix_alloc (d_size, d_size);
-
- gsl_matrix *QiMQiMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *QiMQiMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *QiMQiMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
- gsl_matrix *QiMMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *QiMMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *QiMMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
- gsl_matrix *Qi_si=gsl_matrix_alloc (d_size, d_size);
-
- gsl_matrix *M_dd=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *M_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
- // Invert Qi_sub to Qi_si.
- gsl_matrix *Qi_sub=gsl_matrix_alloc (d_size, d_size);
-
- gsl_matrix_const_view Qi_s =
- gsl_matrix_const_submatrix (Qi, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
-
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (d_size);
-
- gsl_matrix_memcpy (Qi_sub, &Qi_s.matrix);
- LUDecomp (Qi_sub, pmt, &sig);
- LUInvert (Qi_sub, pmt, Qi_si);
-
- gsl_permutation_free(pmt);
- gsl_matrix_free(Qi_sub);
-
- // Calculate correction factors.
- for (size_t v1=0; v1<v_size; v1++) {
-
- // Calculate Qi(xHiDHix)Qi, and subpart of it.
- gsl_matrix_const_view QiM_g1 =
- gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v1*dc_size,
- dc_size, dc_size);
- gsl_matrix_const_view QiM_e1 =
- gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v1*dc_size,
- dc_size, dc_size);
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
- Qi, 0.0, QiMQi_g1);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
- Qi, 0.0, QiMQi_e1);
-
- gsl_matrix_view QiMQi_g1_s =
- gsl_matrix_submatrix (QiMQi_g1, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
- gsl_matrix_view QiMQi_e1_s =
- gsl_matrix_submatrix (QiMQi_e1, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
-
- // Calculate trCg1 and trCe1.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix,
- Qi_si, 0.0, QiMQisQisi_g1);
- trCg1=0.0;
- for (size_t k=0; k<d_size; k++) {
- trCg1-=gsl_matrix_get (QiMQisQisi_g1, k, k);
- }
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix,
- Qi_si, 0.0, QiMQisQisi_e1);
- trCe1=0.0;
- for (size_t k=0; k<d_size; k++) {
- trCe1-=gsl_matrix_get (QiMQisQisi_e1, k, k);
- }
-
- for (size_t v2=0; v2<v_size; v2++) {
- if (v2<v1) {continue;}
-
- // Calculate Qi(xHiDHix)Qi, and subpart of it.
- gsl_matrix_const_view QiM_g2 =
- gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v2*dc_size,
- dc_size, dc_size);
- gsl_matrix_const_view QiM_e2 =
- gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v2*dc_size,
- dc_size, dc_size);
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix,
- Qi, 0.0, QiMQi_g2);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix,
- Qi, 0.0, QiMQi_e2);
-
- gsl_matrix_view QiMQi_g2_s =
- gsl_matrix_submatrix (QiMQi_g2, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
- gsl_matrix_view QiMQi_e2_s =
- gsl_matrix_submatrix (QiMQi_e2, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
-
- // Calculate trCg2 and trCe2.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMQi_g2_s.matrix, Qi_si, 0.0, QiMQisQisi_g2);
- trCg2=0.0;
- for (size_t k=0; k<d_size; k++) {
- trCg2-=gsl_matrix_get (QiMQisQisi_g2, k, k);
- }
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMQi_e2_s.matrix, Qi_si, 0.0, QiMQisQisi_e2);
- trCe2=0.0;
- for (size_t k=0; k<d_size; k++) {
- trCe2-=gsl_matrix_get (QiMQisQisi_e2, k, k);
- }
-
- // Calculate trCC_gg, trCC_ge, trCC_ee.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- QiMQisQisi_g1, QiMQisQisi_g2, 0.0, M_dd);
- trCC_gg=0.0;
- for (size_t k=0; k<d_size; k++) {
- trCC_gg+=gsl_matrix_get (M_dd, k, k);
- }
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
- QiMQisQisi_e2, 0.0, M_dd);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
- QiMQisQisi_g2, 1.0, M_dd);
- trCC_ge=0.0;
- for (size_t k=0; k<d_size; k++) {
- trCC_ge+=gsl_matrix_get (M_dd, k, k);
- }
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
- QiMQisQisi_e2, 0.0, M_dd);
- trCC_ee=0.0;
- for (size_t k=0; k<d_size; k++) {
- trCC_ee+=gsl_matrix_get (M_dd, k, k);
- }
-
- // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
- QiMQi_g2, 0.0, QiMQiMQi_gg);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix,
- QiMQi_e2, 0.0, QiMQiMQi_ge);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
- QiMQi_g2, 1.0, QiMQiMQi_ge);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix,
- QiMQi_e2, 0.0, QiMQiMQi_ee);
-
- gsl_matrix_view QiMQiMQi_gg_s =
- gsl_matrix_submatrix (QiMQiMQi_gg, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
- gsl_matrix_view QiMQiMQi_ge_s =
- gsl_matrix_submatrix (QiMQiMQi_ge, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
- gsl_matrix_view QiMQiMQi_ee_s =
- gsl_matrix_submatrix (QiMQiMQi_ee, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
-
- // and part of trB_gg, trB_ge, trB_ee.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMQiMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
- trB_gg=0.0;
- for (size_t k=0; k<d_size; k++) {
- d=gsl_matrix_get (M_dd, k, k);
- trB_gg-=d;
- }
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMQiMQi_ge_s.matrix, Qi_si, 0.0, M_dd);
- trB_ge=0.0;
- for (size_t k=0; k<d_size; k++) {
- d=gsl_matrix_get (M_dd, k, k);
- trB_ge-=d;
- }
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMQiMQi_ee_s.matrix, Qi_si, 0.0, M_dd);
- trB_ee=0.0;
- for (size_t k=0; k<d_size; k++) {
- d=gsl_matrix_get (M_dd, k, k);
- trB_ee-=d;
- }
-
- // Calculate Qi(xHiDHiDHix)Qi, and subpart of it.
- gsl_matrix_const_view MM_gg =
- gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0,
- (v1*v_size+v2)*dc_size, dc_size,
- dc_size);
- gsl_matrix_const_view MM_ge =
- gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0,
- (v1*v_size+v2)*dc_size, dc_size,
- dc_size);
- gsl_matrix_const_view MM_ee =
- gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0,
- (v1*v_size+v2)*dc_size, dc_size,
- dc_size);
-
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
- &MM_gg.matrix, 0.0, M_dcdc);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
- QiMMQi_gg);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
- &MM_ge.matrix, 0.0, M_dcdc);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc,
- Qi, 0.0, QiMMQi_ge);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi,
- &MM_ee.matrix, 0.0, M_dcdc);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi,
- 0.0, QiMMQi_ee);
-
- gsl_matrix_view QiMMQi_gg_s =
- gsl_matrix_submatrix (QiMMQi_gg, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
- gsl_matrix_view QiMMQi_ge_s =
- gsl_matrix_submatrix (QiMMQi_ge, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
- gsl_matrix_view QiMMQi_ee_s =
- gsl_matrix_submatrix (QiMMQi_ee, (c_size-1)*d_size,
- (c_size-1)*d_size, d_size, d_size);
-
- // Calculate the other part of trB_gg, trB_ge, trB_ee.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
- for (size_t k=0; k<d_size; k++) {
- trB_gg+=gsl_matrix_get (M_dd, k, k);
- }
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMMQi_ge_s.matrix, Qi_si, 0.0, M_dd);
- for (size_t k=0; k<d_size; k++) {
- trB_ge+=2.0*gsl_matrix_get (M_dd, k, k);
- }
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
- &QiMMQi_ee_s.matrix, Qi_si, 0.0, M_dd);
- for (size_t k=0; k<d_size; k++) {
- trB_ee+=gsl_matrix_get (M_dd, k, k);
- }
-
- // Calculate trD_gg, trD_ge, trD_ee.
- trD_gg=2.0*trB_gg;
- trD_ge=2.0*trB_ge;
- trD_ee=2.0*trB_ee;
-
- //calculate B, C and D
- h_gg=-1.0*gsl_matrix_get (Hessian_inv, v1, v2);
- h_ge=-1.0*gsl_matrix_get (Hessian_inv, v1, v2+v_size);
- h_ee=-1.0*gsl_matrix_get (Hessian_inv, v1+v_size, v2+v_size);
-
- B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
- C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) +
- h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) +
- h_ee*(trCC_ee+0.5*trCe1*trCe2);
- D+=h_gg*(trCC_gg+0.5*trD_gg) +
- h_ge*(trCC_ge+0.5*trD_ge) + h_ee*(trCC_ee+0.5*trD_ee);
-
- if (v1!=v2) {
- B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
- C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) +
- h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) +
- h_ee*(trCC_ee+0.5*trCe1*trCe2);
- D+=h_gg*(trCC_gg+0.5*trD_gg) +
- h_ge*(trCC_ge+0.5*trD_ge) +
- h_ee*(trCC_ee+0.5*trD_ee);
- }
- }
- }
-
- // Calculate a, b, c from B C D.
- crt_a=2.0*D-C;
- crt_b=2.0*B;
- crt_c=C;
-
- // Free matrix memory.
- gsl_matrix_free(QiMQi_g1);
- gsl_matrix_free(QiMQi_e1);
- gsl_matrix_free(QiMQi_g2);
- gsl_matrix_free(QiMQi_e2);
-
- gsl_matrix_free(QiMQisQisi_g1);
- gsl_matrix_free(QiMQisQisi_e1);
- gsl_matrix_free(QiMQisQisi_g2);
- gsl_matrix_free(QiMQisQisi_e2);
-
- gsl_matrix_free(QiMQiMQi_gg);
- gsl_matrix_free(QiMQiMQi_ge);
- gsl_matrix_free(QiMQiMQi_ee);
-
- gsl_matrix_free(QiMMQi_gg);
- gsl_matrix_free(QiMMQi_ge);
- gsl_matrix_free(QiMMQi_ee);
-
- gsl_matrix_free(Qi_si);
-
- gsl_matrix_free(M_dd);
- gsl_matrix_free(M_dcdc);
-
- return;
+void CalcCRT(const gsl_matrix *Hessian_inv, const gsl_matrix *Qi,
+ const gsl_matrix *QixHiDHix_all_g,
+ const gsl_matrix *QixHiDHix_all_e,
+ const gsl_matrix *xHiDHiDHix_all_gg,
+ const gsl_matrix *xHiDHiDHix_all_ee,
+ const gsl_matrix *xHiDHiDHix_all_ge, const size_t d_size,
+ double &crt_a, double &crt_b, double &crt_c) {
+ crt_a = 0.0;
+ crt_b = 0.0;
+ crt_c = 0.0;
+
+ size_t dc_size = Qi->size1, v_size = Hessian_inv->size1 / 2;
+ size_t c_size = dc_size / d_size;
+ double h_gg, h_ge, h_ee, d, B = 0.0, C = 0.0, D = 0.0;
+ double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee;
+ double trCC_gg, trCC_ge, trCC_ee, trD_gg = 0.0, trD_ge = 0.0, trD_ee = 0.0;
+
+ gsl_matrix *QiMQi_g1 = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *QiMQi_e1 = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *QiMQi_g2 = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *QiMQi_e2 = gsl_matrix_alloc(dc_size, dc_size);
+
+ gsl_matrix *QiMQisQisi_g1 = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *QiMQisQisi_e1 = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *QiMQisQisi_g2 = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *QiMQisQisi_e2 = gsl_matrix_alloc(d_size, d_size);
+
+ gsl_matrix *QiMQiMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *QiMQiMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *QiMQiMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
+
+ gsl_matrix *QiMMQi_gg = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *QiMMQi_ge = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *QiMMQi_ee = gsl_matrix_alloc(dc_size, dc_size);
+
+ gsl_matrix *Qi_si = gsl_matrix_alloc(d_size, d_size);
+
+ gsl_matrix *M_dd = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *M_dcdc = gsl_matrix_alloc(dc_size, dc_size);
+
+ // Invert Qi_sub to Qi_si.
+ gsl_matrix *Qi_sub = gsl_matrix_alloc(d_size, d_size);
+
+ gsl_matrix_const_view Qi_s = gsl_matrix_const_submatrix(
+ Qi, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(d_size);
+
+ gsl_matrix_memcpy(Qi_sub, &Qi_s.matrix);
+ LUDecomp(Qi_sub, pmt, &sig);
+ LUInvert(Qi_sub, pmt, Qi_si);
+
+ gsl_permutation_free(pmt);
+ gsl_matrix_free(Qi_sub);
+
+ // Calculate correction factors.
+ for (size_t v1 = 0; v1 < v_size; v1++) {
+
+ // Calculate Qi(xHiDHix)Qi, and subpart of it.
+ gsl_matrix_const_view QiM_g1 = gsl_matrix_const_submatrix(
+ QixHiDHix_all_g, 0, v1 * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view QiM_e1 = gsl_matrix_const_submatrix(
+ QixHiDHix_all_e, 0, v1 * dc_size, dc_size, dc_size);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, Qi, 0.0,
+ QiMQi_g1);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, Qi, 0.0,
+ QiMQi_e1);
+
+ gsl_matrix_view QiMQi_g1_s = gsl_matrix_submatrix(
+ QiMQi_g1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+ gsl_matrix_view QiMQi_e1_s = gsl_matrix_submatrix(
+ QiMQi_e1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size);
+
+ // Calculate trCg1 and trCe1.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix, Qi_si,
+ 0.0, QiMQisQisi_g1);
+ trCg1 = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ trCg1 -= gsl_matrix_get(QiMQisQisi_g1, k, k);
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix, Qi_si,
+ 0.0, QiMQisQisi_e1);
+ trCe1 = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ trCe1 -= gsl_matrix_get(QiMQisQisi_e1, k, k);
+ }
+
+ for (size_t v2 = 0; v2 < v_size; v2++) {
+ if (v2 < v1) {
+ continue;
+ }
+
+ // Calculate Qi(xHiDHix)Qi, and subpart of it.
+ gsl_matrix_const_view QiM_g2 = gsl_matrix_const_submatrix(
+ QixHiDHix_all_g, 0, v2 * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view QiM_e2 = gsl_matrix_const_submatrix(
+ QixHiDHix_all_e, 0, v2 * dc_size, dc_size, dc_size);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix, Qi, 0.0,
+ QiMQi_g2);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix, Qi, 0.0,
+ QiMQi_e2);
+
+ gsl_matrix_view QiMQi_g2_s =
+ gsl_matrix_submatrix(QiMQi_g2, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+ gsl_matrix_view QiMQi_e2_s =
+ gsl_matrix_submatrix(QiMQi_e2, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+
+ // Calculate trCg2 and trCe2.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g2_s.matrix, Qi_si,
+ 0.0, QiMQisQisi_g2);
+ trCg2 = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ trCg2 -= gsl_matrix_get(QiMQisQisi_g2, k, k);
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e2_s.matrix, Qi_si,
+ 0.0, QiMQisQisi_e2);
+ trCe2 = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ trCe2 -= gsl_matrix_get(QiMQisQisi_e2, k, k);
+ }
+
+ // Calculate trCC_gg, trCC_ge, trCC_ee.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
+ QiMQisQisi_g2, 0.0, M_dd);
+ trCC_gg = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ trCC_gg += gsl_matrix_get(M_dd, k, k);
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1,
+ QiMQisQisi_e2, 0.0, M_dd);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
+ QiMQisQisi_g2, 1.0, M_dd);
+ trCC_ge = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ trCC_ge += gsl_matrix_get(M_dd, k, k);
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1,
+ QiMQisQisi_e2, 0.0, M_dd);
+ trCC_ee = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ trCC_ee += gsl_matrix_get(M_dd, k, k);
+ }
+
+ // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_g2,
+ 0.0, QiMQiMQi_gg);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_e2,
+ 0.0, QiMQiMQi_ge);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_g2,
+ 1.0, QiMQiMQi_ge);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_e2,
+ 0.0, QiMQiMQi_ee);
+
+ gsl_matrix_view QiMQiMQi_gg_s =
+ gsl_matrix_submatrix(QiMQiMQi_gg, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+ gsl_matrix_view QiMQiMQi_ge_s =
+ gsl_matrix_submatrix(QiMQiMQi_ge, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+ gsl_matrix_view QiMQiMQi_ee_s =
+ gsl_matrix_submatrix(QiMQiMQi_ee, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+
+ // and part of trB_gg, trB_ge, trB_ee.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_gg_s.matrix,
+ Qi_si, 0.0, M_dd);
+ trB_gg = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ d = gsl_matrix_get(M_dd, k, k);
+ trB_gg -= d;
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ge_s.matrix,
+ Qi_si, 0.0, M_dd);
+ trB_ge = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ d = gsl_matrix_get(M_dd, k, k);
+ trB_ge -= d;
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ee_s.matrix,
+ Qi_si, 0.0, M_dd);
+ trB_ee = 0.0;
+ for (size_t k = 0; k < d_size; k++) {
+ d = gsl_matrix_get(M_dd, k, k);
+ trB_ee -= d;
+ }
+
+ // Calculate Qi(xHiDHiDHix)Qi, and subpart of it.
+ gsl_matrix_const_view MM_gg = gsl_matrix_const_submatrix(
+ xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view MM_ge = gsl_matrix_const_submatrix(
+ xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+ gsl_matrix_const_view MM_ee = gsl_matrix_const_submatrix(
+ xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size);
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_gg.matrix, 0.0,
+ M_dcdc);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+ QiMMQi_gg);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ge.matrix, 0.0,
+ M_dcdc);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+ QiMMQi_ge);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ee.matrix, 0.0,
+ M_dcdc);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0,
+ QiMMQi_ee);
+
+ gsl_matrix_view QiMMQi_gg_s =
+ gsl_matrix_submatrix(QiMMQi_gg, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+ gsl_matrix_view QiMMQi_ge_s =
+ gsl_matrix_submatrix(QiMMQi_ge, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+ gsl_matrix_view QiMMQi_ee_s =
+ gsl_matrix_submatrix(QiMMQi_ee, (c_size - 1) * d_size,
+ (c_size - 1) * d_size, d_size, d_size);
+
+ // Calculate the other part of trB_gg, trB_ge, trB_ee.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_gg_s.matrix,
+ Qi_si, 0.0, M_dd);
+ for (size_t k = 0; k < d_size; k++) {
+ trB_gg += gsl_matrix_get(M_dd, k, k);
+ }
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ge_s.matrix,
+ Qi_si, 0.0, M_dd);
+ for (size_t k = 0; k < d_size; k++) {
+ trB_ge += 2.0 * gsl_matrix_get(M_dd, k, k);
+ }
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ee_s.matrix,
+ Qi_si, 0.0, M_dd);
+ for (size_t k = 0; k < d_size; k++) {
+ trB_ee += gsl_matrix_get(M_dd, k, k);
+ }
+
+ // Calculate trD_gg, trD_ge, trD_ee.
+ trD_gg = 2.0 * trB_gg;
+ trD_ge = 2.0 * trB_ge;
+ trD_ee = 2.0 * trB_ee;
+
+ // calculate B, C and D
+ h_gg = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2);
+ h_ge = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2 + v_size);
+ h_ee = -1.0 * gsl_matrix_get(Hessian_inv, v1 + v_size, v2 + v_size);
+
+ B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
+ C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
+ h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
+ h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
+ D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
+ h_ee * (trCC_ee + 0.5 * trD_ee);
+
+ if (v1 != v2) {
+ B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee;
+ C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) +
+ h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) +
+ h_ee * (trCC_ee + 0.5 * trCe1 * trCe2);
+ D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) +
+ h_ee * (trCC_ee + 0.5 * trD_ee);
+ }
+ }
+ }
+
+ // Calculate a, b, c from B C D.
+ crt_a = 2.0 * D - C;
+ crt_b = 2.0 * B;
+ crt_c = C;
+
+ // Free matrix memory.
+ gsl_matrix_free(QiMQi_g1);
+ gsl_matrix_free(QiMQi_e1);
+ gsl_matrix_free(QiMQi_g2);
+ gsl_matrix_free(QiMQi_e2);
+
+ gsl_matrix_free(QiMQisQisi_g1);
+ gsl_matrix_free(QiMQisQisi_e1);
+ gsl_matrix_free(QiMQisQisi_g2);
+ gsl_matrix_free(QiMQisQisi_e2);
+
+ gsl_matrix_free(QiMQiMQi_gg);
+ gsl_matrix_free(QiMQiMQi_ge);
+ gsl_matrix_free(QiMQiMQi_ee);
+
+ gsl_matrix_free(QiMMQi_gg);
+ gsl_matrix_free(QiMMQi_ge);
+ gsl_matrix_free(QiMMQi_ee);
+
+ gsl_matrix_free(Qi_si);
+
+ gsl_matrix_free(M_dd);
+ gsl_matrix_free(M_dcdc);
+
+ return;
}
// Calculate first-order and second-order derivatives.
-void CalcDev (const char func_name, const gsl_vector *eval,
- const gsl_matrix *Qi, const gsl_matrix *Hi,
- const gsl_matrix *xHi, const gsl_matrix *Hiy,
- const gsl_vector *QixHiy, gsl_vector *gradient,
- gsl_matrix *Hessian_inv, double &crt_a, double &crt_b,
- double &crt_c) {
- if (func_name!='R' && func_name!='L' && func_name!='r' &&
- func_name!='l') {
- cout<<"func_name only takes 'R' or 'L': 'R' for " <<
- "log-restricted likelihood, 'L' for log-likelihood."<<endl;
- return;
- }
-
- size_t dc_size=Qi->size1, d_size=Hi->size1;
- size_t c_size=dc_size/d_size;
- size_t v_size=d_size*(d_size+1)/2;
- size_t v1, v2;
- double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
-
- gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
- gsl_matrix *xHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
- gsl_matrix *xHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
- gsl_matrix *xHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
- gsl_matrix *xHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
- gsl_matrix *xHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
- gsl_matrix *xHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
- gsl_matrix *QixHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
- gsl_matrix *QixHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
- gsl_matrix *QixHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
- gsl_matrix *QixHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
- gsl_matrix *QixHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
- gsl_matrix *QixHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
- gsl_matrix *xHiDHiDHiy_all_gg =
- gsl_matrix_alloc (dc_size, v_size*v_size);
- gsl_matrix *xHiDHiDHiy_all_ee =
- gsl_matrix_alloc (dc_size, v_size*v_size);
- gsl_matrix *xHiDHiDHiy_all_ge =
- gsl_matrix_alloc (dc_size, v_size*v_size);
- gsl_matrix *xHiDHiDHix_all_gg =
- gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
- gsl_matrix *xHiDHiDHix_all_ee =
- gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
- gsl_matrix *xHiDHiDHix_all_ge =
- gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-
- // Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all.
- Calc_xHiDHiy_all (eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
- Calc_xHiDHix_all (eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
- Calc_xHiDHixQixHiy_all (xHiDHix_all_g, xHiDHix_all_e, QixHiy,
- xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
-
- Calc_xHiDHiDHiy_all (v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg,
- xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
- Calc_xHiDHiDHix_all (v_size, eval, Hi, xHi, xHiDHiDHix_all_gg,
- xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
-
- // Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all.
- Calc_QiVec_all (Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g,
- QixHiDHiy_all_e);
- Calc_QiVec_all (Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
- QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
- Calc_QiMat_all (Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g,
- QixHiDHix_all_e);
-
- double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee;
- double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
- double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
-
- // Calculate gradient and Hessian for Vg.
- for (size_t i1=0; i1<d_size; i1++) {
- for (size_t j1=0; j1<d_size; j1++) {
- if (j1<i1) {continue;}
- v1=GetIndex (i1, j1, d_size);
-
- Calc_yPDPy (eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
- xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1,
- yPDPy_g, yPDPy_e);
-
- if (func_name=='R' || func_name=='r') {
- Calc_tracePD (eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e,
- i1, j1, tPD_g, tPD_e);
-
- dev1_g=-0.5*tPD_g+0.5*yPDPy_g;
- dev1_e=-0.5*tPD_e+0.5*yPDPy_e;
- } else {
- Calc_traceHiD (eval, Hi, i1, j1, tHiD_g, tHiD_e);
-
- dev1_g=-0.5*tHiD_g+0.5*yPDPy_g;
- dev1_e=-0.5*tHiD_e+0.5*yPDPy_e;
- }
-
- gsl_vector_set (gradient, v1, dev1_g);
- gsl_vector_set (gradient, v1+v_size, dev1_e);
-
- for (size_t i2=0; i2<d_size; i2++) {
- for (size_t j2=0; j2<d_size; j2++) {
- if (j2<i2) {continue;}
- v2=GetIndex (i2, j2, d_size);
-
- if (v2<v1) {continue;}
-
- Calc_yPDPDPy (eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g,
- xHiDHiy_all_e, QixHiDHiy_all_g, QixHiDHiy_all_e,
- xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
- QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e,
- xHiDHiDHiy_all_gg, xHiDHiDHiy_all_ee,
- xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg,
- xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1,
- i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
-
- // AI for REML.
- if (func_name=='R' || func_name=='r') {
- Calc_tracePDPD (eval, Qi, Hi, xHi, QixHiDHix_all_g,
- QixHiDHix_all_e, xHiDHiDHix_all_gg,
- xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1,
- i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
-
- dev2_gg=0.5*tPDPD_gg-yPDPDPy_gg;
- dev2_ee=0.5*tPDPD_ee-yPDPDPy_ee;
- dev2_ge=0.5*tPDPD_ge-yPDPDPy_ge;
- } else {
- Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tHiDHiD_gg,
- tHiDHiD_ee, tHiDHiD_ge);
-
- dev2_gg=0.5*tHiDHiD_gg-yPDPDPy_gg;
- dev2_ee=0.5*tHiDHiD_ee-yPDPDPy_ee;
- dev2_ge=0.5*tHiDHiD_ge-yPDPDPy_ge;
- }
-
- // Set up Hessian.
- gsl_matrix_set (Hessian, v1, v2, dev2_gg);
- gsl_matrix_set (Hessian, v1+v_size, v2+v_size, dev2_ee);
- gsl_matrix_set (Hessian, v1, v2+v_size, dev2_ge);
- gsl_matrix_set (Hessian, v2+v_size, v1, dev2_ge);
-
- if (v1!=v2) {
- gsl_matrix_set (Hessian, v2, v1, dev2_gg);
- gsl_matrix_set (Hessian, v2+v_size, v1+v_size, dev2_ee);
- gsl_matrix_set (Hessian, v2, v1+v_size, dev2_ge);
- gsl_matrix_set (Hessian, v1+v_size, v2, dev2_ge);
- }
- }
- }
- }
- }
-
- // Invert Hessian.
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (v_size*2);
-
- LUDecomp (Hessian, pmt, &sig);
- LUInvert (Hessian, pmt, Hessian_inv);
-
- gsl_permutation_free(pmt);
- gsl_matrix_free(Hessian);
-
- // Calculate Edgeworth correction factors after inverting
- // Hessian.
- if (c_size>1) {
- CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e,
- xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge,
- d_size, crt_a, crt_b, crt_c);
- } else {
- crt_a=0.0; crt_b=0.0; crt_c=0.0;
- }
-
- gsl_matrix_free(xHiDHiy_all_g);
- gsl_matrix_free(xHiDHiy_all_e);
- gsl_matrix_free(xHiDHix_all_g);
- gsl_matrix_free(xHiDHix_all_e);
- gsl_matrix_free(xHiDHixQixHiy_all_g);
- gsl_matrix_free(xHiDHixQixHiy_all_e);
-
- gsl_matrix_free(QixHiDHiy_all_g);
- gsl_matrix_free(QixHiDHiy_all_e);
- gsl_matrix_free(QixHiDHix_all_g);
- gsl_matrix_free(QixHiDHix_all_e);
- gsl_matrix_free(QixHiDHixQixHiy_all_g);
- gsl_matrix_free(QixHiDHixQixHiy_all_e);
-
- gsl_matrix_free(xHiDHiDHiy_all_gg);
- gsl_matrix_free(xHiDHiDHiy_all_ee);
- gsl_matrix_free(xHiDHiDHiy_all_ge);
- gsl_matrix_free(xHiDHiDHix_all_gg);
- gsl_matrix_free(xHiDHiDHix_all_ee);
- gsl_matrix_free(xHiDHiDHix_all_ge);
-
- return;
+void CalcDev(const char func_name, const gsl_vector *eval, const gsl_matrix *Qi,
+ const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy,
+ const gsl_vector *QixHiy, gsl_vector *gradient,
+ gsl_matrix *Hessian_inv, double &crt_a, double &crt_b,
+ double &crt_c) { // Outputs: gradient, Hessian_inv, crt_a, crt_b, crt_c.
+ if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+ func_name != 'l') {
+ cout << "func_name only takes 'R' or 'L': 'R' for "
+ << "log-restricted likelihood, 'L' for log-likelihood." << endl;
+ return; // Invalid func_name: all outputs are left untouched.
+ }
+
+ size_t dc_size = Qi->size1, d_size = Hi->size1;
+ size_t c_size = dc_size / d_size;
+ size_t v_size = d_size * (d_size + 1) / 2; // Number of (i, j) pairs with j >= i.
+ size_t v1, v2;
+ double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
+
+ gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); // Every entry is assigned in the loops below.
+
+ gsl_matrix *xHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+ gsl_matrix *xHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+ gsl_matrix *xHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
+ gsl_matrix *xHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
+ gsl_matrix *xHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+ gsl_matrix *xHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+
+ gsl_matrix *QixHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+ gsl_matrix *QixHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+ gsl_matrix *QixHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size);
+ gsl_matrix *QixHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size);
+ gsl_matrix *QixHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size);
+ gsl_matrix *QixHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size);
+
+ gsl_matrix *xHiDHiDHiy_all_gg = gsl_matrix_alloc(dc_size, v_size * v_size);
+ gsl_matrix *xHiDHiDHiy_all_ee = gsl_matrix_alloc(dc_size, v_size * v_size);
+ gsl_matrix *xHiDHiDHiy_all_ge = gsl_matrix_alloc(dc_size, v_size * v_size);
+ gsl_matrix *xHiDHiDHix_all_gg =
+ gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+ gsl_matrix *xHiDHiDHix_all_ee =
+ gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+ gsl_matrix *xHiDHiDHix_all_ge =
+ gsl_matrix_alloc(dc_size, v_size * v_size * dc_size);
+
+ // Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all.
+ Calc_xHiDHiy_all(eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
+ Calc_xHiDHix_all(eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
+ Calc_xHiDHixQixHiy_all(xHiDHix_all_g, xHiDHix_all_e, QixHiy,
+ xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
+
+ Calc_xHiDHiDHiy_all(v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg,
+ xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
+ Calc_xHiDHiDHix_all(v_size, eval, Hi, xHi, xHiDHiDHix_all_gg,
+ xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
+
+ // Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all.
+ Calc_QiVec_all(Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g,
+ QixHiDHiy_all_e);
+ Calc_QiVec_all(Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e,
+ QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
+ Calc_QiMat_all(Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g,
+ QixHiDHix_all_e);
+
+ double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee;
+ double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
+ double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
+
+ // Calculate gradient and Hessian: the g-part is stored at index v, the e-part at v + v_size.
+ for (size_t i1 = 0; i1 < d_size; i1++) {
+ for (size_t j1 = 0; j1 < d_size; j1++) {
+ if (j1 < i1) { // Only pairs with j1 >= i1 are visited.
+ continue;
+ }
+ v1 = GetIndex(i1, j1, d_size);
+
+ Calc_yPDPy(eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
+ xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1, yPDPy_g,
+ yPDPy_e);
+
+ if (func_name == 'R' || func_name == 'r') {
+ Calc_tracePD(eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, i1, j1, tPD_g,
+ tPD_e);
+
+ dev1_g = -0.5 * tPD_g + 0.5 * yPDPy_g;
+ dev1_e = -0.5 * tPD_e + 0.5 * yPDPy_e;
+ } else {
+ Calc_traceHiD(eval, Hi, i1, j1, tHiD_g, tHiD_e);
+
+ dev1_g = -0.5 * tHiD_g + 0.5 * yPDPy_g;
+ dev1_e = -0.5 * tHiD_e + 0.5 * yPDPy_e;
+ }
+
+ gsl_vector_set(gradient, v1, dev1_g);
+ gsl_vector_set(gradient, v1 + v_size, dev1_e);
+
+ for (size_t i2 = 0; i2 < d_size; i2++) {
+ for (size_t j2 = 0; j2 < d_size; j2++) {
+ if (j2 < i2) {
+ continue;
+ }
+ v2 = GetIndex(i2, j2, d_size);
+
+ if (v2 < v1) { // Only v2 >= v1 is computed; the rest is mirrored below.
+ continue;
+ }
+
+ Calc_yPDPDPy(eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e,
+ QixHiDHiy_all_g, QixHiDHiy_all_e, xHiDHixQixHiy_all_g,
+ xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g,
+ QixHiDHixQixHiy_all_e, xHiDHiDHiy_all_gg,
+ xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg,
+ xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, i2, j2,
+ yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
+
+ // AI for REML ('R' or 'r'); the else branch handles 'L' or 'l'.
+ if (func_name == 'R' || func_name == 'r') {
+ Calc_tracePDPD(eval, Qi, Hi, xHi, QixHiDHix_all_g, QixHiDHix_all_e,
+ xHiDHiDHix_all_gg, xHiDHiDHix_all_ee,
+ xHiDHiDHix_all_ge, i1, j1, i2, j2, tPDPD_gg,
+ tPDPD_ee, tPDPD_ge);
+
+ dev2_gg = 0.5 * tPDPD_gg - yPDPDPy_gg;
+ dev2_ee = 0.5 * tPDPD_ee - yPDPDPy_ee;
+ dev2_ge = 0.5 * tPDPD_ge - yPDPDPy_ge;
+ } else {
+ Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tHiDHiD_gg, tHiDHiD_ee,
+ tHiDHiD_ge);
+
+ dev2_gg = 0.5 * tHiDHiD_gg - yPDPDPy_gg;
+ dev2_ee = 0.5 * tHiDHiD_ee - yPDPDPy_ee;
+ dev2_ge = 0.5 * tHiDHiD_ge - yPDPDPy_ge;
+ }
+
+ // Set up Hessian: gg block, ee block and the two ge cross blocks.
+ gsl_matrix_set(Hessian, v1, v2, dev2_gg);
+ gsl_matrix_set(Hessian, v1 + v_size, v2 + v_size, dev2_ee);
+ gsl_matrix_set(Hessian, v1, v2 + v_size, dev2_ge);
+ gsl_matrix_set(Hessian, v2 + v_size, v1, dev2_ge);
+
+ if (v1 != v2) { // Mirror the off-diagonal entries with v1 and v2 swapped.
+ gsl_matrix_set(Hessian, v2, v1, dev2_gg);
+ gsl_matrix_set(Hessian, v2 + v_size, v1 + v_size, dev2_ee);
+ gsl_matrix_set(Hessian, v2, v1 + v_size, dev2_ge);
+ gsl_matrix_set(Hessian, v1 + v_size, v2, dev2_ge);
+ }
+ }
+ }
+ }
+ }
+
+ // Invert Hessian with the LUDecomp/LUInvert helpers; the result goes to Hessian_inv.
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(v_size * 2);
+
+ LUDecomp(Hessian, pmt, &sig);
+ LUInvert(Hessian, pmt, Hessian_inv);
+
+ gsl_permutation_free(pmt);
+ gsl_matrix_free(Hessian);
+
+ // Calculate Edgeworth correction factors after inverting
+ // Hessian.
+ if (c_size > 1) {
+ CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e,
+ xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, d_size,
+ crt_a, crt_b, crt_c);
+ } else { // c_size <= 1: the correction factors are set to zero.
+ crt_a = 0.0;
+ crt_b = 0.0;
+ crt_c = 0.0;
+ }
+
+ gsl_matrix_free(xHiDHiy_all_g);
+ gsl_matrix_free(xHiDHiy_all_e);
+ gsl_matrix_free(xHiDHix_all_g);
+ gsl_matrix_free(xHiDHix_all_e);
+ gsl_matrix_free(xHiDHixQixHiy_all_g);
+ gsl_matrix_free(xHiDHixQixHiy_all_e);
+
+ gsl_matrix_free(QixHiDHiy_all_g);
+ gsl_matrix_free(QixHiDHiy_all_e);
+ gsl_matrix_free(QixHiDHix_all_g);
+ gsl_matrix_free(QixHiDHix_all_e);
+ gsl_matrix_free(QixHiDHixQixHiy_all_g);
+ gsl_matrix_free(QixHiDHixQixHiy_all_e);
+
+ gsl_matrix_free(xHiDHiDHiy_all_gg);
+ gsl_matrix_free(xHiDHiDHiy_all_ee);
+ gsl_matrix_free(xHiDHiDHiy_all_ge);
+ gsl_matrix_free(xHiDHiDHix_all_gg);
+ gsl_matrix_free(xHiDHiDHix_all_ee);
+ gsl_matrix_free(xHiDHiDHix_all_ge);
+
+ return;
}
// Update Vg, Ve.
-void UpdateVgVe (const gsl_matrix *Hessian_inv, const gsl_vector *gradient,
- const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) {
- size_t v_size=gradient->size/2, d_size=V_g->size1;
- size_t v;
+void UpdateVgVe(const gsl_matrix *Hessian_inv, const gsl_vector *gradient,
+ const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) { // V_g and V_e are updated in place.
+ size_t v_size = gradient->size / 2, d_size = V_g->size1;
+ size_t v;
- gsl_vector *vec_v=gsl_vector_alloc (v_size*2);
+ gsl_vector *vec_v = gsl_vector_alloc(v_size * 2); // Vg entries at [0, v_size), Ve entries at [v_size, 2 * v_size).
- double d;
+ double d;
- // Vectorize Vg and Ve.
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<d_size; j++) {
- if (j<i) {continue;}
- v=GetIndex(i, j, d_size);
+ // Vectorize Vg and Ve (only entries with j >= i are read).
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j < d_size; j++) {
+ if (j < i) {
+ continue;
+ }
+ v = GetIndex(i, j, d_size);
- d=gsl_matrix_get (V_g, i, j);
- gsl_vector_set (vec_v, v, d);
+ d = gsl_matrix_get(V_g, i, j);
+ gsl_vector_set(vec_v, v, d);
- d=gsl_matrix_get (V_e, i, j);
- gsl_vector_set (vec_v, v+v_size, d);
- }
- }
+ d = gsl_matrix_get(V_e, i, j);
+ gsl_vector_set(vec_v, v + v_size, d);
+ }
+ }
- gsl_blas_dgemv (CblasNoTrans, -1.0*step_scale, Hessian_inv,
- gradient, 1.0, vec_v);
+ gsl_blas_dgemv(CblasNoTrans, -1.0 * step_scale, Hessian_inv, gradient, 1.0,
+ vec_v); // vec_v = vec_v - step_scale * Hessian_inv * gradient.
- // Save Vg and Ve.
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<d_size; j++) {
- if (j<i) {continue;}
- v=GetIndex(i, j, d_size);
+ // Save Vg and Ve, writing each value to both (i, j) and (j, i).
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j < d_size; j++) {
+ if (j < i) {
+ continue;
+ }
+ v = GetIndex(i, j, d_size);
- d=gsl_vector_get (vec_v, v);
- gsl_matrix_set (V_g, i, j, d);
- gsl_matrix_set (V_g, j, i, d);
+ d = gsl_vector_get(vec_v, v);
+ gsl_matrix_set(V_g, i, j, d);
+ gsl_matrix_set(V_g, j, i, d);
- d=gsl_vector_get (vec_v, v+v_size);
- gsl_matrix_set (V_e, i, j, d);
- gsl_matrix_set (V_e, j, i, d);
- }
- }
+ d = gsl_vector_get(vec_v, v + v_size);
+ gsl_matrix_set(V_e, i, j, d);
+ gsl_matrix_set(V_e, j, i, d);
+ }
+ }
- gsl_vector_free(vec_v);
+ gsl_vector_free(vec_v);
- return;
+ return;
}
-double MphNR (const char func_name, const size_t max_iter,
- const double max_prec, const gsl_vector *eval,
- const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *Hi_all,
- gsl_matrix *xHi_all, gsl_matrix *Hiy_all, gsl_matrix *V_g,
- gsl_matrix *V_e, gsl_matrix *Hessian_inv, double &crt_a,
- double &crt_b, double &crt_c) {
- if (func_name!='R' && func_name!='L' && func_name!='r' &&
- func_name!='l') {
- cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<<
- "likelihood, 'L' for log-likelihood."<<endl;
- return 0.0;
- }
- size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
- size_t dc_size=d_size*c_size;
- size_t v_size=d_size*(d_size+1)/2;
-
- double logdet_H, logdet_Q, yPy, logl_const;
- double logl_old=0.0, logl_new=0.0, step_scale;
- int sig;
- size_t step_iter, flag_pd;
-
- gsl_matrix *Vg_save=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *Ve_save=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_temp=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *U_temp=gsl_matrix_alloc (d_size, d_size);
- gsl_vector *D_temp=gsl_vector_alloc (d_size);
- gsl_vector *xHiy=gsl_vector_alloc (dc_size);
- gsl_vector *QixHiy=gsl_vector_alloc (dc_size);
- gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
-
- gsl_vector *gradient=gsl_vector_alloc (v_size*2);
-
- // Calculate |XXt| and (XXt)^{-1}.
- gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
- for (size_t i=0; i<c_size; ++i) {
- for (size_t j=0; j<i; ++j) {
- gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i));
- }
- }
-
- gsl_permutation * pmt=gsl_permutation_alloc (c_size);
- LUDecomp (XXt, pmt, &sig);
- gsl_permutation_free (pmt);
-
- // Calculate the constant for logl.
- if (func_name=='R' || func_name=='r') {
- logl_const=-0.5*(double)(n_size-c_size) *
- (double)d_size*log(2.0*M_PI) +
- 0.5*(double)d_size*LULndet (XXt);
- } else {
- logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
- }
-
- // Optimization iterations.
- for (size_t t=0; t<max_iter; t++) {
- gsl_matrix_memcpy (Vg_save, V_g);
- gsl_matrix_memcpy (Ve_save, V_e);
-
- step_scale=1.0; step_iter=0;
- do {
- gsl_matrix_memcpy (V_g, Vg_save);
- gsl_matrix_memcpy (V_e, Ve_save);
-
- // Update Vg, Ve, and invert Hessian.
- if (t!=0) {
- UpdateVgVe (Hessian_inv, gradient, step_scale, V_g, V_e);
- }
-
- // Check if both Vg and Ve are positive definite.
- flag_pd=1;
- gsl_matrix_memcpy (V_temp, V_e);
- EigenDecomp(V_temp, U_temp, D_temp, 0);
- for (size_t i=0; i<d_size; i++) {
- if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
- }
- gsl_matrix_memcpy (V_temp, V_g);
- EigenDecomp(V_temp, U_temp, D_temp, 0);
- for (size_t i=0; i<d_size; i++) {
- if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
- }
-
- // If flag_pd==1, continue to calculate quantities
- // and logl.
- if (flag_pd==1) {
- CalcHiQi(eval,X,V_g,V_e,Hi_all,Qi,logdet_H,logdet_Q);
- Calc_Hiy_all (Y, Hi_all, Hiy_all);
- Calc_xHi_all (X, Hi_all, xHi_all);
-
- // Calculate QixHiy and yPy.
- Calc_xHiy (Y, xHi_all, xHiy);
- gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
-
- gsl_blas_ddot (QixHiy, xHiy, &yPy);
- yPy=Calc_yHiy (Y, Hiy_all)-yPy;
-
- // Calculate log likelihood/restricted likelihood value.
- if (func_name=='R' || func_name=='r') {
- logl_new=logl_const-0.5*logdet_H-0.5*logdet_Q-0.5*yPy;
- } else {
- logl_new=logl_const-0.5*logdet_H-0.5*yPy;
- }
- }
-
- step_scale/=2.0;
- step_iter++;
-
- } while ( (flag_pd==0 || logl_new<logl_old ||
- logl_new-logl_old>10 ) && step_iter<10 && t!=0);
-
- // Terminate if change is small.
- if (t!=0) {
- if (logl_new<logl_old || flag_pd==0) {
- gsl_matrix_memcpy (V_g, Vg_save);
- gsl_matrix_memcpy (V_e, Ve_save);
- break;
- }
-
- if (logl_new-logl_old<max_prec) {
- break;
- }
- }
-
- logl_old=logl_new;
-
- CalcDev (func_name, eval, Qi, Hi_all, xHi_all, Hiy_all,
- QixHiy, gradient, Hessian_inv, crt_a, crt_b, crt_c);
- }
-
- // Mutiply Hessian_inv with -1.0.
- // Now Hessian_inv is the variance matrix.
- gsl_matrix_scale (Hessian_inv, -1.0);
-
- gsl_matrix_free(Vg_save);
- gsl_matrix_free(Ve_save);
- gsl_matrix_free(V_temp);
- gsl_matrix_free(U_temp);
- gsl_vector_free(D_temp);
- gsl_vector_free(xHiy);
- gsl_vector_free(QixHiy);
-
- gsl_matrix_free(Qi);
- gsl_matrix_free(XXt);
-
- gsl_vector_free(gradient);
-
- return logl_new;
+double MphNR(const char func_name, const size_t max_iter, const double max_prec,
+ const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y,
+ gsl_matrix *Hi_all, gsl_matrix *xHi_all, gsl_matrix *Hiy_all,
+ gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *Hessian_inv,
+ double &crt_a, double &crt_b, double &crt_c) { // Returns the last computed logl_new; V_g, V_e and Hessian_inv are updated in place.
+ if (func_name != 'R' && func_name != 'L' && func_name != 'r' &&
+ func_name != 'l') {
+ cout << "func_name only takes 'R' or 'L': 'R' for log-restricted "
+ << "likelihood, 'L' for log-likelihood." << endl;
+ return 0.0; // Invalid func_name: nothing is allocated or modified.
+ }
+ size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1;
+ size_t dc_size = d_size * c_size;
+ size_t v_size = d_size * (d_size + 1) / 2;
+
+ double logdet_H, logdet_Q, yPy, logl_const;
+ double logl_old = 0.0, logl_new = 0.0, step_scale;
+ int sig;
+ size_t step_iter, flag_pd;
+
+ gsl_matrix *Vg_save = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *Ve_save = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_temp = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *U_temp = gsl_matrix_alloc(d_size, d_size);
+ gsl_vector *D_temp = gsl_vector_alloc(d_size);
+ gsl_vector *xHiy = gsl_vector_alloc(dc_size);
+ gsl_vector *QixHiy = gsl_vector_alloc(dc_size);
+ gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size);
+ gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size);
+
+ gsl_vector *gradient = gsl_vector_alloc(v_size * 2);
+
+ // Form XXt, fill its lower triangle, and LU-decompose it for LULndet below (no explicit inverse is formed here).
+ gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
+ for (size_t i = 0; i < c_size; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i));
+ }
+ }
+
+ gsl_permutation *pmt = gsl_permutation_alloc(c_size);
+ LUDecomp(XXt, pmt, &sig);
+ gsl_permutation_free(pmt);
+
+ // Calculate the constant for logl.
+ if (func_name == 'R' || func_name == 'r') {
+ logl_const =
+ -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) +
+ 0.5 * (double)d_size * LULndet(XXt);
+ } else {
+ logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI);
+ }
+
+ // Optimization iterations.
+ for (size_t t = 0; t < max_iter; t++) {
+ gsl_matrix_memcpy(Vg_save, V_g);
+ gsl_matrix_memcpy(Ve_save, V_e);
+
+ step_scale = 1.0;
+ step_iter = 0;
+ do {
+ gsl_matrix_memcpy(V_g, Vg_save);
+ gsl_matrix_memcpy(V_e, Ve_save);
+
+ // Update Vg, Ve from the Hessian_inv and gradient of the previous iteration (skipped at t == 0).
+ if (t != 0) {
+ UpdateVgVe(Hessian_inv, gradient, step_scale, V_g, V_e);
+ }
+
+ // Check if both Vg and Ve are positive definite.
+ flag_pd = 1;
+ gsl_matrix_memcpy(V_temp, V_e);
+ EigenDecomp(V_temp, U_temp, D_temp, 0);
+ for (size_t i = 0; i < d_size; i++) {
+ if (gsl_vector_get(D_temp, i) <= 0) {
+ flag_pd = 0;
+ }
+ }
+ gsl_matrix_memcpy(V_temp, V_g);
+ EigenDecomp(V_temp, U_temp, D_temp, 0);
+ for (size_t i = 0; i < d_size; i++) {
+ if (gsl_vector_get(D_temp, i) <= 0) {
+ flag_pd = 0;
+ }
+ }
+
+ // If flag_pd==1, continue to calculate quantities
+ // and logl.
+ if (flag_pd == 1) {
+ CalcHiQi(eval, X, V_g, V_e, Hi_all, Qi, logdet_H, logdet_Q);
+ Calc_Hiy_all(Y, Hi_all, Hiy_all);
+ Calc_xHi_all(X, Hi_all, xHi_all);
+
+ // Calculate QixHiy and yPy.
+ Calc_xHiy(Y, xHi_all, xHiy);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
+
+ gsl_blas_ddot(QixHiy, xHiy, &yPy);
+ yPy = Calc_yHiy(Y, Hiy_all) - yPy;
+
+ // Calculate log likelihood/restricted likelihood value.
+ if (func_name == 'R' || func_name == 'r') {
+ logl_new = logl_const - 0.5 * logdet_H - 0.5 * logdet_Q - 0.5 * yPy;
+ } else {
+ logl_new = logl_const - 0.5 * logdet_H - 0.5 * yPy;
+ }
+ }
+
+ step_scale /= 2.0; // Halve the step for a possible retry.
+ step_iter++;
+
+ } while (
+ (flag_pd == 0 || logl_new < logl_old || logl_new - logl_old > 10) &&
+ step_iter < 10 && t != 0); // At most 10 attempts; never retried at t == 0.
+
+ // Terminate if the step failed (restoring the saved Vg, Ve) or if the change is small.
+ if (t != 0) {
+ if (logl_new < logl_old || flag_pd == 0) {
+ gsl_matrix_memcpy(V_g, Vg_save);
+ gsl_matrix_memcpy(V_e, Ve_save);
+ break;
+ }
+
+ if (logl_new - logl_old < max_prec) {
+ break;
+ }
+ }
+
+ logl_old = logl_new;
+
+ CalcDev(func_name, eval, Qi, Hi_all, xHi_all, Hiy_all, QixHiy, gradient,
+ Hessian_inv, crt_a, crt_b, crt_c);
+ }
+
+ // Multiply Hessian_inv with -1.0.
+ // Now Hessian_inv is the variance matrix.
+ gsl_matrix_scale(Hessian_inv, -1.0);
+
+ gsl_matrix_free(Vg_save);
+ gsl_matrix_free(Ve_save);
+ gsl_matrix_free(V_temp);
+ gsl_matrix_free(U_temp);
+ gsl_vector_free(D_temp);
+ gsl_vector_free(xHiy);
+ gsl_vector_free(QixHiy);
+
+ gsl_matrix_free(Qi);
+ gsl_matrix_free(XXt);
+
+ gsl_vector_free(gradient);
+
+ return logl_new;
}
// Initialize Vg, Ve and B.
void MphInitial(const size_t em_iter, const double em_prec,
- const size_t nr_iter, const double nr_prec,
- const gsl_vector *eval, const gsl_matrix *X,
- const gsl_matrix *Y, const double l_min, const double l_max,
- const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e,
- gsl_matrix *B) {
-
- gsl_matrix_set_zero (V_g);
- gsl_matrix_set_zero (V_e);
- gsl_matrix_set_zero (B);
-
- size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
- double a, b, c;
- double lambda, logl, vg, ve;
-
- // Initialize the diagonal elements of Vg and Ve using univariate
- // LMM and REML estimates.
- gsl_matrix *Xt=gsl_matrix_alloc (n_size, c_size);
- gsl_vector *beta_temp=gsl_vector_alloc(c_size);
- gsl_vector *se_beta_temp=gsl_vector_alloc(c_size);
-
- gsl_matrix_transpose_memcpy (Xt, X);
-
- for (size_t i=0; i<d_size; i++) {
- gsl_vector_const_view Y_row=gsl_matrix_const_row (Y, i);
- CalcLambda ('R', eval, Xt, &Y_row.vector, l_min, l_max,
- n_region, lambda, logl);
- CalcLmmVgVeBeta (eval, Xt, &Y_row.vector, lambda, vg, ve,
- beta_temp, se_beta_temp);
-
- gsl_matrix_set(V_g, i, i, vg);
- gsl_matrix_set(V_e, i, i, ve);
- }
-
- gsl_matrix_free (Xt);
- gsl_vector_free (beta_temp);
- gsl_vector_free (se_beta_temp);
-
- // If number of phenotypes is above four, then obtain the off
- // diagonal elements with two trait models.
- if (d_size>4) {
-
- // First obtain good initial values.
- // Large matrices for EM.
- gsl_matrix *U_hat=gsl_matrix_alloc (2, n_size);
- gsl_matrix *E_hat=gsl_matrix_alloc (2, n_size);
- gsl_matrix *OmegaU=gsl_matrix_alloc (2, n_size);
- gsl_matrix *OmegaE=gsl_matrix_alloc (2, n_size);
- gsl_matrix *UltVehiY=gsl_matrix_alloc (2, n_size);
- gsl_matrix *UltVehiBX=gsl_matrix_alloc (2, n_size);
- gsl_matrix *UltVehiU=gsl_matrix_alloc (2, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (2, n_size);
-
- // Large matrices for NR. Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all=gsl_matrix_alloc (2, 2*n_size);
-
- // Each column is H_k^{-1}y_k.
- gsl_matrix *Hiy_all=gsl_matrix_alloc (2, n_size);
-
- // Each dcxdc block is x_k\otimes H_k^{-1}.
- gsl_matrix *xHi_all=gsl_matrix_alloc (2*c_size, 2*n_size);
- gsl_matrix *Hessian=gsl_matrix_alloc (6, 6);
-
- // 2 by n matrix of Y.
- gsl_matrix *Y_sub=gsl_matrix_alloc (2, n_size);
- gsl_matrix *Vg_sub=gsl_matrix_alloc (2, 2);
- gsl_matrix *Ve_sub=gsl_matrix_alloc (2, 2);
- gsl_matrix *B_sub=gsl_matrix_alloc (2, c_size);
-
- for (size_t i=0; i<d_size; i++) {
- gsl_vector_view Y_sub1=gsl_matrix_row (Y_sub, 0);
- gsl_vector_const_view Y_1=gsl_matrix_const_row (Y, i);
- gsl_vector_memcpy (&Y_sub1.vector, &Y_1.vector);
-
- for (size_t j=i+1; j<d_size; j++) {
- gsl_vector_view Y_sub2=gsl_matrix_row (Y_sub, 1);
- gsl_vector_const_view Y_2=gsl_matrix_const_row (Y, j);
- gsl_vector_memcpy (&Y_sub2.vector, &Y_2.vector);
-
- gsl_matrix_set_zero (Vg_sub);
- gsl_matrix_set_zero (Ve_sub);
- gsl_matrix_set (Vg_sub, 0, 0, gsl_matrix_get (V_g, i, i));
- gsl_matrix_set (Ve_sub, 0, 0, gsl_matrix_get (V_e, i, i));
- gsl_matrix_set (Vg_sub, 1, 1, gsl_matrix_get (V_g, j, j));
- gsl_matrix_set (Ve_sub, 1, 1, gsl_matrix_get (V_e, j, j));
-
- logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
- UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub);
- logl=MphNR ('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all,
- xHi_all, Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
-
- gsl_matrix_set(V_g, i, j, gsl_matrix_get (Vg_sub, 0, 1));
- gsl_matrix_set(V_g, j, i, gsl_matrix_get (Vg_sub, 0, 1));
-
- gsl_matrix_set(V_e, i, j, ve=gsl_matrix_get (Ve_sub, 0, 1));
- gsl_matrix_set(V_e, j, i, ve=gsl_matrix_get (Ve_sub, 0, 1));
- }
- }
-
- // Free matrices.
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_matrix_free(Y_sub);
- gsl_matrix_free(Vg_sub);
- gsl_matrix_free(Ve_sub);
- gsl_matrix_free(B_sub);
- }
-
- // Calculate B hat using GSL estimate.
- gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-
- gsl_vector *D_l=gsl_vector_alloc (d_size);
- gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *Qi=gsl_matrix_alloc (d_size*c_size, d_size*c_size);
- gsl_vector *XHiy=gsl_vector_alloc (d_size*c_size);
- gsl_vector *beta=gsl_vector_alloc (d_size*c_size);
-
- gsl_vector_set_zero (XHiy);
-
- double logdet_Ve, logdet_Q, dl, d, delta, dx, dy;
-
- // Eigen decomposition and calculate log|Ve|.
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
- // Calculate Qi and log|Q|.
- logdet_Q=CalcQi (eval, D_l, X, Qi);
-
- // Calculate UltVehiY.
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,UltVehi,Y,0.0,UltVehiY);
-
- //calculate XHiy
- for (size_t i=0; i<d_size; i++) {
- dl=gsl_vector_get(D_l, i);
-
- for (size_t j=0; j<c_size; j++) {
- d=0.0;
- for (size_t k=0; k<n_size; k++) {
- delta=gsl_vector_get(eval, k);
- dx=gsl_matrix_get(X, j, k);
- dy=gsl_matrix_get(UltVehiY, i, k);
- d+=dy*dx/(delta*dl+1.0);
- }
- gsl_vector_set(XHiy, j*d_size+i, d);
- }
- }
-
- gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta);
-
- // Multiply beta by UltVeh and save to B.
- for (size_t i=0; i<c_size; i++) {
- gsl_vector_view B_col=gsl_matrix_column (B, i);
- gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size);
- gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0,
- &B_col.vector);
- }
-
- // Free memory.
- gsl_matrix_free(UltVehiY);
-
- gsl_vector_free(D_l);
- gsl_matrix_free(UltVeh);
- gsl_matrix_free(UltVehi);
- gsl_matrix_free(Qi);
- gsl_vector_free(XHiy);
- gsl_vector_free(beta);
-
- return;
+ const size_t nr_iter, const double nr_prec, // forwarded to MphNR
+ const gsl_vector *eval, const gsl_matrix *X, // eval: n_size entries; X: c_size x n_size
+ const gsl_matrix *Y, const double l_min, const double l_max, // Y: d_size x n_size; l_min/l_max go to CalcLambda
+ const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e, // V_g, V_e: outputs, overwritten
+ gsl_matrix *B) { // B: output, overwritten
+ // All outputs are zeroed first; V_g/V_e entries not set below stay zero.
+ gsl_matrix_set_zero(V_g);
+ gsl_matrix_set_zero(V_e);
+ gsl_matrix_set_zero(B);
+
+ size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1; // n = length of eval, c = rows of X, d = rows of Y
+ double a, b, c; // receive the last three arguments of MphNR; not read in this function
+ double lambda, logl, vg, ve;
+
+ // Initialize the diagonal elements of Vg and Ve using univariate
+ // LMM and REML estimates.
+ gsl_matrix *Xt = gsl_matrix_alloc(n_size, c_size); // transpose of X, filled below
+ gsl_vector *beta_temp = gsl_vector_alloc(c_size);
+ gsl_vector *se_beta_temp = gsl_vector_alloc(c_size); // beta_temp/se_beta_temp are passed to CalcLmmVgVeBeta and freed unread
+
+ gsl_matrix_transpose_memcpy(Xt, X);
+
+ for (size_t i = 0; i < d_size; i++) { // one univariate fit per row of Y
+ gsl_vector_const_view Y_row = gsl_matrix_const_row(Y, i);
+ CalcLambda('R', eval, Xt, &Y_row.vector, l_min, l_max, n_region, lambda,
+ logl);
+ CalcLmmVgVeBeta(eval, Xt, &Y_row.vector, lambda, vg, ve, beta_temp,
+ se_beta_temp);
+
+ gsl_matrix_set(V_g, i, i, vg);
+ gsl_matrix_set(V_e, i, i, ve);
+ }
+
+ gsl_matrix_free(Xt);
+ gsl_vector_free(beta_temp);
+ gsl_vector_free(se_beta_temp);
+ // With d_size <= 4 the off-diagonal entries of V_g and V_e stay at zero.
+ // If number of phenotypes is above four, then obtain the off
+ // diagonal elements with two trait models.
+ if (d_size > 4) {
+
+ // First obtain good initial values.
+ // Large matrices for EM.
+ gsl_matrix *U_hat = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *E_hat = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *OmegaU = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *OmegaE = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *UltVehiBX = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *UltVehiU = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *UltVehiE = gsl_matrix_alloc(2, n_size);
+
+ // Large matrices for NR. Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all = gsl_matrix_alloc(2, 2 * n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(2, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all = gsl_matrix_alloc(2 * c_size, 2 * n_size);
+ gsl_matrix *Hessian = gsl_matrix_alloc(6, 6); // 6 = 2 * (d * (d + 1) / 2) with d = 2 traits
+
+ // 2 by n matrix of Y.
+ gsl_matrix *Y_sub = gsl_matrix_alloc(2, n_size);
+ gsl_matrix *Vg_sub = gsl_matrix_alloc(2, 2);
+ gsl_matrix *Ve_sub = gsl_matrix_alloc(2, 2);
+ gsl_matrix *B_sub = gsl_matrix_alloc(2, c_size);
+ // Fit every unordered pair (i, j), i < j, as a two-trait model.
+ for (size_t i = 0; i < d_size; i++) {
+ gsl_vector_view Y_sub1 = gsl_matrix_row(Y_sub, 0);
+ gsl_vector_const_view Y_1 = gsl_matrix_const_row(Y, i);
+ gsl_vector_memcpy(&Y_sub1.vector, &Y_1.vector);
+
+ for (size_t j = i + 1; j < d_size; j++) {
+ gsl_vector_view Y_sub2 = gsl_matrix_row(Y_sub, 1);
+ gsl_vector_const_view Y_2 = gsl_matrix_const_row(Y, j);
+ gsl_vector_memcpy(&Y_sub2.vector, &Y_2.vector);
+ // Start the pair from the univariate variances with zero covariance.
+ gsl_matrix_set_zero(Vg_sub);
+ gsl_matrix_set_zero(Ve_sub);
+ gsl_matrix_set(Vg_sub, 0, 0, gsl_matrix_get(V_g, i, i));
+ gsl_matrix_set(Ve_sub, 0, 0, gsl_matrix_get(V_e, i, i));
+ gsl_matrix_set(Vg_sub, 1, 1, gsl_matrix_get(V_g, j, j));
+ gsl_matrix_set(Ve_sub, 1, 1, gsl_matrix_get(V_e, j, j));
+ // MphEM is run before MphNR on the same Vg_sub/Ve_sub; logl is not read afterwards.
+ logl = MphEM('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+ Vg_sub, Ve_sub, B_sub);
+ logl = MphNR('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, xHi_all,
+ Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
+ // Copy element (0, 1) of each 2x2 result into both symmetric positions.
+ gsl_matrix_set(V_g, i, j, gsl_matrix_get(Vg_sub, 0, 1));
+ gsl_matrix_set(V_g, j, i, gsl_matrix_get(Vg_sub, 0, 1));
+
+ gsl_matrix_set(V_e, i, j, ve = gsl_matrix_get(Ve_sub, 0, 1));
+ gsl_matrix_set(V_e, j, i, ve = gsl_matrix_get(Ve_sub, 0, 1)); // the embedded "ve =" assignment has no later reader
+ }
+ }
+
+ // Free matrices.
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_matrix_free(Y_sub);
+ gsl_matrix_free(Vg_sub);
+ gsl_matrix_free(Ve_sub);
+ gsl_matrix_free(B_sub);
+ }
+
+ // Calculate B hat ("GSL estimate" in the original comment; presumably GLS, generalized least squares - confirm).
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+
+ gsl_vector *D_l = gsl_vector_alloc(d_size);
+ gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *Qi = gsl_matrix_alloc(d_size * c_size, d_size * c_size);
+ gsl_vector *XHiy = gsl_vector_alloc(d_size * c_size);
+ gsl_vector *beta = gsl_vector_alloc(d_size * c_size);
+
+ gsl_vector_set_zero(XHiy);
+
+ double logdet_Ve, logdet_Q, dl, d, delta, dx, dy; // logdet_Ve and logdet_Q are assigned below but never read
+
+ // Eigen decomposition and calculate log|Ve|.
+ logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+
+ // Calculate Qi and log|Q|.
+ logdet_Q = CalcQi(eval, D_l, X, Qi);
+
+ // Calculate UltVehiY.
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
+
+ // Calculate XHiy: entry (j * d_size + i) = sum_k UltVehiY(i, k) * X(j, k) / (eval(k) * D_l(i) + 1).
+ for (size_t i = 0; i < d_size; i++) {
+ dl = gsl_vector_get(D_l, i);
+
+ for (size_t j = 0; j < c_size; j++) {
+ d = 0.0;
+ for (size_t k = 0; k < n_size; k++) {
+ delta = gsl_vector_get(eval, k);
+ dx = gsl_matrix_get(X, j, k);
+ dy = gsl_matrix_get(UltVehiY, i, k);
+ d += dy * dx / (delta * dl + 1.0);
+ }
+ gsl_vector_set(XHiy, j * d_size + i, d);
+ }
+ }
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta); // beta = Qi * XHiy
+
+ // Column i of B = UltVeh^T * (the d_size entries of beta starting at i * d_size).
+ for (size_t i = 0; i < c_size; i++) {
+ gsl_vector_view B_col = gsl_matrix_column(B, i);
+ gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size);
+ gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0,
+ &B_col.vector);
+ }
+
+ // Free memory.
+ gsl_matrix_free(UltVehiY);
+
+ gsl_vector_free(D_l);
+ gsl_matrix_free(UltVeh);
+ gsl_matrix_free(UltVehi);
+ gsl_matrix_free(Qi);
+ gsl_vector_free(XHiy);
+ gsl_vector_free(beta);
+
+ return;
}
// p-value correction
// mode=1 Wald; mode=2 LRT; mode=3 SCORE;
-double PCRT (const size_t mode, const size_t d_size, const double p_value,
- const double crt_a, const double crt_b, const double crt_c) {
- double p_crt=0.0, chisq_crt=0.0, q=(double)d_size;
- double chisq=gsl_cdf_chisq_Qinv(p_value, (double)d_size );
-
- if (mode==1) {
- double a=crt_c/(2.0*q*(q+2.0));
- double b=1.0+(crt_a+crt_b)/(2.0*q);
- chisq_crt=(-1.0*b+sqrt(b*b+4.0*a*chisq))/(2.0*a);
- } else if (mode==2) {
- chisq_crt=chisq/(1.0+crt_a/(2.0*q) );
- } else {
- chisq_crt=chisq;
- }
-
- p_crt=gsl_cdf_chisq_Q (chisq_crt, (double)d_size );
-
- return p_crt;
+double PCRT(const size_t mode, const size_t d_size, const double p_value, // returns the corrected p-value
+ const double crt_a, const double crt_b, const double crt_c) { // correction terms; which ones are used depends on mode
+ double p_crt = 0.0, chisq_crt = 0.0, q = (double)d_size;
+ double chisq = gsl_cdf_chisq_Qinv(p_value, (double)d_size); // chi-square value (d_size d.o.f.) whose upper tail is p_value
+ // mode 1: "+" root of a*x^2 + b*x - chisq = 0; mode 2: chisq divided by a factor; otherwise unchanged.
+ if (mode == 1) {
+ double a = crt_c / (2.0 * q * (q + 2.0));
+ double b = 1.0 + (crt_a + crt_b) / (2.0 * q);
+ chisq_crt = (-1.0 * b + sqrt(b * b + 4.0 * a * chisq)) / (2.0 * a); // NOTE(review): a == 0 (crt_c == 0) divides by zero here - confirm callers never pass that
+ } else if (mode == 2) {
+ chisq_crt = chisq / (1.0 + crt_a / (2.0 * q));
+ } else {
+ chisq_crt = chisq; // mode 3 and any other value: statistic is left uncorrected
+ }
+ // Map the (possibly corrected) statistic back to an upper-tail probability.
+ p_crt = gsl_cdf_chisq_Q(chisq_crt, (double)d_size);
+
+ return p_crt;
}
// WJA added.
-void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY) {
- string file_bgen=file_oxford+".bgen";
- ifstream infile (file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bgen file:"<<file_bgen<<endl;
- return;
- }
-
- clock_t time_start=clock();
- time_UtX=0; time_opt=0;
-
- string line;
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
- double crt_a, crt_b, crt_c;
- int n_miss, c_phen;
- double geno, x_mean;
- size_t c=0;
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-
- size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
- // Large matrices for EM.
- gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
- // Large matrices for NR. Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
- // Each column is H_k^{-1}y_k.
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
- // Each dcxdc block is x_k\otimes H_k^{-1}.
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
- gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
- gsl_vector *x=gsl_vector_alloc (n_size);
- gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
- gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
- gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
- gsl_vector *beta=gsl_vector_alloc (d_size);
- gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
- // Null estimates for initial values.
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
- gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
- gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
- gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub =
- gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
- gsl_matrix_transpose_memcpy (Y, UtY);
-
- gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
- gsl_vector_view X_row=gsl_matrix_row(X, c_size);
- gsl_vector_set_zero(&X_row.vector);
- gsl_vector_view B_col=gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col.vector);
-
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
- Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
- Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub.matrix, se_B_null);
-
- c=0;
- Vg_remle_null.clear();
- Ve_remle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size,
- c+v_size) );
- c++;
- }
- }
- beta_remle_null.clear();
- se_beta_remle_null.clear();
- for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
- }
- logl_remle_H0=logl_H0;
-
- cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
- cout.precision(4);
-
- cout<<"REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
- Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub.matrix, se_B_null);
-
- c=0;
- Vg_mle_null.clear();
- Ve_mle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
- c++;
- }
- }
- beta_mle_null.clear();
- se_beta_mle_null.clear();
- for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
- }
- logl_mle_H0=logl_H0;
-
- cout<<"MLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE likelihood = "<<logl_H0<<endl;
-
-
- vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
- for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
- }
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
- }
-
- gsl_matrix_memcpy (V_g_null, V_g);
- gsl_matrix_memcpy (V_e_null, V_e);
- gsl_matrix_memcpy (B_null, B);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
- infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
- bgen_snp_block_offset-=4;
- infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
- bgen_snp_block_offset-=4;
- infile.ignore(4+bgen_header_length-20);
- bgen_snp_block_offset-=4+bgen_header_length-20;
- infile.read(reinterpret_cast<char*>(&bgen_flags),4);
- bgen_snp_block_offset-=4;
- bool CompressedSNPBlocks=bgen_flags&0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
- double bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- std::cout<<"Warning: WJA hard coded SNP missingness threshold "<<
- "of 10%"<<std::endl;
-
- // Start reading genotypes and analyze.
- size_t csnp=0, t_last=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (indicator_snp[t]==0) {continue;}
- t_last++;
- }
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // Read SNP header.
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char*>(&bgen_N),4);
- infile.read(reinterpret_cast<char*>(&bgen_LS),2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char*>(&bgen_LR),2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char*>(&bgen_LC),2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
-
- infile.read(reinterpret_cast<char*>(&bgen_LA),4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char*>(&bgen_LB),4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3*bgen_N];
-
- if (indicator_snp[t]==0) {
- if(CompressedSNPBlocks)
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- else
- bgen_P=6*bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if(CompressedSNPBlocks) {
-
- infile.read(reinterpret_cast<char*>(&bgen_P),4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size=6*bgen_N;
-
- infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
-
- int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data),
- reinterpret_cast<uLongf*>(&unzipped_data_size),
- reinterpret_cast<Bytef*>(zipped_data),
- static_cast<uLong> (bgen_P));
- assert(result == Z_OK);
-
- } else {
-
- bgen_P=6*bgen_N;
- infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
- }
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<bgen_N; ++i) {
- if (indicator_idv[i]==0) {continue;}
-
- bgen_geno_prob_AA =
- static_cast<double>(unzipped_data[i*3])/32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i*3+1])/32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i*3+2])/32768.0;
-
- // WJA.
- bgen_geno_prob_non_miss=bgen_geno_prob_AA +
- bgen_geno_prob_AB+bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss<0.9) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- }
- else {
-
- bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
- bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
- bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
- geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=static_cast<double>(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
- }
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- csnp++;
-
- if (csnp%msize==0 || csnp==t_last ) {
- size_t l=0;
- if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-
- gsl_matrix_view Xlarge_sub =
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub =
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
- &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-
- // Initial values.
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- time_start=clock();
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g_null, V_e_null, UltVehiY, beta, Vbeta);
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
- crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
- UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
- (double)d_size );
-
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
- UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
-
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
-
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve,
- v_Vbeta};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout<<endl;
-
- infile.close();
- infile.clear();
-
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(X);
- gsl_matrix_free(V_g);
- gsl_matrix_free(V_e);
- gsl_matrix_free(B);
- gsl_vector_free(beta);
- gsl_matrix_free(Vbeta);
-
- gsl_matrix_free(V_g_null);
- gsl_matrix_free(V_e_null);
- gsl_matrix_free(B_null);
- gsl_matrix_free(se_B_null);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
-
- return;
+void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY) {
+ string file_bgen = file_oxford + ".bgen";
+ ifstream infile(file_bgen.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bgen file:" << file_bgen << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+ time_UtX = 0;
+ time_opt = 0;
+
+ string line;
+
+ // Create a large matrix.
+ size_t msize = 10000;
+ gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix_set_zero(Xlarge);
+
+ double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+ double crt_a, crt_b, crt_c;
+ int n_miss, c_phen;
+ double geno, x_mean;
+ size_t c = 0;
+ size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+
+ size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+ // Large matrices for EM.
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+ // Large matrices for NR. Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+ gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+ gsl_vector *x = gsl_vector_alloc(n_size);
+ gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+ gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+ gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_vector *beta = gsl_vector_alloc(d_size);
+ gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+ // Null estimates for initial values.
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+ gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+ gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub =
+ gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+ gsl_matrix_transpose_memcpy(Y, UtY);
+
+ gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+ gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+ gsl_vector_set_zero(&X_row.vector);
+ gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col.vector);
+
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+ l_max, n_region, V_g, V_e, &B_sub.matrix);
+ logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub.matrix);
+ logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+ crt_c);
+ MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+ se_B_null);
+
+ c = 0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i = 0; i < se_B_null->size1; i++) {
+ for (size_t j = 0; j < se_B_null->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+ }
+ }
+ logl_remle_H0 = logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE likelihood = " << logl_H0 << endl;
+
+ logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub.matrix);
+ logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+ crt_c);
+ MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+ se_B_null);
+
+ c = 0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i = 0; i < se_B_null->size1; i++) {
+ for (size_t j = 0; j < se_B_null->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+ }
+ }
+ logl_mle_H0 = logl_H0;
+
+ cout << "MLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE likelihood = " << logl_H0 << endl;
+
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+
+ gsl_matrix_memcpy(V_g_null, V_g);
+ gsl_matrix_memcpy(V_e_null, V_e);
+ gsl_matrix_memcpy(B_null, B);
+
+ // Read in header.
+ uint32_t bgen_snp_block_offset;
+ uint32_t bgen_header_length;
+ uint32_t bgen_nsamples;
+ uint32_t bgen_nsnps;
+ uint32_t bgen_flags;
+ infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
+ bgen_snp_block_offset -= 4;
+ infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
+ bgen_snp_block_offset -= 4;
+ infile.ignore(4 + bgen_header_length - 20);
+ bgen_snp_block_offset -= 4 + bgen_header_length - 20;
+ infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
+ bgen_snp_block_offset -= 4;
+ bool CompressedSNPBlocks = bgen_flags & 0x1;
+
+ infile.ignore(bgen_snp_block_offset);
+
+ double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
+ double bgen_geno_prob_non_miss;
+
+ uint32_t bgen_N;
+ uint16_t bgen_LS;
+ uint16_t bgen_LR;
+ uint16_t bgen_LC;
+ uint32_t bgen_SNP_pos;
+ uint32_t bgen_LA;
+ std::string bgen_A_allele;
+ uint32_t bgen_LB;
+ std::string bgen_B_allele;
+ uint32_t bgen_P;
+ size_t unzipped_data_size;
+ string id;
+ string rs;
+ string chr;
+ std::cout << "Warning: WJA hard coded SNP missingness threshold "
+ << "of 10%" << std::endl;
+
+ // Start reading genotypes and analyze.
+ size_t csnp = 0, t_last = 0;
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+ t_last++;
+ }
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // Read SNP header.
+ id.clear();
+ rs.clear();
+ chr.clear();
+ bgen_A_allele.clear();
+ bgen_B_allele.clear();
+
+ infile.read(reinterpret_cast<char *>(&bgen_N), 4);
+ infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
+
+ id.resize(bgen_LS);
+ infile.read(&id[0], bgen_LS);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
+ rs.resize(bgen_LR);
+ infile.read(&rs[0], bgen_LR);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
+ chr.resize(bgen_LC);
+ infile.read(&chr[0], bgen_LC);
+
+ infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
+ bgen_A_allele.resize(bgen_LA);
+ infile.read(&bgen_A_allele[0], bgen_LA);
+
+ infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
+ bgen_B_allele.resize(bgen_LB);
+ infile.read(&bgen_B_allele[0], bgen_LB);
+
+ uint16_t unzipped_data[3 * bgen_N];
+
+ if (indicator_snp[t] == 0) {
+ if (CompressedSNPBlocks)
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ else
+ bgen_P = 6 * bgen_N;
+
+ infile.ignore(static_cast<size_t>(bgen_P));
+
+ continue;
+ }
+
+ if (CompressedSNPBlocks) {
+
+ infile.read(reinterpret_cast<char *>(&bgen_P), 4);
+ uint8_t zipped_data[bgen_P];
+
+ unzipped_data_size = 6 * bgen_N;
+
+ infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
+
+ int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
+ reinterpret_cast<uLongf *>(&unzipped_data_size),
+ reinterpret_cast<Bytef *>(zipped_data),
+ static_cast<uLong>(bgen_P));
+ assert(result == Z_OK);
+
+ } else {
+
+ bgen_P = 6 * bgen_N;
+ infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
+ }
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i = 0; i < bgen_N; ++i) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
+ bgen_geno_prob_AB =
+ static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
+ bgen_geno_prob_BB =
+ static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
+
+ // WJA.
+ bgen_geno_prob_non_miss =
+ bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss < 0.9) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+
+ bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
+
+ geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+
+ x_mean /= static_cast<double>(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean);
+ }
+ }
+
+ gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+ gsl_vector_memcpy(&Xlarge_col.vector, x);
+ csnp++;
+
+ if (csnp % msize == 0 || csnp == t_last) {
+ size_t l = 0;
+ if (csnp % msize == 0) {
+ l = msize;
+ } else {
+ l = csnp % msize;
+ }
+
+ gsl_matrix_view Xlarge_sub =
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub =
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+ time_start = clock();
+ eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ &UtXlarge_sub.matrix);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ gsl_matrix_set_zero(Xlarge);
+
+ for (size_t i = 0; i < l; i++) {
+ gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+ gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+ // Initial values.
+ gsl_matrix_memcpy(V_g, V_g_null);
+ gsl_matrix_memcpy(V_e, V_e_null);
+ gsl_matrix_memcpy(B, B_null);
+
+ time_start = clock();
+
+ // 3 is before 1.
+ if (a_mode == 3 || a_mode == 4) {
+ p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+ V_e_null, UltVehiY, beta, Vbeta);
+ if (p_score < p_nr && crt == 1) {
+ logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (p_lrt < p_nr) {
+ logl_H1 =
+ MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (crt == 1) {
+ p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, B);
+ p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (p_wald < p_nr) {
+ logl_H1 =
+ MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (crt == 1) {
+ p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta[i] = gsl_vector_get(beta, i);
+ }
+
+ c = 0;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg[c] = gsl_matrix_get(V_g, i, j);
+ v_Ve[c] = gsl_matrix_get(V_e, i, j);
+ v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ }
+ }
+ cout << endl;
+
+ infile.close();
+ infile.clear();
+
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null);
+
+ gsl_matrix_free(Xlarge);
+ gsl_matrix_free(UtXlarge);
+
+ return;
}
-void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return;
- }
-
- clock_t time_start=clock();
- time_UtX=0; time_opt=0;
-
- string line;
- char *ch_ptr;
-
- double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
- double crt_a, crt_b, crt_c;
- int n_miss, c_phen;
- double geno, x_mean;
- size_t c=0;
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
-
- size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- // Large matrices for EM.
- gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
- // Large matrices for NR.
- // Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
- // Each column is H_k^{-1}y_k.
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
- // Each dcxdc block is x_k \otimes H_k^{-1}.
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
- gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
- gsl_vector *x=gsl_vector_alloc (n_size);
- gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
- gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
- gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
- gsl_vector *beta=gsl_vector_alloc (d_size);
- gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
- // Null estimates for initial values.
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
- gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
- gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
- gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub =
- gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
- gsl_matrix_transpose_memcpy (Y, UtY);
-
- gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
- gsl_vector_view X_row=gsl_matrix_row(X, c_size);
- gsl_vector_set_zero(&X_row.vector);
- gsl_vector_view B_col=gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col.vector);
-
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
- Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
- &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub.matrix, se_B_null);
-
- c=0;
- Vg_remle_null.clear();
- Ve_remle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size,
- c+v_size) );
- c++;
- }
- }
- beta_remle_null.clear();
- se_beta_remle_null.clear();
- for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
- }
- logl_remle_H0=logl_H0;
-
- cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
- cout.precision(4);
-
- cout<<"REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
- Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub.matrix, se_B_null);
-
- c=0;
- Vg_mle_null.clear();
- Ve_mle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
- c++;
- }
- }
- beta_mle_null.clear();
- se_beta_mle_null.clear();
- for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
- }
- logl_mle_H0=logl_H0;
-
- cout<<"MLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE likelihood = "<<logl_H0<<endl;
-
- vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
- for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
- }
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
- }
-
- gsl_matrix_memcpy (V_g_null, V_g);
- gsl_matrix_memcpy (V_e_null, V_e);
- gsl_matrix_memcpy (B_null, B);
-
- // Start reading genotypes and analyze.
- size_t csnp=0, t_last=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (indicator_snp[t]==0) {continue;}
- t_last++;
- }
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- }
- else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
- geno=gsl_vector_get(x, i);
- }
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- csnp++;
-
- if (csnp%msize==0 || csnp==t_last ) {
- size_t l=0;
- if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-
- gsl_matrix_view Xlarge_sub =
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub =
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
- &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-
- // Initial values.
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- time_start=clock();
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g_null, V_e_null, UltVehiY, beta, Vbeta);
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
- crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
- UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
- (double)d_size );
-
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
- UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
-
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
-
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
- v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout<<endl;
-
- infile.close();
- infile.clear();
-
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(X);
- gsl_matrix_free(V_g);
- gsl_matrix_free(V_e);
- gsl_matrix_free(B);
- gsl_vector_free(beta);
- gsl_matrix_free(Vbeta);
-
- gsl_matrix_free(V_g_null);
- gsl_matrix_free(V_e_null);
- gsl_matrix_free(B_null);
- gsl_matrix_free(se_B_null);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
-
- return;
+void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+ time_UtX = 0;
+ time_opt = 0;
+
+ string line;
+ char *ch_ptr;
+
+ double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+ double crt_a, crt_b, crt_c;
+ int n_miss, c_phen;
+ double geno, x_mean;
+ size_t c = 0;
+ size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+
+ size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+ // Create a large matrix.
+ size_t msize = 10000;
+ gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix_set_zero(Xlarge);
+
+ // Large matrices for EM.
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+ // Each dcxdc block is x_k \otimes H_k^{-1}.
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+ gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+ gsl_vector *x = gsl_vector_alloc(n_size);
+ gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+ gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+ gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_vector *beta = gsl_vector_alloc(d_size);
+ gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+ // Null estimates for initial values.
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+ gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+ gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub =
+ gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+ gsl_matrix_transpose_memcpy(Y, UtY);
+
+ gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+ gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+ gsl_vector_set_zero(&X_row.vector);
+ gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col.vector);
+
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+ l_max, n_region, V_g, V_e, &B_sub.matrix);
+ logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub.matrix);
+ logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+ crt_c);
+ MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+ se_B_null);
+
+ c = 0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i = 0; i < se_B_null->size1; i++) {
+ for (size_t j = 0; j < se_B_null->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+ }
+ }
+ logl_remle_H0 = logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE likelihood = " << logl_H0 << endl;
+
+ logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub.matrix);
+ logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+ crt_c);
+ MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+ se_B_null);
+
+ c = 0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i = 0; i < se_B_null->size1; i++) {
+ for (size_t j = 0; j < se_B_null->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+ }
+ }
+ logl_mle_H0 = logl_H0;
+
+ cout << "MLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE likelihood = " << logl_H0 << endl;
+
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+
+ gsl_matrix_memcpy(V_g_null, V_g);
+ gsl_matrix_memcpy(V_e_null, V_e);
+ gsl_matrix_memcpy(B_null, B);
+
+ // Start reading genotypes and analyze.
+ size_t csnp = 0, t_last = 0;
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+ t_last++;
+ }
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof();
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+
+ x_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean);
+ }
+ geno = gsl_vector_get(x, i);
+ }
+
+ gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+ gsl_vector_memcpy(&Xlarge_col.vector, x);
+ csnp++;
+
+ if (csnp % msize == 0 || csnp == t_last) {
+ size_t l = 0;
+ if (csnp % msize == 0) {
+ l = msize;
+ } else {
+ l = csnp % msize;
+ }
+
+ gsl_matrix_view Xlarge_sub =
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub =
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+ time_start = clock();
+ eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ &UtXlarge_sub.matrix);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ gsl_matrix_set_zero(Xlarge);
+
+ for (size_t i = 0; i < l; i++) {
+ gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+ gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+ // Initial values.
+ gsl_matrix_memcpy(V_g, V_g_null);
+ gsl_matrix_memcpy(V_e, V_e_null);
+ gsl_matrix_memcpy(B, B_null);
+
+ time_start = clock();
+
+ // Mode 3 (score test) is evaluated before mode 1 (Wald test).
+ if (a_mode == 3 || a_mode == 4) {
+ p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+ V_e_null, UltVehiY, beta, Vbeta);
+ if (p_score < p_nr && crt == 1) {
+ logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (p_lrt < p_nr) {
+ logl_H1 =
+ MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (crt == 1) {
+ p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, B);
+ p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (p_wald < p_nr) {
+ logl_H1 =
+ MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (crt == 1) {
+ p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta[i] = gsl_vector_get(beta, i);
+ }
+
+ c = 0;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg[c] = gsl_matrix_get(V_g, i, j);
+ v_Ve[c] = gsl_matrix_get(V_e, i, j);
+ v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ }
+ }
+ cout << endl;
+
+ infile.close();
+ infile.clear();
+
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null);
+
+ gsl_matrix_free(Xlarge);
+ gsl_matrix_free(UtXlarge);
+
+ return;
}
-void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY) {
- string file_bed=file_bfile+".bed";
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
- clock_t time_start=clock();
- time_UtX=0; time_opt=0;
-
- char ch[1];
- bitset<8> b;
-
- double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
- double crt_a, crt_b, crt_c;
- int n_bit, n_miss, ci_total, ci_test;
- double geno, x_mean;
- size_t c=0;
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
- size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
- // Create a large matrix.
- size_t msize=10000;
- gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- // Large matrices for EM.
- gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
- // Large matrices for NR.
- // Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
- // Each column is H_k^{-1}y_k.
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
- // Each dcxdc block is x_k\otimes H_k^{-1}.
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
-
- gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
- gsl_vector *x=gsl_vector_alloc (n_size);
-
- gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
- gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
- gsl_vector *beta=gsl_vector_alloc (d_size);
- gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
- // Null estimates for initial values.
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
- gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
- gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
- gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub =
- gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
- gsl_matrix_transpose_memcpy (Y, UtY);
- gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
- gsl_vector_view X_row=gsl_matrix_row(X, c_size);
- gsl_vector_set_zero(&X_row.vector);
- gsl_vector_view B_col=gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col.vector);
-
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix,
- Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
- &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub.matrix, se_B_null);
-
- c=0;
- Vg_remle_null.clear();
- Ve_remle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
- c+v_size));
- c++;
- }
- }
- beta_remle_null.clear();
- se_beta_remle_null.clear();
- for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
- }
- logl_remle_H0=logl_H0;
-
- cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
- cout.precision(4);
- cout<<"REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
- UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y,
- Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub.matrix, se_B_null);
-
- c=0;
- Vg_mle_null.clear();
- Ve_mle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
- c++;
- }
- }
- beta_mle_null.clear();
- se_beta_mle_null.clear();
- for (size_t i=0; i<se_B_null->size1; i++) {
- for (size_t j=0; j<se_B_null->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
- }
- }
- logl_mle_H0=logl_H0;
-
- cout<<"MLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE likelihood = "<<logl_H0<<endl;
-
- vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
- for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
- }
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
- }
-
- gsl_matrix_memcpy (V_g_null, V_g);
- gsl_matrix_memcpy (V_e_null, V_e);
- gsl_matrix_memcpy (B_null, B);
-
- // Start reading genotypes and analyze.
- // Calculate n_bit and c, the number of bit for each snp.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- size_t csnp=0, t_last=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (indicator_snp[t]==0) {continue;}
- t_last++;
- }
- for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {
- ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- //read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0;
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
- }
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- geno=gsl_vector_get(x,i);
- if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
- }
-
- gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
- gsl_vector_memcpy (&Xlarge_col.vector, x);
- csnp++;
-
- if (csnp%msize==0 || csnp==t_last ) {
- size_t l=0;
- if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
-
- gsl_matrix_view Xlarge_sub =
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub =
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start=clock();
- eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
- &UtXlarge_sub.matrix);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_matrix_set_zero (Xlarge);
-
- for (size_t i=0; i<l; i++) {
- gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
- gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
-
- // Initial values.
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- time_start=clock();
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
- crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
- UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0),
- (double)d_size );
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
- UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
-
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
-
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
- v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout<<endl;
-
- infile.close();
- infile.clear();
-
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_vector_free(x);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(X);
- gsl_matrix_free(V_g);
- gsl_matrix_free(V_e);
- gsl_matrix_free(B);
- gsl_vector_free(beta);
- gsl_matrix_free(Vbeta);
-
- gsl_matrix_free(V_g_null);
- gsl_matrix_free(V_e_null);
- gsl_matrix_free(B_null);
- gsl_matrix_free(se_B_null);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
-
- return;
+void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY) {
+ string file_bed = file_bfile + ".bed";
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+ time_UtX = 0;
+ time_opt = 0;
+
+ char ch[1];
+ bitset<8> b;
+
+ double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+ double crt_a, crt_b, crt_c;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+ size_t c = 0;
+ size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+ size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+ // Create a large matrix.
+ size_t msize = 10000;
+ gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
+ gsl_matrix_set_zero(Xlarge);
+
+ // Large matrices for EM.
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+
+ gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+ gsl_vector *x = gsl_vector_alloc(n_size);
+
+ gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+ gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_vector *beta = gsl_vector_alloc(d_size);
+ gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+ // Null estimates for initial values.
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
+
+ gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+ gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub =
+ gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+ gsl_matrix_transpose_memcpy(Y, UtY);
+ gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
+
+ gsl_vector_view X_row = gsl_matrix_row(X, c_size);
+ gsl_vector_set_zero(&X_row.vector);
+ gsl_vector_view B_col = gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col.vector);
+
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
+ l_max, n_region, V_g, V_e, &B_sub.matrix);
+
+ logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub.matrix);
+ logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+ crt_c);
+ MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+ se_B_null);
+
+ c = 0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i = 0; i < se_B_null->size1; i++) {
+ for (size_t j = 0; j < se_B_null->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+ }
+ }
+ logl_remle_H0 = logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE likelihood = " << logl_H0 << endl;
+
+ logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub.matrix);
+ logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
+ &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
+ crt_c);
+ MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
+ se_B_null);
+
+ c = 0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i = 0; i < se_B_null->size1; i++) {
+ for (size_t j = 0; j < se_B_null->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
+ }
+ }
+ logl_mle_H0 = logl_H0;
+
+ cout << "MLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE likelihood = " << logl_H0 << endl;
+
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+
+ gsl_matrix_memcpy(V_g_null, V_g);
+ gsl_matrix_memcpy(V_e_null, V_e);
+ gsl_matrix_memcpy(B_null, B);
+
+ // Start reading genotypes and analyze.
+ // Calculate n_bit, the number of bytes per SNP (4 genotypes per byte).
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Read past the three magic-number bytes at the start of the file.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ size_t csnp = 0, t_last = 0;
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+ t_last++;
+ }
+ for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+ if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+ ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // Seek to SNP t: n_bit bytes per SNP, after the 3 magic-number bytes.
+ infile.seekg(t * n_bit + 3);
+
+ // read genotypes
+ x_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0;
+ ci_test = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0;
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total] == 0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean += 2.0;
+ } else {
+ gsl_vector_set(x, ci_test, 1);
+ x_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(x, ci_test, 0);
+ } else {
+ gsl_vector_set(x, ci_test, -9);
+ n_miss++;
+ }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ geno = gsl_vector_get(x, i);
+ if (geno == -9) {
+ gsl_vector_set(x, i, x_mean);
+ geno = x_mean;
+ }
+ }
+
+ gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
+ gsl_vector_memcpy(&Xlarge_col.vector, x);
+ csnp++;
+
+ if (csnp % msize == 0 || csnp == t_last) {
+ size_t l = 0;
+ if (csnp % msize == 0) {
+ l = msize;
+ } else {
+ l = csnp % msize;
+ }
+
+ gsl_matrix_view Xlarge_sub =
+ gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+ gsl_matrix_view UtXlarge_sub =
+ gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
+
+ time_start = clock();
+ eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ &UtXlarge_sub.matrix);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ gsl_matrix_set_zero(Xlarge);
+
+ for (size_t i = 0; i < l; i++) {
+ gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+ gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
+
+ // Initial values.
+ gsl_matrix_memcpy(V_g, V_g_null);
+ gsl_matrix_memcpy(V_e, V_e_null);
+ gsl_matrix_memcpy(B, B_null);
+
+ time_start = clock();
+
+ // Mode 3 (score test) is evaluated before mode 1 (Wald test).
+ if (a_mode == 3 || a_mode == 4) {
+ p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
+ V_e_null, UltVehiY, beta, Vbeta);
+
+ if (p_score < p_nr && crt == 1) {
+ logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (p_lrt < p_nr) {
+ logl_H1 =
+ MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+ if (crt == 1) {
+ p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
+ E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
+ UltVehiE, V_g, V_e, B);
+ p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (p_wald < p_nr) {
+ logl_H1 =
+ MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
+ xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (crt == 1) {
+ p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta[i] = gsl_vector_get(beta, i);
+ }
+
+ c = 0;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg[c] = gsl_matrix_get(V_g, i, j);
+ v_Ve[c] = gsl_matrix_get(V_e, i, j);
+ v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ }
+ }
+ cout << endl;
+
+ infile.close();
+ infile.clear();
+
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null);
+
+ gsl_matrix_free(Xlarge);
+ gsl_matrix_free(UtXlarge);
+
+ return;
}
// Calculate Vg, Ve, B, se(B) in the null mvLMM model.
// Both B and se_B are d by c matrices.
-void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
- const gsl_matrix *UtY, const size_t em_iter,
- const size_t nr_iter, const double em_prec,
- const double nr_prec, const double l_min,
- const double l_max, const size_t n_region,
- gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
- gsl_matrix *se_B) {
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
- size_t dc_size=d_size*c_size, v_size=d_size*(d_size+1)/2;
-
- double logl, crt_a, crt_b, crt_c;
-
- // Large matrices for EM.
- gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
- // Large matrices for NR.
- // Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
- // Each column is H_k^{-1}y_k.
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
- // Each dcxdc block is x_k\otimes H_k^{-1}.
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
- gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
- // Transpose matrices.
- gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *W=gsl_matrix_alloc (c_size, n_size);
- gsl_matrix_transpose_memcpy (Y, UtY);
- gsl_matrix_transpose_memcpy (W, UtW);
-
- // Initial, EM, NR, and calculate B.
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y,
- l_min, l_max, n_region, V_g, V_e, B);
- logl=MphEM ('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat,
- OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, B);
- logl=MphNR ('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all,
- Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
-
- // Free matrices.
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(W);
-
- return;
+void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_matrix *UtY, const size_t em_iter,
+ const size_t nr_iter, const double em_prec,
+ const double nr_prec, const double l_min,
+ const double l_max, const size_t n_region,
+ gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
+ gsl_matrix *se_B) {
+ size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
+ size_t dc_size = d_size * c_size, v_size = d_size * (d_size + 1) / 2;
+
+ double logl, crt_a, crt_b, crt_c;
+
+ // Large matrices for EM.
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+ gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+ // Transpose matrices.
+ gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *W = gsl_matrix_alloc(c_size, n_size);
+ gsl_matrix_transpose_memcpy(Y, UtY);
+ gsl_matrix_transpose_memcpy(W, UtW);
+
+ // Initial, EM, NR, and calculate B.
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max,
+ n_region, V_g, V_e, B);
+ logl = MphEM('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE,
+ UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ logl = MphNR('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g,
+ V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta(eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
+
+ // Free matrices.
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(W);
+
+ return;
}
-void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY,
- const gsl_vector *env) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return;
- }
-
- clock_t time_start=clock();
- time_UtX=0; time_opt=0;
-
- string line;
- char *ch_ptr;
-
- double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
- double crt_a, crt_b, crt_c;
- int n_miss, c_phen;
- double geno, x_mean;
- size_t c=0;
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
- size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
- // Large matrices for EM.
- gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
- // Large matrices for NR.
- // Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
- // Each column is H_k^{-1}y_k.
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
- // Each dcxdc block is x_k\otimes H_k^{-1}.
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
- gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
- gsl_vector *x=gsl_vector_alloc (n_size);
- gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
- gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
- gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
- gsl_vector *beta=gsl_vector_alloc (d_size);
- gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
- // Null estimates for initial values; including env but not
- // including x.
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
- gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
- gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
-
- gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
- gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
- gsl_matrix_view xHi_all_sub1=
- gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
-
- gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
- gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub2=
- gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
- gsl_matrix_transpose_memcpy (Y, UtY);
-
- gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
- gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
- gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
- gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
-
- gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
- gsl_vector_set_zero(&X_row1.vector);
- gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
- gsl_vector_set_zero(&X_row2.vector);
-
- gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
- gsl_vector_set_zero(&B_col1.vector);
- gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col2.vector);
-
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix,
- Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
- UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
- Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub1.matrix, se_B_null1);
-
- c=0;
- Vg_remle_null.clear();
- Ve_remle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
- c+v_size));
- c++;
- }
- }
- beta_remle_null.clear();
- se_beta_remle_null.clear();
- for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
- }
- logl_remle_H0=logl_H0;
-
- cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
- cout.precision(4);
-
- cout<<"REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
- Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub1.matrix, se_B_null1);
-
- c=0;
- Vg_mle_null.clear();
- Ve_mle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
- c++;
- }
- }
- beta_mle_null.clear();
- se_beta_mle_null.clear();
- for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
- }
- logl_mle_H0=logl_H0;
-
- cout<<"MLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE likelihood = "<<logl_H0<<endl;
-
- vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
- for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
- }
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
- }
-
- gsl_matrix_memcpy (V_g_null, V_g);
- gsl_matrix_memcpy (V_e_null, V_e);
- gsl_matrix_memcpy (B_null, B);
-
- // Start reading genotypes and analyze.
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- x_mean=0.0; c_phen=0; n_miss=0;
- gsl_vector_set_zero(x_miss);
- for (size_t i=0; i<ni_total; ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
-
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- }
- else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
- geno=gsl_vector_get(x, i);
- if (x_mean>1) {
- gsl_vector_set(x, i, 2-geno);
- }
- }
-
- // Calculate statistics.
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
- gsl_vector_mul (x, env);
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- //initial values
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- if (a_mode==2 || a_mode==3 || a_mode==4) {
- if (a_mode==3 || a_mode==4) {
- logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval,
- &X_sub2.matrix, Y, U_hat, E_hat, OmegaU,
- OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub2.matrix);
- logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval,
- &X_sub2.matrix, Y, Hi_all,
- &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub2.matrix, se_B_null2);
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval,
- &X_sub2.matrix, Y, U_hat, E_hat, OmegaU,
- OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub2.matrix);
- logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval,
- &X_sub2.matrix, Y, Hi_all,
- &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub2.matrix, se_B_null2);
- }
- }
-
- time_start=clock();
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
- V_g_null, V_e_null, UltVehiY, beta, Vbeta);
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian,
- crt_a, crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
- UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),(double)d_size);
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),
- (double)d_size );
-
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY,
- UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y,
- Hi_all, xHi_all, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
- V_g, V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
-
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
-
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg,
- v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
-
-
- infile.close();
- infile.clear();
-
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(X);
- gsl_matrix_free(V_g);
- gsl_matrix_free(V_e);
- gsl_matrix_free(B);
- gsl_vector_free(beta);
- gsl_matrix_free(Vbeta);
-
- gsl_matrix_free(V_g_null);
- gsl_matrix_free(V_e_null);
- gsl_matrix_free(B_null);
- gsl_matrix_free(se_B_null1);
- gsl_matrix_free(se_B_null2);
-
- return;
+void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY,
+ const gsl_vector *env) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+ time_UtX = 0;
+ time_opt = 0;
+
+ string line;
+ char *ch_ptr;
+
+ double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+ double crt_a, crt_b, crt_c;
+ int n_miss, c_phen;
+ double geno, x_mean;
+ size_t c = 0;
+ size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
+ size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+ // Large matrices for EM.
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+ // Each column is H_k^{-1}y_k.
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+ // Each dcxdc block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+ gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+ gsl_vector *x = gsl_vector_alloc(n_size);
+ gsl_vector *x_miss = gsl_vector_alloc(n_size);
+
+ gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+ gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_vector *beta = gsl_vector_alloc(d_size);
+ gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+ // Null estimates for initial values; including env but not
+ // including x.
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
+ gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
+
+ gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
+ gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
+ gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
+ xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
+
+ gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+ gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub2 =
+ gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+ gsl_matrix_transpose_memcpy(Y, UtY);
+
+ gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
+ gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
+ gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+
+ gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
+ gsl_vector_set_zero(&X_row1.vector);
+ gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
+ gsl_vector_set_zero(&X_row2.vector);
+
+ gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
+ gsl_vector_set_zero(&B_col1.vector);
+ gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col2.vector);
+
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
+ l_max, n_region, V_g, V_e, &B_sub1.matrix);
+ logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub1.matrix);
+ logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+ &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+ se_B_null1);
+
+ c = 0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i = 0; i < se_B_null1->size1; i++) {
+ for (size_t j = 0; j < se_B_null1->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+ }
+ }
+ logl_remle_H0 = logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE likelihood = " << logl_H0 << endl;
+
+ logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub1.matrix);
+ logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+ &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+ se_B_null1);
+
+ c = 0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i = 0; i < se_B_null1->size1; i++) {
+ for (size_t j = 0; j < se_B_null1->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+ }
+ }
+ logl_mle_H0 = logl_H0;
+
+ cout << "MLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE likelihood = " << logl_H0 << endl;
+
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+
+ gsl_matrix_memcpy(V_g_null, V_g);
+ gsl_matrix_memcpy(V_e_null, V_e);
+ gsl_matrix_memcpy(B_null, B);
+
+ // Start reading genotypes and analyze.
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof();
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+
+ x_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean);
+ }
+ geno = gsl_vector_get(x, i);
+ if (x_mean > 1) {
+ gsl_vector_set(x, i, 2 - geno);
+ }
+ }
+
+ // Calculate statistics.
+ time_start = clock();
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+ gsl_vector_mul(x, env);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // initial values
+ gsl_matrix_memcpy(V_g, V_g_null);
+ gsl_matrix_memcpy(V_e, V_e_null);
+ gsl_matrix_memcpy(B, B_null);
+
+ if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
+ if (a_mode == 3 || a_mode == 4) {
+ logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+ Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+ Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+ se_B_null2);
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+ Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+ Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+ se_B_null2);
+ }
+ }
+
+ time_start = clock();
+
+ // 3 is before 1.
+ if (a_mode == 3 || a_mode == 4) {
+ p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
+ V_e_null, UltVehiY, beta, Vbeta);
+ if (p_score < p_nr && crt == 1) {
+ logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+ V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (p_lrt < p_nr) {
+ logl_H1 =
+ MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (crt == 1) {
+ p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+ V_g, V_e, B);
+ p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (p_wald < p_nr) {
+ logl_H1 =
+ MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (crt == 1) {
+ p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (x_mean > 1) {
+ gsl_vector_scale(beta, -1.0);
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta[i] = gsl_vector_get(beta, i);
+ }
+
+ c = 0;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg[c] = gsl_matrix_get(V_g, i, j);
+ v_Ve[c] = gsl_matrix_get(V_e, i, j);
+ v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
+
+ infile.close();
+ infile.clear();
+
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null1);
+ gsl_matrix_free(se_B_null2);
+
+ return;
}
-void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY,
- const gsl_vector *env) {
- string file_bed=file_bfile+".bed";
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return;
- }
-
- clock_t time_start=clock();
- time_UtX=0; time_opt=0;
-
- char ch[1];
- bitset<8> b;
-
- double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
- double crt_a, crt_b, crt_c;
- int n_bit, n_miss, ci_total, ci_test;
- double geno, x_mean;
- size_t c=0;
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
- size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
- // Large matrices for EM.
- gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
- // Large matrices for NR.
- // Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size);
-
- // Each column is H_k^{-1}y_k
- gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size);
-
- // Each dcxdc block is x_k\otimes H_k^{-1}.
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size);
- gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
- gsl_vector *x=gsl_vector_alloc (n_size);
-
- gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
- gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
- gsl_vector *beta=gsl_vector_alloc (d_size);
- gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
- // Null estimates for initial values.
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
- gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
- gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
-
- gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size);
- gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1);
- gsl_matrix_view xHi_all_sub1=
- gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size);
-
- gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
- gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub2=
- gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
- gsl_matrix_transpose_memcpy (Y, UtY);
-
- gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size);
- gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
- gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
- gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
-
- gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
- gsl_vector_set_zero(&X_row1.vector);
- gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
- gsl_vector_set_zero(&X_row2.vector);
-
- gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
- gsl_vector_set_zero(&B_col1.vector);
- gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col2.vector);
-
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix,
- Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
-
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
- Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub1.matrix, se_B_null1);
-
- c=0;
- Vg_remle_null.clear();
- Ve_remle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size,
- c+v_size));
- c++;
- }
- }
- beta_remle_null.clear();
- se_beta_remle_null.clear();
- for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
- }
- logl_remle_H0=logl_H0;
-
- cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
- cout.precision(4);
- cout<<"REMLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
- logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y,
- U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
- UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
- logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y,
- Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e,
- Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub1.matrix, se_B_null1);
-
- c=0;
- Vg_mle_null.clear();
- Ve_mle_null.clear();
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
- Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
- VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
- VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size));
- c++;
- }
- }
- beta_mle_null.clear();
- se_beta_mle_null.clear();
- for (size_t i=0; i<se_B_null1->size1; i++) {
- for (size_t j=0; j<se_B_null1->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
- }
- }
- logl_mle_H0=logl_H0;
-
- cout<<"MLE estimate for Vg in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_g, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Vg): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE estimate for Ve in the null model: "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- cout<<gsl_matrix_get(V_e, i, j)<<"\t";
- }
- cout<<endl;
- }
- cout<<"se(Ve): "<<endl;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=0; j<=i; j++) {
- c=GetIndex(i, j, d_size);
- cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
- }
- cout<<endl;
- }
- cout<<"MLE likelihood = "<<logl_H0<<endl;
-
- vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
- for (size_t i=0; i<d_size; i++) {
- v_beta.push_back(0.0);
- }
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
- }
-
- gsl_matrix_memcpy (V_g_null, V_g);
- gsl_matrix_memcpy (V_e_null, V_e);
- gsl_matrix_memcpy (B_null, B);
-
- // Start reading genotypes and analyze.
- // Calculate n_bit and c, the number of bit for each SNP.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {
- ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
-
- if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
- if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
- else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
- }
- else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
- else {gsl_vector_set(x, ci_test, -9); n_miss++; }
- }
-
- ci_total++;
- ci_test++;
- }
- }
-
- x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
- geno=gsl_vector_get(x,i);
- if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
- if (x_mean>1) {
- gsl_vector_set(x, i, 2-geno);
- }
- }
-
- // Calculate statistics.
- time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
- gsl_vector_mul (x, env);
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
- time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Initial values.
- gsl_matrix_memcpy (V_g, V_g_null);
- gsl_matrix_memcpy (V_e, V_e_null);
- gsl_matrix_memcpy (B, B_null);
-
- if (a_mode==2 || a_mode==3 || a_mode==4) {
- if (a_mode==3 || a_mode==4) {
- logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval,
- &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE,
- UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
- V_e, &B_sub2.matrix);
- logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval,
- &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix,
- Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub2.matrix, se_B_null2);
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval,
- &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE,
- UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
- V_e, &B_sub2.matrix);
- logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval,
- &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix,
- Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY,
- &B_sub2.matrix, se_B_null2);
- }
- }
-
- time_start=clock();
-
- // 3 is before 1.
- if (a_mode==3 || a_mode==4) {
- p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y,
- V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-
- if (p_score<p_nr && crt==1) {
- logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all,
- Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode==2 || a_mode==4) {
- logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
- UltVehiU, UltVehiE, V_g, V_e, B);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
- if (p_lrt<p_nr) {
- logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
- crt_b, crt_c);
-
- // Calculate beta and Vbeta.
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
- if (crt==1) {
- p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode==1 || a_mode==4) {
- logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
- UltVehiU, UltVehiE, V_g, V_e, B);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
-
- if (p_wald<p_nr) {
- logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a,
- crt_b, crt_c);
- p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g,
- V_e, UltVehiY, beta, Vbeta);
-
- if (crt==1) {
- p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
- time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- // Store summary data.
- for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
- }
-
- c=0;
- for (size_t i=0; i<d_size; i++) {
- for (size_t j=i; j<d_size; j++) {
- v_Vg[c]=gsl_matrix_get (V_g, i, j);
- v_Ve[c]=gsl_matrix_get (V_e, i, j);
- v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
- c++;
- }
- }
-
- MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score,
- v_Vg, v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
- }
- cout<<endl;
-
- infile.close();
- infile.clear();
-
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_vector_free(x);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(X);
- gsl_matrix_free(V_g);
- gsl_matrix_free(V_e);
- gsl_matrix_free(B);
- gsl_vector_free(beta);
- gsl_matrix_free(Vbeta);
-
- gsl_matrix_free(V_g_null);
- gsl_matrix_free(V_e_null);
- gsl_matrix_free(B_null);
- gsl_matrix_free(se_B_null1);
- gsl_matrix_free(se_B_null2);
-
- return;
+void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY,
+ const gsl_vector *env) {
+ string file_bed = file_bfile + ".bed";
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return;
+ }
+
+ clock_t time_start = clock();
+ time_UtX = 0;
+ time_opt = 0;
+
+ char ch[1];
+ bitset<8> b;
+
+ double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
+ double crt_a, crt_b, crt_c;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+ size_t c = 0;
+ size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2;
+ size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
+
+ // Large matrices for EM.
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
+
+ // Large matrices for NR.
+ // Each dxd block is H_k^{-1}.
+ gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
+
+ // Each column is H_k^{-1}y_k
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
+
+ // Each dc x d block is x_k\otimes H_k^{-1}.
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
+ gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
+
+ gsl_vector *x = gsl_vector_alloc(n_size);
+
+ gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
+ gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
+ gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_vector *beta = gsl_vector_alloc(d_size);
+ gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
+
+ // Null estimates for initial values.
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
+ gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
+ gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1);
+ gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size);
+
+ gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size);
+ gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1);
+ gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix(
+ xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size);
+
+ gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
+ gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub2 =
+ gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
+
+ gsl_matrix_transpose_memcpy(Y, UtY);
+
+ gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size);
+ gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW);
+ gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+
+ gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1);
+ gsl_vector_set_zero(&X_row1.vector);
+ gsl_vector_view X_row2 = gsl_matrix_row(X, c_size);
+ gsl_vector_set_zero(&X_row2.vector);
+
+ gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1);
+ gsl_vector_set_zero(&B_col1.vector);
+ gsl_vector_view B_col2 = gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col2.vector);
+
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min,
+ l_max, n_region, V_g, V_e, &B_sub1.matrix);
+
+ logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub1.matrix);
+ logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+ &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+ se_B_null1);
+
+ c = 0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i = 0; i < se_B_null1->size1; i++) {
+ for (size_t j = 0; j < se_B_null1->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+ }
+ }
+ logl_remle_H0 = logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+ cout << "REMLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "REMLE likelihood = " << logl_H0 << endl;
+
+ logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
+ V_e, &B_sub1.matrix);
+ logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all,
+ &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a,
+ crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix,
+ se_B_null1);
+
+ c = 0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
+ Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
+ VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
+ VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i = 0; i < se_B_null1->size1; i++) {
+ for (size_t j = 0; j < se_B_null1->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j));
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j));
+ }
+ }
+ logl_mle_H0 = logl_H0;
+
+ cout << "MLE estimate for Vg in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_g, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Vg): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE estimate for Ve in the null model: " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ cout << gsl_matrix_get(V_e, i, j) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "se(Ve): " << endl;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ c = GetIndex(i, j, d_size);
+ cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
+ }
+ cout << endl;
+ }
+ cout << "MLE likelihood = " << logl_H0 << endl;
+
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+
+ gsl_matrix_memcpy(V_g_null, V_g);
+ gsl_matrix_memcpy(V_e_null, V_e);
+ gsl_matrix_memcpy(B_null, B);
+
+ // Start reading genotypes and analyze.
+ // Calculate n_bit, the number of bytes per SNP (four genotypes per byte).
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Read (and discard) the first three magic numbers.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+ if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+ ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // Seek to SNP t: each SNP takes n_bit bytes, after the 3 magic numbers.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ x_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0;
+ ci_test = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+
+ if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total] == 0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean += 2.0;
+ } else {
+ gsl_vector_set(x, ci_test, 1);
+ x_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(x, ci_test, 0);
+ } else {
+ gsl_vector_set(x, ci_test, -9);
+ n_miss++;
+ }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean /= (double)(ni_test - n_miss);
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ geno = gsl_vector_get(x, i);
+ if (geno == -9) {
+ gsl_vector_set(x, i, x_mean);
+ geno = x_mean;
+ }
+ if (x_mean > 1) {
+ gsl_vector_set(x, i, 2 - geno);
+ }
+ }
+
+ // Calculate statistics.
+ time_start = clock();
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+ gsl_vector_mul(x, env);
+ gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+ time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Initial values.
+ gsl_matrix_memcpy(V_g, V_g_null);
+ gsl_matrix_memcpy(V_e, V_e_null);
+ gsl_matrix_memcpy(B, B_null);
+
+ if (a_mode == 2 || a_mode == 3 || a_mode == 4) {
+ if (a_mode == 3 || a_mode == 4) {
+ logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+ Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+ Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+ se_B_null2);
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix,
+ Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX,
+ UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix,
+ Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e,
+ Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix,
+ se_B_null2);
+ }
+ }
+
+ time_start = clock();
+
+ // Mode 3 (score test) is computed before mode 1 (Wald test).
+ if (a_mode == 3 || a_mode == 4) {
+ p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null,
+ V_e_null, UltVehiY, beta, Vbeta);
+
+ if (p_score < p_nr && crt == 1) {
+ logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode == 2 || a_mode == 4) {
+ logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+ V_g, V_e, B);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+
+ if (p_lrt < p_nr) {
+ logl_H1 =
+ MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+ // Calculate beta and Vbeta.
+ p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+ p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
+ if (crt == 1) {
+ p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode == 1 || a_mode == 4) {
+ logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat,
+ OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE,
+ V_g, V_e, B);
+ p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (p_wald < p_nr) {
+ logl_H1 =
+ MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
+ Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e,
+ UltVehiY, beta, Vbeta);
+
+ if (crt == 1) {
+ p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (x_mean > 1) {
+ gsl_vector_scale(beta, -1.0);
+ }
+
+ time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ // Store summary data.
+ for (size_t i = 0; i < d_size; i++) {
+ v_beta[i] = gsl_vector_get(beta, i);
+ }
+
+ c = 0;
+ for (size_t i = 0; i < d_size; i++) {
+ for (size_t j = i; j < d_size; j++) {
+ v_Vg[c] = gsl_matrix_get(V_g, i, j);
+ v_Ve[c] = gsl_matrix_get(V_e, i, j);
+ v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ cout << endl;
+
+ infile.close();
+ infile.clear();
+
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null1);
+ gsl_matrix_free(se_B_null2);
+
+ return;
}
diff --git a/src/mvlmm.h b/src/mvlmm.h
index d495c26..4329ad1 100644
--- a/src/mvlmm.h
+++ b/src/mvlmm.h
@@ -19,89 +19,86 @@
#ifndef __MVLMM_H__
#define __MVLMM_H__
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
#include "io.h"
+#include "param.h"
using namespace std;
class MVLMM {
public:
- // IO-related parameters.
- int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests.
- size_t d_pace; // Display pace.
-
- string file_bfile;
- string file_geno;
- string file_oxford;
- string file_out;
- string path_out;
-
- // MVLMM-related parameters.
- double l_min;
- double l_max;
- size_t n_region;
- double logl_remle_H0, logl_mle_H0;
- vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
- vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
- vector<double> VVe_mle_null;
- vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
- vector<double> se_beta_mle_null;
- double p_nr;
- size_t em_iter, nr_iter;
- double em_prec, nr_prec;
- size_t crt;
-
- // Summary statistics.
- size_t ni_total, ni_test; // Number of individuals.
- size_t ns_total, ns_test; // Number of SNPs.
- size_t n_cvt;
- size_t n_ph;
- double time_UtX; // Time spent on optimization iterations.
- double time_opt; // Time spent on optimization iterations.
-
- // Indicator for individuals (phenotypes): 0 missing, 1
- // available for analysis.
- vector<int> indicator_idv;
-
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
-
- vector<SNPINFO> snpInfo; // Record SNP information.
-
- // Not included in PARAM.
- vector<MPHSUMSTAT> sumStat; // Output SNPSummary Data.
-
- // Main functions
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
- void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY);
- void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY);
- void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY);
- void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY,
- const gsl_vector *env);
- void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY,
- const gsl_vector *env);
- void WriteFiles ();
-
+ // IO-related parameters.
+ int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests.
+ size_t d_pace; // Display pace.
+
+ string file_bfile;
+ string file_geno;
+ string file_oxford;
+ string file_out;
+ string path_out;
+
+ // MVLMM-related parameters.
+ double l_min;
+ double l_max;
+ size_t n_region;
+ double logl_remle_H0, logl_mle_H0;
+ vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
+ vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
+ vector<double> VVe_mle_null;
+ vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
+ vector<double> se_beta_mle_null;
+ double p_nr;
+ size_t em_iter, nr_iter;
+ double em_prec, nr_prec;
+ size_t crt;
+
+ // Summary statistics.
+ size_t ni_total, ni_test; // Number of individuals.
+ size_t ns_total, ns_test; // Number of SNPs.
+ size_t n_cvt;
+ size_t n_ph;
+ double time_UtX; // Time spent computing U^T x products (minutes).
+ double time_opt; // Time spent on optimization iterations.
+
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_idv;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
+
+ vector<SNPINFO> snpInfo; // Record SNP information.
+
+ // Not included in PARAM.
+ vector<MPHSUMSTAT> sumStat; // Output SNPSummary Data.
+
+ // Main functions
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+ void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY);
+ void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY);
+ void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY);
+ void AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY,
+ const gsl_vector *env);
+ void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_matrix *UtY,
+ const gsl_vector *env);
+ void WriteFiles();
};
-void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW,
- const gsl_matrix *UtY, const size_t em_iter,
- const size_t nr_iter, const double em_prec,
- const double nr_prec, const double l_min,
- const double l_max, const size_t n_region,
- gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
- gsl_matrix *se_B);
+void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
+ const gsl_matrix *UtY, const size_t em_iter,
+ const size_t nr_iter, const double em_prec,
+ const double nr_prec, const double l_min,
+ const double l_max, const size_t n_region,
+ gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B,
+ gsl_matrix *se_B);
#endif
-
-
diff --git a/src/param.cpp b/src/param.cpp
index 413d517..2572bbb 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -16,1322 +16,1357 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include <algorithm>
+#include <cmath>
+#include <cstring>
#include <fstream>
+#include <iostream>
#include <string>
-#include <cstring>
#include <sys/stat.h>
-#include <cmath>
-#include <algorithm>
-#include "gsl/gsl_randist.h"
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
-#include "gsl/gsl_blas.h"
+#include "gsl/gsl_randist.h"
+#include "gsl/gsl_vector.h"
#include "eigenlib.h"
+#include "io.h"
#include "mathfunc.h"
#include "param.h"
-#include "io.h"
using namespace std;
-PARAM::PARAM(void):
-mode_silence (false), a_mode (0), k_mode(1), d_pace (100000),
-file_out("result"), path_out("./output/"),
-miss_level(0.05), maf_level(0.01), hwe_level(0), r2_level(0.9999),
-l_min(1e-5), l_max(1e5), n_region(10),p_nr(0.001),em_prec(0.0001),
-nr_prec(0.0001),em_iter(10000),nr_iter(100),crt(0),
-pheno_mean(0), noconstrain (false),
-h_min(-1), h_max(-1), h_scale(-1),
-rho_min(0.0), rho_max(1.0), rho_scale(-1),
-logp_min(0.0), logp_max(0.0), logp_scale(-1),
-h_ngrid(10), rho_ngrid(10),
-s_min(0), s_max(300),
-w_step(100000), s_step(1000000),
-r_pace(10), w_pace(1000),
-n_accept(0),
-n_mh(10),
-geo_mean(2000.0),
-randseed(-1),
-window_cm(0), window_bp(0), window_ns(0), n_block(200),
-error(false),
-ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0),
-time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
-time_UtZ(0.0), time_opt(0.0), time_Omega(0.0)
-{}
+PARAM::PARAM(void)
+ : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000),
+ file_out("result"), path_out("./output/"), miss_level(0.05),
+ maf_level(0.01), hwe_level(0), r2_level(0.9999), l_min(1e-5), l_max(1e5),
+ n_region(10), p_nr(0.001), em_prec(0.0001), nr_prec(0.0001),
+ em_iter(10000), nr_iter(100), crt(0), pheno_mean(0), noconstrain(false),
+ h_min(-1), h_max(-1), h_scale(-1), rho_min(0.0), rho_max(1.0),
+ rho_scale(-1), logp_min(0.0), logp_max(0.0), logp_scale(-1), h_ngrid(10),
+ rho_ngrid(10), s_min(0), s_max(300), w_step(100000), s_step(1000000),
+ r_pace(10), w_pace(1000), n_accept(0), n_mh(10), geo_mean(2000.0),
+ randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200),
+ error(false), ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0),
+ time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
+ time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {}
// Read files: obtain ns_total, ng_total, ns_test, ni_test.
-void PARAM::ReadFiles (void) {
- string file_str;
-
- // Read cat file.
- if (!file_mcat.empty()) {
- if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;}
- } else if (!file_cat.empty()) {
- if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;}
- }
-
- // Read snp weight files.
- if (!file_wcat.empty()) {
- if (ReadFile_wsnp (file_wcat, n_vc, mapRS2wcat)==false) {error=true;}
- }
- if (!file_wsnp.empty()) {
- if (ReadFile_wsnp (file_wsnp, mapRS2wsnp)==false) {error=true;}
- }
-
- // Count number of kinship files.
- if (!file_mk.empty()) {
- if (CountFileLines (file_mk, n_vc)==false) {error=true;}
- }
-
- // Read SNP set.
- if (!file_snps.empty()) {
- if (ReadFile_snps (file_snps, setSnps)==false) {error=true;}
- } else {
- setSnps.clear();
- }
-
- // For prediction.
- if (!file_epm.empty()) {
- if (ReadFile_est (file_epm, est_column, mapRS2est)==false) {
- error=true;
- }
- if (!file_bfile.empty()) {
- file_str=file_bfile+".bim";
- if (ReadFile_bim (file_str, snpInfo)==false) {
- error=true;
- }
- file_str=file_bfile+".fam";
- if (ReadFile_fam (file_str, indicator_pheno, pheno,
- mapID2num, p_column)==false) {
- error=true;
- }
- }
-
- if (!file_geno.empty()) {
- if (ReadFile_pheno (file_pheno, indicator_pheno,
- pheno, p_column)==false) {
- error=true;
- }
-
- if (CountFileLines (file_geno, ns_total)==false) {
- error=true;
- }
- }
-
- if (!file_ebv.empty() ) {
- if (ReadFile_column (file_ebv, indicator_bv,
- vec_bv, 1)==false) {
- error=true;
- }
- }
-
- if (!file_log.empty() ) {
- if (ReadFile_log (file_log, pheno_mean)==false) {
- error=true;
- }
- }
-
- // Convert indicator_pheno to indicator_idv.
- int k=1;
- for (size_t i=0; i<indicator_pheno.size(); i++) {
- k=1;
- for (size_t j=0; j<indicator_pheno[i].size(); j++) {
- if (indicator_pheno[i][j]==0) {k=0;}
- }
- indicator_idv.push_back(k);
- }
-
- ns_test=0;
-
- return;
- }
-
- // Read covariates before the genotype files.
- if (!file_cvt.empty() ) {
- if (ReadFile_cvt (file_cvt, indicator_cvt,
- cvt, n_cvt)==false) {
- error=true;
- }
- if ((indicator_cvt).size()==0) {
- n_cvt=1;
- }
- } else {
- n_cvt=1;
- }
-
- if (!file_gxe.empty() ) {
- if (ReadFile_column (file_gxe, indicator_gxe, gxe, 1)==false) {
- error=true;
- }
- }
- if (!file_weight.empty() ) {
- if (ReadFile_column (file_weight, indicator_weight,
- weight, 1)==false) {
- error=true;
- }
- }
-
- // WJA added.
- // Read genotype and phenotype file for bgen format.
- if (!file_oxford.empty()) {
- file_str=file_oxford+".sample";
- if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,
- indicator_cvt, cvt, n_cvt)==false) {
- error=true;
- }
- if ((indicator_cvt).size()==0) {
- n_cvt=1;
- }
-
- // Post-process covariates and phenotypes, obtain
- // ni_test, save all useful covariates.
- ProcessCvtPhen();
-
- // Obtain covariate matrix.
- gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
- CopyCvt (W);
-
- file_str=file_oxford+".bgen";
- if (ReadFile_bgen (file_str, setSnps, W, indicator_idv,
- indicator_snp, snpInfo, maf_level,
- miss_level, hwe_level, r2_level,
- ns_test)==false) {
- error=true;
- }
- gsl_matrix_free(W);
-
- ns_total=indicator_snp.size();
- }
-
- // Read genotype and phenotype file for PLINK format.
- if (!file_bfile.empty()) {
- file_str=file_bfile+".bim";
- snpInfo.clear();
- if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
-
- // If both fam file and pheno files are used, use
- // phenotypes inside the pheno file.
- if (!file_pheno.empty()) {
-
- // Phenotype file before genotype file.
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
- p_column)==false) {error=true;}
- } else {
- file_str=file_bfile+".fam";
- if (ReadFile_fam (file_str, indicator_pheno, pheno,
- mapID2num, p_column)==false) {error=true;}
- }
-
- // Post-process covariates and phenotypes, obtain
- // ni_test, save all useful covariates.
- ProcessCvtPhen();
-
- // Obtain covariate matrix.
- gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
- CopyCvt (W);
-
- file_str=file_bfile+".bed";
- if (ReadFile_bed (file_str, setSnps, W, indicator_idv,
- indicator_snp, snpInfo, maf_level,
- miss_level, hwe_level, r2_level,
- ns_test) == false) {
- error=true;
- }
- gsl_matrix_free(W);
- ns_total=indicator_snp.size();
- }
-
- // Read genotype and phenotype file for BIMBAM format.
- if (!file_geno.empty()) {
-
- // Annotation file before genotype file.
- if (!file_anno.empty() ) {
- if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
- mapRS2cM)==false) {
- error=true;
- }
- }
-
- // Phenotype file before genotype file.
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
- p_column) == false) {
- error=true;
- }
-
- // Post-process covariates and phenotypes, obtain
- // ni_test, save all useful covariates.
- ProcessCvtPhen();
-
- // Obtain covariate matrix.
- gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
- CopyCvt (W);
-
- if (ReadFile_geno (file_geno, setSnps, W, indicator_idv,
- indicator_snp, maf_level, miss_level,
- hwe_level, r2_level, mapRS2chr, mapRS2bp,
- mapRS2cM, snpInfo, ns_test)==false) {
- error=true;
- }
- gsl_matrix_free(W);
- ns_total=indicator_snp.size();
- }
-
- // Read genotype file for multiple PLINK files.
- if (!file_mbfile.empty()) {
- igzstream infile (file_mbfile.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error! fail to open mbfile file: " << file_mbfile<<endl;
- return;
- }
-
- string file_name;
- size_t t=0, ns_test_tmp=0;
- gsl_matrix *W;
- while (!safeGetline(infile, file_name).eof()) {
- file_str=file_name+".bim";
-
- if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
-
- if (t==0) {
-
- // If both fam file and pheno files are used, use
- // phenotypes inside the pheno file.
- if (!file_pheno.empty()) {
-
- // Phenotype file before genotype file.
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
- p_column)==false) {
- error=true;
- }
- } else {
- file_str=file_name+".fam";
- if (ReadFile_fam (file_str, indicator_pheno, pheno,
- mapID2num, p_column)==false) {
- error=true;
- }
- }
-
- // Post-process covariates and phenotypes, obtain
- // ni_test, save all useful covariates.
- ProcessCvtPhen();
-
- // Obtain covariate matrix.
- W=gsl_matrix_alloc (ni_test, n_cvt);
- CopyCvt (W);
- }
-
- file_str=file_name+".bed";
- if (ReadFile_bed (file_str, setSnps, W, indicator_idv,
- indicator_snp, snpInfo, maf_level,
- miss_level, hwe_level, r2_level,
- ns_test_tmp)==false) {
- error=true;
- }
- mindicator_snp.push_back(indicator_snp);
- msnpInfo.push_back(snpInfo);
- ns_test+=ns_test_tmp;
- ns_total+=indicator_snp.size();
-
- t++;
- }
-
- gsl_matrix_free(W);
-
- infile.close();
- infile.clear();
- }
-
- // Read genotype and phenotype file for multiple BIMBAM files.
- if (!file_mgeno.empty()) {
-
- // Annotation file before genotype file.
- if (!file_anno.empty() ) {
- if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
- mapRS2cM)==false) {
- error=true;
- }
- }
-
- // Phenotype file before genotype file.
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
- p_column)==false) {
- error=true;
- }
-
- // Post-process covariates and phenotypes, obtain ni_test,
- // save all useful covariates.
- ProcessCvtPhen();
-
- // Obtain covariate matrix.
- gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
- CopyCvt (W);
-
- igzstream infile (file_mgeno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error! fail to open mgeno file: "<<file_mgeno<<endl;
- return;
- }
-
- string file_name;
- size_t ns_test_tmp;
- while (!safeGetline(infile, file_name).eof()) {
- if (ReadFile_geno (file_name, setSnps, W, indicator_idv,
- indicator_snp, maf_level, miss_level,
- hwe_level, r2_level, mapRS2chr, mapRS2bp,
- mapRS2cM, snpInfo, ns_test_tmp)==false) {
- error=true;
- }
-
- mindicator_snp.push_back(indicator_snp);
- msnpInfo.push_back(snpInfo);
- ns_test+=ns_test_tmp;
- ns_total+=indicator_snp.size();
- }
-
- gsl_matrix_free(W);
-
- infile.close();
- infile.clear();
- }
-
- if (!file_gene.empty()) {
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
- p_column)==false) {error=true;}
-
- // Convert indicator_pheno to indicator_idv.
- int k=1;
- for (size_t i=0; i<indicator_pheno.size(); i++) {
- k=1;
- for (size_t j=0; j<indicator_pheno[i].size(); j++) {
- if (indicator_pheno[i][j]==0) {k=0;}
- }
- indicator_idv.push_back(k);
- }
-
- // Post-process covariates and phenotypes, obtain
- // ni_test, save all useful covariates.
- ProcessCvtPhen();
-
- // Obtain covariate matrix.
- gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
- CopyCvt (W);
-
- if (ReadFile_gene (file_gene, vec_read, snpInfo,
- ng_total)==false) {
- error=true;
- }
- }
-
- // Read is after gene file.
- if (!file_read.empty() ) {
- if (ReadFile_column (file_read, indicator_read,
- vec_read, 1)==false) {
- error=true;
- }
-
- ni_test=0;
- for (vector<int>::size_type i=0;
- i<(indicator_idv).size();
- ++i) {
- indicator_idv[i]*=indicator_read[i];
- ni_test+=indicator_idv[i];
- }
-
- if (ni_test==0) {
- error=true;
- cout<<"error! number of analyzed individuals equals 0. "<<
- endl;
- return;
- }
- }
-
- // For ridge prediction, read phenotype only.
- if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) {
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
- p_column)==false) {
- error=true;
- }
-
- // Post-process covariates and phenotypes, obtain
- // ni_test, save all useful covariates.
- ProcessCvtPhen();
- }
- return;
+void PARAM::ReadFiles(void) {
+ string file_str;
+
+ // Read cat file.
+ if (!file_mcat.empty()) {
+ if (ReadFile_mcat(file_mcat, mapRS2cat, n_vc) == false) {
+ error = true;
+ }
+ } else if (!file_cat.empty()) {
+ if (ReadFile_cat(file_cat, mapRS2cat, n_vc) == false) {
+ error = true;
+ }
+ }
+
+ // Read snp weight files.
+ if (!file_wcat.empty()) {
+ if (ReadFile_wsnp(file_wcat, n_vc, mapRS2wcat) == false) {
+ error = true;
+ }
+ }
+ if (!file_wsnp.empty()) {
+ if (ReadFile_wsnp(file_wsnp, mapRS2wsnp) == false) {
+ error = true;
+ }
+ }
+
+ // Count number of kinship files.
+ if (!file_mk.empty()) {
+ if (CountFileLines(file_mk, n_vc) == false) {
+ error = true;
+ }
+ }
+
+ // Read SNP set.
+ if (!file_snps.empty()) {
+ if (ReadFile_snps(file_snps, setSnps) == false) {
+ error = true;
+ }
+ } else {
+ setSnps.clear();
+ }
+
+ // For prediction.
+ if (!file_epm.empty()) {
+ if (ReadFile_est(file_epm, est_column, mapRS2est) == false) {
+ error = true;
+ }
+ if (!file_bfile.empty()) {
+ file_str = file_bfile + ".bim";
+ if (ReadFile_bim(file_str, snpInfo) == false) {
+ error = true;
+ }
+ file_str = file_bfile + ".fam";
+ if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) ==
+ false) {
+ error = true;
+ }
+ }
+
+ if (!file_geno.empty()) {
+ if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
+ false) {
+ error = true;
+ }
+
+ if (CountFileLines(file_geno, ns_total) == false) {
+ error = true;
+ }
+ }
+
+ if (!file_ebv.empty()) {
+ if (ReadFile_column(file_ebv, indicator_bv, vec_bv, 1) == false) {
+ error = true;
+ }
+ }
+
+ if (!file_log.empty()) {
+ if (ReadFile_log(file_log, pheno_mean) == false) {
+ error = true;
+ }
+ }
+
+ // Convert indicator_pheno to indicator_idv.
+ int k = 1;
+ for (size_t i = 0; i < indicator_pheno.size(); i++) {
+ k = 1;
+ for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+ if (indicator_pheno[i][j] == 0) {
+ k = 0;
+ }
+ }
+ indicator_idv.push_back(k);
+ }
+
+ ns_test = 0;
+
+ return;
+ }
+
+ // Read covariates before the genotype files.
+ if (!file_cvt.empty()) {
+ if (ReadFile_cvt(file_cvt, indicator_cvt, cvt, n_cvt) == false) {
+ error = true;
+ }
+ if ((indicator_cvt).size() == 0) {
+ n_cvt = 1;
+ }
+ } else {
+ n_cvt = 1;
+ }
+
+ if (!file_gxe.empty()) {
+ if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) {
+ error = true;
+ }
+ }
+ if (!file_weight.empty()) {
+ if (ReadFile_column(file_weight, indicator_weight, weight, 1) == false) {
+ error = true;
+ }
+ }
+
+ // WJA added.
+ // Read genotype and phenotype file for bgen format.
+ if (!file_oxford.empty()) {
+ file_str = file_oxford + ".sample";
+ if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,
+ indicator_cvt, cvt, n_cvt) == false) {
+ error = true;
+ }
+ if ((indicator_cvt).size() == 0) {
+ n_cvt = 1;
+ }
+
+ // Post-process covariates and phenotypes, obtain
+ // ni_test, save all useful covariates.
+ ProcessCvtPhen();
+
+ // Obtain covariate matrix.
+ gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+ CopyCvt(W);
+
+ file_str = file_oxford + ".bgen";
+ if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp,
+ snpInfo, maf_level, miss_level, hwe_level, r2_level,
+ ns_test) == false) {
+ error = true;
+ }
+ gsl_matrix_free(W);
+
+ ns_total = indicator_snp.size();
+ }
+
+ // Read genotype and phenotype file for PLINK format.
+ if (!file_bfile.empty()) {
+ file_str = file_bfile + ".bim";
+ snpInfo.clear();
+ if (ReadFile_bim(file_str, snpInfo) == false) {
+ error = true;
+ }
+
+ // If both fam file and pheno files are used, use
+ // phenotypes inside the pheno file.
+ if (!file_pheno.empty()) {
+
+ // Phenotype file before genotype file.
+ if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
+ false) {
+ error = true;
+ }
+ } else {
+ file_str = file_bfile + ".fam";
+ if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) ==
+ false) {
+ error = true;
+ }
+ }
+
+ // Post-process covariates and phenotypes, obtain
+ // ni_test, save all useful covariates.
+ ProcessCvtPhen();
+
+ // Obtain covariate matrix.
+ gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+ CopyCvt(W);
+
+ file_str = file_bfile + ".bed";
+ if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+ snpInfo, maf_level, miss_level, hwe_level, r2_level,
+ ns_test) == false) {
+ error = true;
+ }
+ gsl_matrix_free(W);
+ ns_total = indicator_snp.size();
+ }
+
+ // Read genotype and phenotype file for BIMBAM format.
+ if (!file_geno.empty()) {
+
+ // Annotation file before genotype file.
+ if (!file_anno.empty()) {
+ if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) {
+ error = true;
+ }
+ }
+
+ // Phenotype file before genotype file.
+ if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+ error = true;
+ }
+
+ // Post-process covariates and phenotypes, obtain
+ // ni_test, save all useful covariates.
+ ProcessCvtPhen();
+
+ // Obtain covariate matrix.
+ gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+ CopyCvt(W);
+
+ if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp,
+ maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
+ mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
+ error = true;
+ }
+ gsl_matrix_free(W);
+ ns_total = indicator_snp.size();
+ }
+
+ // Read genotype file for multiple PLINK files.
+ if (!file_mbfile.empty()) {
+ igzstream infile(file_mbfile.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open mbfile file: " << file_mbfile << endl;
+ return;
+ }
+
+ string file_name;
+ size_t t = 0, ns_test_tmp = 0;
+ gsl_matrix *W;
+ while (!safeGetline(infile, file_name).eof()) {
+ file_str = file_name + ".bim";
+
+ if (ReadFile_bim(file_str, snpInfo) == false) {
+ error = true;
+ }
+
+ if (t == 0) {
+
+ // If both fam file and pheno files are used, use
+ // phenotypes inside the pheno file.
+ if (!file_pheno.empty()) {
+
+ // Phenotype file before genotype file.
+ if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
+ false) {
+ error = true;
+ }
+ } else {
+ file_str = file_name + ".fam";
+ if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num,
+ p_column) == false) {
+ error = true;
+ }
+ }
+
+ // Post-process covariates and phenotypes, obtain
+ // ni_test, save all useful covariates.
+ ProcessCvtPhen();
+
+ // Obtain covariate matrix.
+ W = gsl_matrix_alloc(ni_test, n_cvt);
+ CopyCvt(W);
+ }
+
+ file_str = file_name + ".bed";
+ if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+ snpInfo, maf_level, miss_level, hwe_level, r2_level,
+ ns_test_tmp) == false) {
+ error = true;
+ }
+ mindicator_snp.push_back(indicator_snp);
+ msnpInfo.push_back(snpInfo);
+ ns_test += ns_test_tmp;
+ ns_total += indicator_snp.size();
+
+ t++;
+ }
+
+ gsl_matrix_free(W);
+
+ infile.close();
+ infile.clear();
+ }
+
+ // Read genotype and phenotype file for multiple BIMBAM files.
+ if (!file_mgeno.empty()) {
+
+ // Annotation file before genotype file.
+ if (!file_anno.empty()) {
+ if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) {
+ error = true;
+ }
+ }
+
+ // Phenotype file before genotype file.
+ if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+ error = true;
+ }
+
+ // Post-process covariates and phenotypes, obtain ni_test,
+ // save all useful covariates.
+ ProcessCvtPhen();
+
+ // Obtain covariate matrix.
+ gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+ CopyCvt(W);
+
+ igzstream infile(file_mgeno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error! fail to open mgeno file: " << file_mgeno << endl;
+ return;
+ }
+
+ string file_name;
+ size_t ns_test_tmp;
+ while (!safeGetline(infile, file_name).eof()) {
+ if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp,
+ maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
+ mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) {
+ error = true;
+ }
+
+ mindicator_snp.push_back(indicator_snp);
+ msnpInfo.push_back(snpInfo);
+ ns_test += ns_test_tmp;
+ ns_total += indicator_snp.size();
+ }
+
+ gsl_matrix_free(W);
+
+ infile.close();
+ infile.clear();
+ }
+
+ if (!file_gene.empty()) {
+ if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+ error = true;
+ }
+
+ // Convert indicator_pheno to indicator_idv.
+ int k = 1;
+ for (size_t i = 0; i < indicator_pheno.size(); i++) {
+ k = 1;
+ for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+ if (indicator_pheno[i][j] == 0) {
+ k = 0;
+ }
+ }
+ indicator_idv.push_back(k);
+ }
+
+ // Post-process covariates and phenotypes, obtain
+ // ni_test, save all useful covariates.
+ ProcessCvtPhen();
+
+ // Obtain covariate matrix.
+ gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+ CopyCvt(W);
+
+ if (ReadFile_gene(file_gene, vec_read, snpInfo, ng_total) == false) {
+ error = true;
+ }
+ }
+
+ // Read is after gene file.
+ if (!file_read.empty()) {
+ if (ReadFile_column(file_read, indicator_read, vec_read, 1) == false) {
+ error = true;
+ }
+
+ ni_test = 0;
+ for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+ indicator_idv[i] *= indicator_read[i];
+ ni_test += indicator_idv[i];
+ }
+
+ if (ni_test == 0) {
+ error = true;
+ cout << "error! number of analyzed individuals equals 0. " << endl;
+ return;
+ }
+ }
+
+ // For ridge prediction, read phenotype only.
+ if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) {
+ if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+ error = true;
+ }
+
+ // Post-process covariates and phenotypes, obtain
+ // ni_test, save all useful covariates.
+ ProcessCvtPhen();
+ }
+ return;
}
-void PARAM::CheckParam (void) {
- struct stat fileInfo;
- string str;
-
- // Check parameters.
- if (k_mode!=1 && k_mode!=2) {
- cout<<"error! unknown kinship/relatedness input mode: "<<
- k_mode<<endl;
- error=true;
- }
- if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5
- && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 &&
- a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 &&
- a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 &&
- a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 &&
- a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 &&
- a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 &&
- a_mode!=71) {
- cout<<"error! unknown analysis mode: "<<a_mode<<
- ". make sure -gk or -eigen or -lmm or -bslmm -predict or " <<
- "-calccov is sepcified correctly."<<endl;
- error=true;
- }
- if (miss_level>1) {
- cout<<"error! missing level needs to be between 0 and 1. " <<
- "current value = "<<miss_level<<endl;
- error=true;
- }
- if (maf_level>0.5) {
- cout<<"error! maf level needs to be between 0 and 0.5. " <<
- "current value = "<<maf_level<<endl;
- error=true;
- }
- if (hwe_level>1) {
- cout<<"error! hwe level needs to be between 0 and 1. " <<
- "current value = "<<hwe_level<<endl;
- error=true;
- }
- if (r2_level>1) {
- cout<<"error! r2 level needs to be between 0 and 1. " <<
- "current value = "<<r2_level<<endl;
- error=true;
- }
-
- if (l_max<l_min) {
- cout<<"error! maximum lambda value must be larger than the " <<
- "minimal value. current values = "<<l_max<<" and "<<l_min<<endl;
- error=true;
- }
- if (h_max<h_min) {
- cout<<"error! maximum h value must be larger than the minimal "<<
- "value. current values = "<<h_max<<" and "<<h_min<<endl;
- error=true;
- }
- if (s_max<s_min) {
- cout<<"error! maximum s value must be larger than the minimal "<<
- "value. current values = "<<s_max<<" and "<<s_min<<endl;
- error=true;
- }
- if (rho_max<rho_min) {
- cout<<"error! maximum rho value must be larger than the"<<
- "minimal value. current values = "<<rho_max<<" and "<<
- rho_min<<endl;
- error=true;
- }
- if (logp_max<logp_min) {
- cout<<"error! maximum logp value must be larger than the "<<
- "minimal value. current values = "<<logp_max/log(10)<<
- " and "<<logp_min/log(10)<<endl;
- error=true;
- }
-
- if (h_max>1) {
- cout<<"error! h values must be bewtween 0 and 1. current "<<
- "values = "<<h_max<<" and "<<h_min<<endl;
- error=true;
- }
- if (rho_max>1) {
- cout<<"error! rho values must be between 0 and 1. current "<<
- "values = "<<rho_max<<" and "<<rho_min<<endl;
- error=true;
- }
- if (logp_max>0) {
- cout<<"error! maximum logp value must be smaller than 0. "<<
- "current values = "<<logp_max/log(10)<<" and "<<
- logp_min/log(10)<<endl;
- error=true;
- }
- if (l_max<l_min) {
- cout<<"error! maximum lambda value must be larger than the "<<
- "minimal value. current values = "<<l_max<<" and "<<l_min<<endl;
- error=true;
- }
-
- if (h_scale>1.0) {
- cout<<"error! hscale value must be between 0 and 1. "<<
- "current value = "<<h_scale<<endl;
- error=true;
- }
- if (rho_scale>1.0) {
- cout<<"error! rscale value must be between 0 and 1. "<<
- "current value = "<<rho_scale<<endl;
- error=true;
- }
- if (logp_scale>1.0) {
- cout<<"error! pscale value must be between 0 and 1. "<<
- "current value = "<<logp_scale<<endl;
- error=true;
- }
-
- if (rho_max==1 && rho_min==1 && a_mode==12) {
- cout<<"error! ridge regression does not support a rho "<<
- "parameter. current values = "<<rho_max<<" and "<<rho_min<<endl;
- error=true;
- }
-
- if (window_cm<0) {
- cout<<"error! windowcm values must be non-negative. "<<
- "current values = "<<window_cm<<endl;
- error=true;
- }
-
- if (window_cm==0 && window_bp==0 && window_ns==0) {
- window_bp=1000000;
- }
-
- // Check p_column, and (no need to) sort p_column into
- // ascending order.
- if (p_column.size()==0) {
- p_column.push_back(1);
- } else {
- for (size_t i=0; i<p_column.size(); i++) {
- for (size_t j=0; j<i; j++) {
- if (p_column[i]==p_column[j]) {
- cout<<"error! identical phenotype "<<
- "columns: "<<p_column[i]<<endl;
- error=
- true;}
- }
- }
- }
-
- n_ph=p_column.size();
-
- // Only LMM option (and one prediction option) can deal with
- // multiple phenotypes and no gene expression files.
- if (n_ph>1 && a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 &&
- a_mode!=43) {
- cout<<"error! the current analysis mode "<<a_mode<<
- " can not deal with multiple phenotypes."<<endl;
- error=true;
- }
- if (n_ph>1 && !file_gene.empty() ) {
- cout<<"error! multiple phenotype analysis option not "<<
- "allowed with gene expression files. "<<endl;
- error=true;
- }
-
- if (p_nr>1) {
- cout<<"error! pnr value must be between 0 and 1. current value = "<<
- p_nr<<endl;
- error=true;
- }
-
- //check est_column
- if (est_column.size()==0) {
- if (file_ebv.empty()) {
- est_column.push_back(2);
- est_column.push_back(5);
- est_column.push_back(6);
- est_column.push_back(7);
- } else {
- est_column.push_back(2);
- est_column.push_back(0);
- est_column.push_back(6);
- est_column.push_back(7);
- }
- }
-
- if (est_column.size()!=4) {
- cout<<"error! -en not followed by four numbers. current number = "<<
- est_column.size()<<endl;
- error=true;
- }
- if (est_column[0]==0) {
- cout<<"error! -en rs column can not be zero. current number = "<<
- est_column.size()<<endl;
- error=true;
- }
-
- // Check if files are compatible with each other, and if files exist.
- if (!file_bfile.empty()) {
- str=file_bfile+".bim";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .bim file: "<<str<<endl;
- error=true;
- }
- str=file_bfile+".bed";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .bed file: "<<str<<endl;
- error=true;
- }
- str=file_bfile+".fam";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .fam file: "<<str<<endl;
- error=true;
- }
- }
-
- if (!file_oxford.empty()) {
- str=file_oxford+".bgen";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .bgen file: "<<str<<endl;
- error=true;
- }
- str=file_oxford+".sample";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .sample file: "<<str<<endl;
- error=true;
- }
- }
-
- if ((!file_geno.empty() || !file_gene.empty()) ) {
- str=file_pheno;
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open phenotype file: "<<str<<endl;
- error=true;
- }
- }
-
- str=file_geno;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open mean genotype file: "<<str<<endl;
- error=true;
- }
-
- str=file_gene;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open gene expression file: "<<str<<endl;
- error=true;
- }
-
- str=file_cat;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open category file: "<<str<<endl;
- error=true;
- }
-
- str=file_mcat;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open mcategory file: "<<str<<endl;
- error=true;
- }
-
- str=file_beta;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open beta file: "<<str<<endl;
- error=true;
- }
-
- str=file_cor;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open correlation file: "<<str<<endl;
- error=true;
- }
-
- if (!file_study.empty()) {
- str=file_study+".Vq.txt";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .Vq.txt file: "<<str<<endl;
- error=true;
- }
- str=file_study+".q.txt";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .q.txt file: "<<str<<endl;
- error=true;
- }
- str=file_study+".size.txt";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .size.txt file: "<<str<<endl;
- error=true;
- }
- }
-
- if (!file_ref.empty()) {
- str=file_ref+".S.txt";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .S.txt file: "<<str<<endl;
- error=true;
- }
- str=file_ref+".size.txt";
- if (stat(str.c_str(),&fileInfo)==-1) {
- cout<<"error! fail to open .size.txt file: "<<str<<endl;
- error=true;
- }
- }
-
- str=file_mstudy;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open mstudy file: "<<str<<endl;
- error=true;
- }
-
- str=file_mref;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open mref file: "<<str<<endl;
- error=true;
- }
-
- str=file_mgeno;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open mgeno file: "<<str<<endl;
- error=true;
- }
-
- str=file_mbfile;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open mbfile file: "<<str<<endl;
- error=true;
- }
-
- size_t flag=0;
- if (!file_bfile.empty()) {flag++;}
- if (!file_geno.empty()) {flag++;}
- if (!file_gene.empty()) {flag++;}
-
- // WJA added.
- if (!file_oxford.empty()) {flag++;}
-
- if (flag!=1 && a_mode!=15 && a_mode!=27 && a_mode!=28 &&
- a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62 &&
- a_mode!=63 && a_mode!=66 && a_mode!=67) {
- cout<<"error! either plink binary files, or bimbam mean"<<
- "genotype files, or gene expression files are required."<<endl;
- error=true;
- }
-
- if (file_pheno.empty() && (a_mode==43 || a_mode==5) ) {
- cout<<"error! phenotype file is required."<<endl; error=true;
- }
-
- if (a_mode==61 || a_mode==62) {
- if (!file_beta.empty()) {
- if ( file_mbfile.empty() && file_bfile.empty() &&
- file_mgeno.empty() && file_geno.empty() &&
- file_mref.empty() && file_ref.empty() ) {
- cout<<"error! missing genotype file or ref/mref file."<<endl;
- error=true;
- }
- } else if (!file_pheno.empty()) {
- if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) &&
- file_mk.empty() ) {
- cout<<"error! missing relatedness file. "<<endl; error=true;
- }
- } else if ( (file_mstudy.empty() && file_study.empty()) ||
- (file_mref.empty() && file_ref.empty() ) ) {
- cout<<"error! either beta file, or phenotype files or "<<
- "study/ref mstudy/mref files are required."<<endl;
- error=true;
- }
- }
-
-
- if (a_mode==63) {
- if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) &&
- file_mk.empty() ) {
- cout<<"error! missing relatedness file. "<<endl; error=true;
- }
- if ( file_pheno.empty() ) {
- cout<<"error! missing phenotype file."<<endl; error=true;
- }
- }
-
- if (a_mode==66 || a_mode==67) {
- if (file_beta.empty() ||
- (file_mbfile.empty() && file_bfile.empty() &&
- file_mgeno.empty() && file_geno.empty()) ) {
- cout<<"error! missing beta file or genotype file."<<endl;
- error=true;
- }
- }
-
-
- if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) {
- cout<<"error! estimated parameter file also requires genotype "<<
- "file."<<endl;
- error=true;
- }
- if (!file_ebv.empty() && file_kin.empty()) {
- cout<<"error! estimated breeding value file also requires "<<
- "relatedness file."<<endl;
- error=true;
- }
-
- if (!file_log.empty() && pheno_mean!=0) {
- cout<<"error! either log file or mu value can be provide."<<endl;
- error=true;
- }
-
- str=file_snps;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open snps file: "<<str<<endl;
- error=true;
- }
-
- str=file_log;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open log file: "<<str<<endl;
- error=true;
- }
-
- str=file_anno;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open annotation file: "<<str<<endl;
- error=true;
- }
-
- str=file_kin;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open relatedness matrix file: "<<str<<endl;
- error=true;
- }
-
- str=file_mk;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open relatedness matrix file: "<<str<<endl;
- error=true;
- }
-
- str=file_cvt;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open covariates file: "<<str<<endl;
- error=true;
- }
-
- str=file_gxe;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open environmental covariate file: "<<
- str<<endl;
- error=true;
- }
-
- str=file_weight;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open the residual weight file: "<<str<<endl;
- error=true;
- }
-
- str=file_epm;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open estimated parameter file: "<<str<<endl;
- error=true;
- }
-
- str=file_ebv;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open estimated breeding value file: "<<
- str<<endl;
- error=true;
- }
-
- str=file_read;
- if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {
- cout<<"error! fail to open total read file: "<<str<<endl;
- error=true;
- }
-
- // Check if files are compatible with analysis mode.
- if (k_mode==2 && !file_geno.empty() ) {
- cout<<"error! use \"-km 1\" when using bimbam mean genotype "<<
- "file. "<<endl;
- error=true;
- }
-
- if ((a_mode==1 || a_mode==2 || a_mode==3 || a_mode==4 ||
- a_mode==5 || a_mode==31) &&
- (file_kin.empty() && (file_ku.empty()||file_kd.empty()))) {
- cout<<"error! missing relatedness file. "<<endl;
- error=true;
- }
-
- if ((a_mode==43) && file_kin.empty()) {
- cout<<"error! missing relatedness file. -predict option requires "<<
- "-k option to provide a relatedness file."<<endl;
- error=true;
- }
-
- if ((a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14 ||
- a_mode==16) && !file_cvt.empty()) {
- cout<<"error! -bslmm option does not support covariates files."<<
- endl;
- error=true;
- }
-
- if (a_mode==41 || a_mode==42) {
- if (!file_cvt.empty() ) {
- cout<<"error! -predict option does not support "<<
- "covariates files."<<endl;
- error=true;
- }
- if (file_epm.empty() ) {
- cout<<"error! -predict option requires estimated "<<
- "parameter files."<<endl;
- error=true;
- }
- }
-
- if (file_beta.empty() && (a_mode==27 || a_mode==28) ) {
- cout<<"error! beta effects file is required."<<endl;
- error=true;
- }
-
- return;
+void PARAM::CheckParam(void) {
+ struct stat fileInfo;
+ string str;
+
+ // Check parameters.
+ if (k_mode != 1 && k_mode != 2) {
+ cout << "error! unknown kinship/relatedness input mode: " << k_mode << endl;
+ error = true;
+ }
+ if (a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 && a_mode != 5 &&
+ a_mode != 11 && a_mode != 12 && a_mode != 13 && a_mode != 14 &&
+ a_mode != 15 && a_mode != 21 && a_mode != 22 && a_mode != 25 &&
+ a_mode != 26 && a_mode != 27 && a_mode != 28 && a_mode != 31 &&
+ a_mode != 41 && a_mode != 42 && a_mode != 43 && a_mode != 51 &&
+ a_mode != 52 && a_mode != 53 && a_mode != 54 && a_mode != 61 &&
+ a_mode != 62 && a_mode != 63 && a_mode != 66 && a_mode != 67 &&
+ a_mode != 71) {
+ cout << "error! unknown analysis mode: " << a_mode
+ << ". make sure -gk or -eigen or -lmm or -bslmm -predict or "
+ << "-calccov is sepcified correctly." << endl;
+ error = true;
+ }
+ if (miss_level > 1) {
+ cout << "error! missing level needs to be between 0 and 1. "
+ << "current value = " << miss_level << endl;
+ error = true;
+ }
+ if (maf_level > 0.5) {
+ cout << "error! maf level needs to be between 0 and 0.5. "
+ << "current value = " << maf_level << endl;
+ error = true;
+ }
+ if (hwe_level > 1) {
+ cout << "error! hwe level needs to be between 0 and 1. "
+ << "current value = " << hwe_level << endl;
+ error = true;
+ }
+ if (r2_level > 1) {
+ cout << "error! r2 level needs to be between 0 and 1. "
+ << "current value = " << r2_level << endl;
+ error = true;
+ }
+
+ if (l_max < l_min) {
+ cout << "error! maximum lambda value must be larger than the "
+ << "minimal value. current values = " << l_max << " and " << l_min
+ << endl;
+ error = true;
+ }
+ if (h_max < h_min) {
+ cout << "error! maximum h value must be larger than the minimal "
+ << "value. current values = " << h_max << " and " << h_min << endl;
+ error = true;
+ }
+ if (s_max < s_min) {
+ cout << "error! maximum s value must be larger than the minimal "
+ << "value. current values = " << s_max << " and " << s_min << endl;
+ error = true;
+ }
+ if (rho_max < rho_min) {
+ cout << "error! maximum rho value must be larger than the"
+ << "minimal value. current values = " << rho_max << " and " << rho_min
+ << endl;
+ error = true;
+ }
+ if (logp_max < logp_min) {
+ cout << "error! maximum logp value must be larger than the "
+ << "minimal value. current values = " << logp_max / log(10) << " and "
+ << logp_min / log(10) << endl;
+ error = true;
+ }
+
+ if (h_max > 1) {
+ cout << "error! h values must be bewtween 0 and 1. current "
+ << "values = " << h_max << " and " << h_min << endl;
+ error = true;
+ }
+ if (rho_max > 1) {
+ cout << "error! rho values must be between 0 and 1. current "
+ << "values = " << rho_max << " and " << rho_min << endl;
+ error = true;
+ }
+ if (logp_max > 0) {
+ cout << "error! maximum logp value must be smaller than 0. "
+ << "current values = " << logp_max / log(10) << " and "
+ << logp_min / log(10) << endl;
+ error = true;
+ }
+ if (l_max < l_min) {
+ cout << "error! maximum lambda value must be larger than the "
+ << "minimal value. current values = " << l_max << " and " << l_min
+ << endl;
+ error = true;
+ }
+
+ if (h_scale > 1.0) {
+ cout << "error! hscale value must be between 0 and 1. "
+ << "current value = " << h_scale << endl;
+ error = true;
+ }
+ if (rho_scale > 1.0) {
+ cout << "error! rscale value must be between 0 and 1. "
+ << "current value = " << rho_scale << endl;
+ error = true;
+ }
+ if (logp_scale > 1.0) {
+ cout << "error! pscale value must be between 0 and 1. "
+ << "current value = " << logp_scale << endl;
+ error = true;
+ }
+
+ if (rho_max == 1 && rho_min == 1 && a_mode == 12) {
+ cout << "error! ridge regression does not support a rho "
+ << "parameter. current values = " << rho_max << " and " << rho_min
+ << endl;
+ error = true;
+ }
+
+ if (window_cm < 0) {
+ cout << "error! windowcm values must be non-negative. "
+ << "current values = " << window_cm << endl;
+ error = true;
+ }
+
+ if (window_cm == 0 && window_bp == 0 && window_ns == 0) {
+ window_bp = 1000000;
+ }
+
+ // Check p_column, and (no need to) sort p_column into
+ // ascending order.
+ if (p_column.size() == 0) {
+ p_column.push_back(1);
+ } else {
+ for (size_t i = 0; i < p_column.size(); i++) {
+ for (size_t j = 0; j < i; j++) {
+ if (p_column[i] == p_column[j]) {
+ cout << "error! identical phenotype "
+ << "columns: " << p_column[i] << endl;
+ error = true;
+ }
+ }
+ }
+ }
+
+ n_ph = p_column.size();
+
+ // Only LMM option (and one prediction option) can deal with
+ // multiple phenotypes and no gene expression files.
+ if (n_ph > 1 && a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 &&
+ a_mode != 43) {
+ cout << "error! the current analysis mode " << a_mode
+ << " can not deal with multiple phenotypes." << endl;
+ error = true;
+ }
+ if (n_ph > 1 && !file_gene.empty()) {
+ cout << "error! multiple phenotype analysis option not "
+ << "allowed with gene expression files. " << endl;
+ error = true;
+ }
+
+ if (p_nr > 1) {
+ cout << "error! pnr value must be between 0 and 1. current value = " << p_nr
+ << endl;
+ error = true;
+ }
+
+ // check est_column
+ if (est_column.size() == 0) {
+ if (file_ebv.empty()) {
+ est_column.push_back(2);
+ est_column.push_back(5);
+ est_column.push_back(6);
+ est_column.push_back(7);
+ } else {
+ est_column.push_back(2);
+ est_column.push_back(0);
+ est_column.push_back(6);
+ est_column.push_back(7);
+ }
+ }
+
+ if (est_column.size() != 4) {
+ cout << "error! -en not followed by four numbers. current number = "
+ << est_column.size() << endl;
+ error = true;
+ }
+ if (est_column[0] == 0) {
+ cout << "error! -en rs column can not be zero. current number = "
+ << est_column.size() << endl;
+ error = true;
+ }
+
+ // Check if files are compatible with each other, and if files exist.
+ if (!file_bfile.empty()) {
+ str = file_bfile + ".bim";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .bim file: " << str << endl;
+ error = true;
+ }
+ str = file_bfile + ".bed";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .bed file: " << str << endl;
+ error = true;
+ }
+ str = file_bfile + ".fam";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .fam file: " << str << endl;
+ error = true;
+ }
+ }
+
+ if (!file_oxford.empty()) {
+ str = file_oxford + ".bgen";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .bgen file: " << str << endl;
+ error = true;
+ }
+ str = file_oxford + ".sample";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .sample file: " << str << endl;
+ error = true;
+ }
+ }
+
+ if ((!file_geno.empty() || !file_gene.empty())) {
+ str = file_pheno;
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open phenotype file: " << str << endl;
+ error = true;
+ }
+ }
+
+ str = file_geno;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open mean genotype file: " << str << endl;
+ error = true;
+ }
+
+ str = file_gene;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open gene expression file: " << str << endl;
+ error = true;
+ }
+
+ str = file_cat;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open category file: " << str << endl;
+ error = true;
+ }
+
+ str = file_mcat;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open mcategory file: " << str << endl;
+ error = true;
+ }
+
+ str = file_beta;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open beta file: " << str << endl;
+ error = true;
+ }
+
+ str = file_cor;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open correlation file: " << str << endl;
+ error = true;
+ }
+
+ if (!file_study.empty()) {
+ str = file_study + ".Vq.txt";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .Vq.txt file: " << str << endl;
+ error = true;
+ }
+ str = file_study + ".q.txt";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .q.txt file: " << str << endl;
+ error = true;
+ }
+ str = file_study + ".size.txt";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .size.txt file: " << str << endl;
+ error = true;
+ }
+ }
+
+ if (!file_ref.empty()) {
+ str = file_ref + ".S.txt";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .S.txt file: " << str << endl;
+ error = true;
+ }
+ str = file_ref + ".size.txt";
+ if (stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open .size.txt file: " << str << endl;
+ error = true;
+ }
+ }
+
+ str = file_mstudy;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open mstudy file: " << str << endl;
+ error = true;
+ }
+
+ str = file_mref;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open mref file: " << str << endl;
+ error = true;
+ }
+
+ str = file_mgeno;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open mgeno file: " << str << endl;
+ error = true;
+ }
+
+ str = file_mbfile;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open mbfile file: " << str << endl;
+ error = true;
+ }
+
+ size_t flag = 0;
+ if (!file_bfile.empty()) {
+ flag++;
+ }
+ if (!file_geno.empty()) {
+ flag++;
+ }
+ if (!file_gene.empty()) {
+ flag++;
+ }
+
+ // WJA added.
+ if (!file_oxford.empty()) {
+ flag++;
+ }
+
+ if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 &&
+ a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 &&
+ a_mode != 63 && a_mode != 66 && a_mode != 67) {
+ cout << "error! either plink binary files, or bimbam mean"
+ << "genotype files, or gene expression files are required." << endl;
+ error = true;
+ }
+
+ if (file_pheno.empty() && (a_mode == 43 || a_mode == 5)) {
+ cout << "error! phenotype file is required." << endl;
+ error = true;
+ }
+
+ if (a_mode == 61 || a_mode == 62) {
+ if (!file_beta.empty()) {
+ if (file_mbfile.empty() && file_bfile.empty() && file_mgeno.empty() &&
+ file_geno.empty() && file_mref.empty() && file_ref.empty()) {
+ cout << "error! missing genotype file or ref/mref file." << endl;
+ error = true;
+ }
+ } else if (!file_pheno.empty()) {
+ if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) &&
+ file_mk.empty()) {
+ cout << "error! missing relatedness file. " << endl;
+ error = true;
+ }
+ } else if ((file_mstudy.empty() && file_study.empty()) ||
+ (file_mref.empty() && file_ref.empty())) {
+ cout << "error! either beta file, or phenotype files or "
+ << "study/ref mstudy/mref files are required." << endl;
+ error = true;
+ }
+ }
+
+ if (a_mode == 63) {
+ if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) &&
+ file_mk.empty()) {
+ cout << "error! missing relatedness file. " << endl;
+ error = true;
+ }
+ if (file_pheno.empty()) {
+ cout << "error! missing phenotype file." << endl;
+ error = true;
+ }
+ }
+
+ if (a_mode == 66 || a_mode == 67) {
+ if (file_beta.empty() || (file_mbfile.empty() && file_bfile.empty() &&
+ file_mgeno.empty() && file_geno.empty())) {
+ cout << "error! missing beta file or genotype file." << endl;
+ error = true;
+ }
+ }
+
+ if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) {
+ cout << "error! estimated parameter file also requires genotype "
+ << "file." << endl;
+ error = true;
+ }
+ if (!file_ebv.empty() && file_kin.empty()) {
+ cout << "error! estimated breeding value file also requires "
+ << "relatedness file." << endl;
+ error = true;
+ }
+
+ if (!file_log.empty() && pheno_mean != 0) {
+ cout << "error! either log file or mu value can be provide." << endl;
+ error = true;
+ }
+
+ str = file_snps;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open snps file: " << str << endl;
+ error = true;
+ }
+
+ str = file_log;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open log file: " << str << endl;
+ error = true;
+ }
+
+ str = file_anno;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open annotation file: " << str << endl;
+ error = true;
+ }
+
+ str = file_kin;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open relatedness matrix file: " << str << endl;
+ error = true;
+ }
+
+ str = file_mk;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open relatedness matrix file: " << str << endl;
+ error = true;
+ }
+
+ str = file_cvt;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open covariates file: " << str << endl;
+ error = true;
+ }
+
+ str = file_gxe;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open environmental covariate file: " << str << endl;
+ error = true;
+ }
+
+ str = file_weight;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open the residual weight file: " << str << endl;
+ error = true;
+ }
+
+ str = file_epm;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open estimated parameter file: " << str << endl;
+ error = true;
+ }
+
+ str = file_ebv;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open estimated breeding value file: " << str
+ << endl;
+ error = true;
+ }
+
+ str = file_read;
+ if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
+ cout << "error! fail to open total read file: " << str << endl;
+ error = true;
+ }
+
+ // Check if files are compatible with analysis mode.
+ if (k_mode == 2 && !file_geno.empty()) {
+ cout << "error! use \"-km 1\" when using bimbam mean genotype "
+ << "file. " << endl;
+ error = true;
+ }
+
+ if ((a_mode == 1 || a_mode == 2 || a_mode == 3 || a_mode == 4 ||
+ a_mode == 5 || a_mode == 31) &&
+ (file_kin.empty() && (file_ku.empty() || file_kd.empty()))) {
+ cout << "error! missing relatedness file. " << endl;
+ error = true;
+ }
+
+ if ((a_mode == 43) && file_kin.empty()) {
+ cout << "error! missing relatedness file. -predict option requires "
+ << "-k option to provide a relatedness file." << endl;
+ error = true;
+ }
+
+ if ((a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14 ||
+ a_mode == 16) &&
+ !file_cvt.empty()) {
+ cout << "error! -bslmm option does not support covariates files." << endl;
+ error = true;
+ }
+
+ if (a_mode == 41 || a_mode == 42) {
+ if (!file_cvt.empty()) {
+ cout << "error! -predict option does not support "
+ << "covariates files." << endl;
+ error = true;
+ }
+ if (file_epm.empty()) {
+ cout << "error! -predict option requires estimated "
+ << "parameter files." << endl;
+ error = true;
+ }
+ }
+
+ if (file_beta.empty() && (a_mode == 27 || a_mode == 28)) {
+ cout << "error! beta effects file is required." << endl;
+ error = true;
+ }
+
+ return;
}
-void PARAM::CheckData (void) {
+void PARAM::CheckData(void) {
// WJA NOTE: I added this condition so that covariates can be added
// through sample, probably not exactly what is wanted.
- if(file_oxford.empty())
- {
- if ((file_cvt).empty() || (indicator_cvt).size()==0) {
- n_cvt=1;
- }
- }
-
- if ( (a_mode==66 || a_mode==67) && (v_pve.size()!=n_vc)) {
- cout<<"error! the number of pve estimates does not equal to "<<
- "the number of categories in the cat file:"<<v_pve.size()<<" "<<
- n_vc<<endl;
- error=true;
- }
-
- if ( (indicator_cvt).size()!=0 &&
- (indicator_cvt).size()!=(indicator_idv).size()) {
- error=true;
- cout << "error! number of rows in the covariates file do not "<<
- "match the number of individuals. "<<endl;
+ if (file_oxford.empty()) {
+ if ((file_cvt).empty() || (indicator_cvt).size() == 0) {
+ n_cvt = 1;
+ }
+ }
+
+ if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) {
+ cout << "error! the number of pve estimates does not equal to "
+ << "the number of categories in the cat file:" << v_pve.size() << " "
+ << n_vc << endl;
+ error = true;
+ }
+
+ if ((indicator_cvt).size() != 0 &&
+ (indicator_cvt).size() != (indicator_idv).size()) {
+ error = true;
+ cout << "error! number of rows in the covariates file do not "
+ << "match the number of individuals. " << endl;
return;
}
- if ( (indicator_gxe).size()!=0 && (indicator_gxe).size() !=
- (indicator_idv).size()) {
- error=true;
- cout<<"error! number of rows in the gxe file do not match the number "<<
- "of individuals. "<<endl;
+ if ((indicator_gxe).size() != 0 &&
+ (indicator_gxe).size() != (indicator_idv).size()) {
+ error = true;
+ cout << "error! number of rows in the gxe file do not match the number "
+ << "of individuals. " << endl;
return;
}
- if ( (indicator_weight).size()!=0 &&
- (indicator_weight).size()!=(indicator_idv).size()) {
- error=true;
- cout<<"error! number of rows in the weight file do not match "<<
- "the number of individuals. "<<endl;
+ if ((indicator_weight).size() != 0 &&
+ (indicator_weight).size() != (indicator_idv).size()) {
+ error = true;
+ cout << "error! number of rows in the weight file do not match "
+ << "the number of individuals. " << endl;
return;
}
- if ( (indicator_read).size()!=0 &&
- (indicator_read).size()!=(indicator_idv).size()) {
- error=true;
- cout<<"error! number of rows in the total read file do not "<<
- "match the number of individuals. "<<endl;
+ if ((indicator_read).size() != 0 &&
+ (indicator_read).size() != (indicator_idv).size()) {
+ error = true;
+ cout << "error! number of rows in the total read file do not "
+ << "match the number of individuals. " << endl;
return;
}
- // Calculate ni_total and ni_test, and set indicator_idv to 0
- // whenever indicator_cvt=0, and calculate np_obs and np_miss.
- ni_total=(indicator_idv).size();
-
- ni_test=0;
- for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
- ni_test++;
- }
-
- ni_cvt=0;
- for (size_t i=0; i<indicator_cvt.size(); i++) {
- if (indicator_cvt[i]==0) {continue;}
- ni_cvt++;
- }
-
- np_obs=0; np_miss=0;
- for (size_t i=0; i<indicator_pheno.size(); i++) {
- if (indicator_cvt.size()!=0) {
- if (indicator_cvt[i]==0) {continue;}
- }
-
- if (indicator_gxe.size()!=0) {
- if (indicator_gxe[i]==0) {continue;}
- }
-
- if (indicator_weight.size()!=0) {
- if (indicator_weight[i]==0) {continue;}
- }
-
- for (size_t j=0; j<indicator_pheno[i].size(); j++) {
- if (indicator_pheno[i][j]==0) {
- np_miss++;
- } else {
- np_obs++;
- }
- }
- }
-
- if (ni_test==0 && file_cor.empty() && file_mstudy.empty() &&
- file_study.empty() && file_beta.empty() && file_bf.empty() ) {
- error=true;
- cout<<"error! number of analyzed individuals equals 0. "<<endl;
- return;
- }
-
- if (a_mode==43) {
- if (ni_cvt==ni_test) {
- error=true;
- cout<<"error! no individual has missing "<<
- "phenotypes."<<endl;
- return;
- }
- if ((np_obs+np_miss)!=(ni_cvt*n_ph)) {
- error=true;
- cout<<"error! number of phenotypes do not match the "<<
- "summation of missing and observed phenotypes."<<
- endl;
- return;
- }
- }
-
- // Output some information.
- if (file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
- a_mode!=15 && a_mode!=27 && a_mode!=28) {
- cout<<"## number of total individuals = "<<ni_total<<endl;
- if (a_mode==43) {
- cout<<"## number of analyzed individuals = "<<ni_cvt<<endl;
- cout<<"## number of individuals with full phenotypes = "<<
- ni_test<<endl;
- } else {
- cout<<"## number of analyzed individuals = "<<ni_test<<endl;
- }
- cout<<"## number of covariates = "<<n_cvt<<endl;
- cout<<"## number of phenotypes = "<<n_ph<<endl;
- if (a_mode==43) {
- cout<<"## number of observed data = "<<np_obs<<endl;
- cout<<"## number of missing data = "<<np_miss<<endl;
- }
- if (!file_gene.empty()) {
- cout<<"## number of total genes = "<<ng_total<<endl;
- } else if (file_epm.empty() && a_mode!=43 && a_mode!=5) {
- cout<<"## number of total SNPs = "<<ns_total<<endl;
- cout<<"## number of analyzed SNPs = "<<ns_test<<endl;
- } else {}
- }
-
- // Set d_pace to 1000 for gene expression.
- if (!file_gene.empty() && d_pace==100000) {
- d_pace=1000;
- }
-
- // For case-control studies, count # cases and # controls.
- int flag_cc=0;
- if (a_mode==13) {
- ni_case=0;
- ni_control=0;
- for (size_t i=0; i<indicator_idv.size(); i++) {
- if (indicator_idv[i]==0) {continue;}
-
- if (pheno[i][0]==0) {ni_control++;}
- else if (pheno[i][0]==1) {ni_case++;}
- else {flag_cc=1;}
- }
- cout<<"## number of cases = "<<ni_case<<endl;
- cout<<"## number of controls = "<<ni_control<<endl;
- }
-
- if (flag_cc==1) {cout<<"Unexpected non-binary phenotypes for "<<
- "case/control analysis. Use default (BSLMM) analysis instead."<<
- endl;
- a_mode=11;
- }
-
- // Set parameters for BSLMM and check for predict.
- if (a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14) {
- if (a_mode==11) {
- n_mh=1;
- }
- if (logp_min==0) {
- logp_min=-1.0*log((double)ns_test);
- }
-
- if (h_scale==-1) {
- h_scale=min(1.0, 10.0/sqrt((double)ni_test) );
- }
- if (rho_scale==-1) {
- rho_scale=min(1.0, 10.0/sqrt((double)ni_test) );
- }
- if (logp_scale==-1) {
- logp_scale=min(1.0, 5.0/sqrt((double)ni_test) );
- }
-
- if (h_min==-1) {h_min=0.0;}
- if (h_max==-1) {h_max=1.0;}
-
- if (s_max>ns_test) {
- s_max=ns_test;
- cout<<"s_max is re-set to the number of analyzed SNPs."<<
- endl;
- }
- if (s_max<s_min) {
- cout<<"error! maximum s value must be larger than the "<<
- "minimal value. current values = "<<s_max<<" and "<<
- s_min<<endl;
- error=true;
- }
- } else if (a_mode==41 || a_mode==42) {
- if (indicator_bv.size()!=0) {
- if (indicator_idv.size()!=indicator_bv.size()) {
- cout<<"error! number of rows in the "<<
- "phenotype file does not match that in the "<<
- "estimated breeding value file: "<<
- indicator_idv.size()<<"\t"<<indicator_bv.size()<<
- endl;
- error=true;
- } else {
- size_t flag_bv=0;
- for (size_t i=0; i<(indicator_bv).size(); ++i) {
- if (indicator_idv[i]!=indicator_bv[i]) {flag_bv++;}
- }
- if (flag_bv!=0) {
- cout<<"error! individuals with missing value in the "<<
- "phenotype file does not match that in the "<<
- "estimated breeding value file: "<<flag_bv<<endl;
- error=true;
- }
- }
- }
- }
-
- if (a_mode==62 && !file_beta.empty() && mapRS2wcat.size()==0) {
- cout<<"vc analysis with beta files requires -wcat file."<<endl;
- error=true;
- }
- if (a_mode==67 && mapRS2wcat.size()==0) {
- cout<<"ci analysis with beta files requires -wcat file."<<endl;
- error=true;
- }
-
- // File_mk needs to contain more than one line.
- if (n_vc==1 && !file_mk.empty()) {
- cout<<"error! -mk file should contain more than one line."<<endl;
- error=true;
- }
-
- return;
-}
+ // Calculate ni_total and ni_test, and set indicator_idv to 0
+ // whenever indicator_cvt=0, and calculate np_obs and np_miss.
+ ni_total = (indicator_idv).size();
+
+ ni_test = 0;
+ for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+ ni_test++;
+ }
+
+ ni_cvt = 0;
+ for (size_t i = 0; i < indicator_cvt.size(); i++) {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+ ni_cvt++;
+ }
+
+ np_obs = 0;
+ np_miss = 0;
+ for (size_t i = 0; i < indicator_pheno.size(); i++) {
+ if (indicator_cvt.size() != 0) {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+ }
+
+ if (indicator_gxe.size() != 0) {
+ if (indicator_gxe[i] == 0) {
+ continue;
+ }
+ }
+
+ if (indicator_weight.size() != 0) {
+ if (indicator_weight[i] == 0) {
+ continue;
+ }
+ }
+
+ for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+ if (indicator_pheno[i][j] == 0) {
+ np_miss++;
+ } else {
+ np_obs++;
+ }
+ }
+ }
+
+ if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() &&
+ file_study.empty() && file_beta.empty() && file_bf.empty()) {
+ error = true;
+ cout << "error! number of analyzed individuals equals 0. " << endl;
+ return;
+ }
+
+ if (a_mode == 43) {
+ if (ni_cvt == ni_test) {
+ error = true;
+ cout << "error! no individual has missing "
+ << "phenotypes." << endl;
+ return;
+ }
+ if ((np_obs + np_miss) != (ni_cvt * n_ph)) {
+ error = true;
+ cout << "error! number of phenotypes do not match the "
+ << "summation of missing and observed phenotypes." << endl;
+ return;
+ }
+ }
+
+ // Output some information.
+ if (file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
+ a_mode != 15 && a_mode != 27 && a_mode != 28) {
+ cout << "## number of total individuals = " << ni_total << endl;
+ if (a_mode == 43) {
+ cout << "## number of analyzed individuals = " << ni_cvt << endl;
+ cout << "## number of individuals with full phenotypes = " << ni_test
+ << endl;
+ } else {
+ cout << "## number of analyzed individuals = " << ni_test << endl;
+ }
+ cout << "## number of covariates = " << n_cvt << endl;
+ cout << "## number of phenotypes = " << n_ph << endl;
+ if (a_mode == 43) {
+ cout << "## number of observed data = " << np_obs << endl;
+ cout << "## number of missing data = " << np_miss << endl;
+ }
+ if (!file_gene.empty()) {
+ cout << "## number of total genes = " << ng_total << endl;
+ } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) {
+ cout << "## number of total SNPs = " << ns_total << endl;
+ cout << "## number of analyzed SNPs = " << ns_test << endl;
+ } else {
+ }
+ }
+
+ // Set d_pace to 1000 for gene expression.
+ if (!file_gene.empty() && d_pace == 100000) {
+ d_pace = 1000;
+ }
+
+ // For case-control studies, count # cases and # controls.
+ int flag_cc = 0;
+ if (a_mode == 13) {
+ ni_case = 0;
+ ni_control = 0;
+ for (size_t i = 0; i < indicator_idv.size(); i++) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+
+ if (pheno[i][0] == 0) {
+ ni_control++;
+ } else if (pheno[i][0] == 1) {
+ ni_case++;
+ } else {
+ flag_cc = 1;
+ }
+ }
+ cout << "## number of cases = " << ni_case << endl;
+ cout << "## number of controls = " << ni_control << endl;
+ }
+
+ if (flag_cc == 1) {
+ cout << "Unexpected non-binary phenotypes for "
+ << "case/control analysis. Use default (BSLMM) analysis instead."
+ << endl;
+ a_mode = 11;
+ }
+
+ // Set parameters for BSLMM and check for predict.
+ if (a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14) {
+ if (a_mode == 11) {
+ n_mh = 1;
+ }
+ if (logp_min == 0) {
+ logp_min = -1.0 * log((double)ns_test);
+ }
+
+ if (h_scale == -1) {
+ h_scale = min(1.0, 10.0 / sqrt((double)ni_test));
+ }
+ if (rho_scale == -1) {
+ rho_scale = min(1.0, 10.0 / sqrt((double)ni_test));
+ }
+ if (logp_scale == -1) {
+ logp_scale = min(1.0, 5.0 / sqrt((double)ni_test));
+ }
+
+ if (h_min == -1) {
+ h_min = 0.0;
+ }
+ if (h_max == -1) {
+ h_max = 1.0;
+ }
+
+ if (s_max > ns_test) {
+ s_max = ns_test;
+ cout << "s_max is re-set to the number of analyzed SNPs." << endl;
+ }
+ if (s_max < s_min) {
+ cout << "error! maximum s value must be larger than the "
+ << "minimal value. current values = " << s_max << " and " << s_min
+ << endl;
+ error = true;
+ }
+ } else if (a_mode == 41 || a_mode == 42) {
+ if (indicator_bv.size() != 0) {
+ if (indicator_idv.size() != indicator_bv.size()) {
+ cout << "error! number of rows in the "
+ << "phenotype file does not match that in the "
+ << "estimated breeding value file: " << indicator_idv.size()
+ << "\t" << indicator_bv.size() << endl;
+ error = true;
+ } else {
+ size_t flag_bv = 0;
+ for (size_t i = 0; i < (indicator_bv).size(); ++i) {
+ if (indicator_idv[i] != indicator_bv[i]) {
+ flag_bv++;
+ }
+ }
+ if (flag_bv != 0) {
+ cout << "error! individuals with missing value in the "
+ << "phenotype file does not match that in the "
+ << "estimated breeding value file: " << flag_bv << endl;
+ error = true;
+ }
+ }
+ }
+ }
-void PARAM::PrintSummary () {
- if (n_ph==1) {
- cout<<"pve estimate ="<<pve_null<<endl;
- cout<<"se(pve) ="<<pve_se_null<<endl;
- } else {
+ if (a_mode == 62 && !file_beta.empty() && mapRS2wcat.size() == 0) {
+ cout << "vc analysis with beta files requires -wcat file." << endl;
+ error = true;
+ }
+ if (a_mode == 67 && mapRS2wcat.size() == 0) {
+ cout << "ci analysis with beta files requires -wcat file." << endl;
+ error = true;
+ }
+
+ // File_mk needs to contain more than one line.
+ if (n_vc == 1 && !file_mk.empty()) {
+ cout << "error! -mk file should contain more than one line." << endl;
+ error = true;
+ }
+
+ return;
+}
- }
- return;
+void PARAM::PrintSummary() {
+ if (n_ph == 1) {
+ cout << "pve estimate =" << pve_null << endl;
+ cout << "se(pve) =" << pve_se_null << endl;
+ } else {
+ }
+ return;
}
-void PARAM::ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
- string file_str;
-
- if (!file_bfile.empty()) {
- file_str=file_bfile+".bed";
- if (ReadFile_bed (file_str, indicator_idv, indicator_snp,
- UtX, K, calc_K)==false) {
- error=true;
- }
- }
- else {
- if (ReadFile_geno (file_geno, indicator_idv, indicator_snp,
- UtX, K, calc_K)==false) {
- error=true;
- }
- }
-
- return;
+void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
+ string file_str;
+
+ if (!file_bfile.empty()) {
+ file_str = file_bfile + ".bed";
+ if (ReadFile_bed(file_str, indicator_idv, indicator_snp, UtX, K, calc_K) ==
+ false) {
+ error = true;
+ }
+ } else {
+ if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K,
+ calc_K) == false) {
+ error = true;
+ }
+ }
+
+ return;
}
-void PARAM::ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K,
- const bool calc_K) {
- string file_str;
-
- if (!file_bfile.empty()) {
- file_str=file_bfile+".bed";
- if (ReadFile_bed (file_str, indicator_idv, indicator_snp,
- Xt, K, calc_K, ni_test, ns_test)==false) {
- error=true;
- }
- } else {
- if (ReadFile_geno (file_geno, indicator_idv, indicator_snp,
- Xt, K, calc_K, ni_test, ns_test)==false) {
- error=true;
- }
- }
-
- return;
+void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+ const bool calc_K) {
+ string file_str;
+
+ if (!file_bfile.empty()) {
+ file_str = file_bfile + ".bed";
+ if (ReadFile_bed(file_str, indicator_idv, indicator_snp, Xt, K, calc_K,
+ ni_test, ns_test) == false) {
+ error = true;
+ }
+ } else {
+ if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
+ ni_test, ns_test) == false) {
+ error = true;
+ }
+ }
+
+ return;
}
-void PARAM::CalcKin (gsl_matrix *matrix_kin) {
- string file_str;
-
- gsl_matrix_set_zero (matrix_kin);
-
- if (!file_bfile.empty() ) {
- file_str=file_bfile+".bed";
- if (PlinkKin (file_str, indicator_snp, a_mode-20, d_pace,
- matrix_kin)==false) {
- error=true;
- }
- }
- else if (!file_oxford.empty() ) {
- file_str=file_oxford+".bgen";
- if (bgenKin (file_str, indicator_snp, a_mode-20, d_pace,
- matrix_kin)==false) {
- error=true;
- }
- }
- else {
- file_str=file_geno;
- if (BimbamKin (file_str, indicator_snp, a_mode-20, d_pace,
- matrix_kin)==false) {
- error=true;
- }
- }
-
- return;
+void PARAM::CalcKin(gsl_matrix *matrix_kin) {
+ string file_str;
+
+ gsl_matrix_set_zero(matrix_kin);
+
+ if (!file_bfile.empty()) {
+ file_str = file_bfile + ".bed";
+ if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
+ false) {
+ error = true;
+ }
+ } else if (!file_oxford.empty()) {
+ file_str = file_oxford + ".bgen";
+ if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
+ false) {
+ error = true;
+ }
+ } else {
+ file_str = file_geno;
+ if (BimbamKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
+ false) {
+ error = true;
+ }
+ }
+
+ return;
}
// From an existing n by nd A and K matrices, compute the d by d S
// matrix (which is not necessary symmetric).
-void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt,
- gsl_matrix *S) {
- size_t n_vc=S->size1, ni_test=A->size1;
+void compAKtoS(const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt,
+ gsl_matrix *S) {
+ size_t n_vc = S->size1, ni_test = A->size1;
double di, dj, tr_AK, sum_A, sum_K, s_A, s_K, sum_AK, tr_A, tr_K, d;
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=0; j<n_vc; j++) {
- tr_AK=0; sum_A=0; sum_K=0; sum_AK=0; tr_A=0; tr_K=0;
- for (size_t l=0; l<ni_test; l++) {
- s_A=0; s_K=0;
- for (size_t k=0; k<ni_test; k++) {
- di=gsl_matrix_get(A, l, k+ni_test*i);
- dj=gsl_matrix_get(K, l, k+ni_test*j);
- s_A+=di; s_K+=dj;
-
- tr_AK+=di*dj; sum_A+=di; sum_K+=dj;
- if (l==k) {tr_A+=di; tr_K+=dj;}
- }
- sum_AK+=s_A*s_K;
- }
-
- sum_A/=(double)ni_test;
- sum_K/=(double)ni_test;
- sum_AK/=(double)ni_test;
- tr_A-=sum_A;
- tr_K-=sum_K;
- d=tr_AK-2*sum_AK+sum_A*sum_K;
-
- if (tr_A==0 || tr_K==0) {
- d=0;
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = 0; j < n_vc; j++) {
+ tr_AK = 0;
+ sum_A = 0;
+ sum_K = 0;
+ sum_AK = 0;
+ tr_A = 0;
+ tr_K = 0;
+ for (size_t l = 0; l < ni_test; l++) {
+ s_A = 0;
+ s_K = 0;
+ for (size_t k = 0; k < ni_test; k++) {
+ di = gsl_matrix_get(A, l, k + ni_test * i);
+ dj = gsl_matrix_get(K, l, k + ni_test * j);
+ s_A += di;
+ s_K += dj;
+
+ tr_AK += di * dj;
+ sum_A += di;
+ sum_K += dj;
+ if (l == k) {
+ tr_A += di;
+ tr_K += dj;
+ }
+ }
+ sum_AK += s_A * s_K;
+ }
+
+ sum_A /= (double)ni_test;
+ sum_K /= (double)ni_test;
+ sum_AK /= (double)ni_test;
+ tr_A -= sum_A;
+ tr_K -= sum_K;
+ d = tr_AK - 2 * sum_AK + sum_A * sum_K;
+
+ if (tr_A == 0 || tr_K == 0) {
+ d = 0;
} else {
- d=d/(tr_A*tr_K)-1/(double)(ni_test-n_cvt);
+ d = d / (tr_A * tr_K) - 1 / (double)(ni_test - n_cvt);
}
- gsl_matrix_set (S, i, j, d);
+ gsl_matrix_set(S, i, j, d);
}
}
@@ -1340,187 +1375,195 @@ void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt,
// Copied from lmm.cpp; is used in the following function compKtoV
// map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
-size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
- if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {
- cout<<"error in GetabIndex."<<endl;
- return 0;
- }
- size_t index;
- size_t l, h;
- if (b>a) {l=a; h=b;} else {l=b; h=a;}
-
- size_t n=n_cvt+2;
- index=(2*n-l+2)*(l-1)/2+h-l;
-
- return index;
+size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt) {
+ if (a > n_cvt + 2 || b > n_cvt + 2 || a <= 0 || b <= 0) {
+ cout << "error in GetabIndex." << endl;
+ return 0;
+ }
+ size_t index;
+ size_t l, h;
+ if (b > a) {
+ l = a;
+ h = b;
+ } else {
+ l = b;
+ h = a;
+ }
+
+ size_t n = n_cvt + 2;
+ index = (2 * n - l + 2) * (l - 1) / 2 + h - l;
+
+ return index;
}
// From an existing n by nd (centered) G matrix, compute the d+1 by
// d*(d-1)/2*(d+1) Q matrix where inside i'th d+1 by d+1 matrix, each
// element is tr(KiKlKjKm)-r*tr(KmKiKl)-r*tr(KlKjKm)+r^2*tr(KlKm),
// where r=n/(n-1)
-void compKtoV (const gsl_matrix *G, gsl_matrix *V) {
- size_t n_vc=G->size2/G->size1, ni_test=G->size1;
+void compKtoV(const gsl_matrix *G, gsl_matrix *V) {
+ size_t n_vc = G->size2 / G->size1, ni_test = G->size1;
- gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, (n_vc*(n_vc+1))/2*ni_test);
- gsl_vector *trKiKj=gsl_vector_alloc( n_vc*(n_vc+1)/2 );
- gsl_vector *trKi=gsl_vector_alloc(n_vc);
+ gsl_matrix *KiKj =
+ gsl_matrix_alloc(ni_test, (n_vc * (n_vc + 1)) / 2 * ni_test);
+ gsl_vector *trKiKj = gsl_vector_alloc(n_vc * (n_vc + 1) / 2);
+ gsl_vector *trKi = gsl_vector_alloc(n_vc);
- double d, tr, r=(double)ni_test/(double)(ni_test-1);
+ double d, tr, r = (double)ni_test / (double)(ni_test - 1);
size_t t, t_il, t_jm, t_lm, t_im, t_jl, t_ij;
// Compute KiKj for all pairs of i and j (not including the identity
// matrix).
- t=0;
- for (size_t i=0; i<n_vc; i++) {
- gsl_matrix_const_view Ki=
- gsl_matrix_const_submatrix(G, 0, i*ni_test, ni_test, ni_test);
- for (size_t j=i; j<n_vc; j++) {
- gsl_matrix_const_view Kj=
- gsl_matrix_const_submatrix(G, 0, j*ni_test, ni_test, ni_test);
- gsl_matrix_view KiKj_sub=
- gsl_matrix_submatrix (KiKj, 0, t*ni_test, ni_test, ni_test);
- eigenlib_dgemm ("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0,
- &KiKj_sub.matrix);
+ t = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ gsl_matrix_const_view Ki =
+ gsl_matrix_const_submatrix(G, 0, i * ni_test, ni_test, ni_test);
+ for (size_t j = i; j < n_vc; j++) {
+ gsl_matrix_const_view Kj =
+ gsl_matrix_const_submatrix(G, 0, j * ni_test, ni_test, ni_test);
+ gsl_matrix_view KiKj_sub =
+ gsl_matrix_submatrix(KiKj, 0, t * ni_test, ni_test, ni_test);
+ eigenlib_dgemm("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0,
+ &KiKj_sub.matrix);
t++;
}
}
// Compute trKi, trKiKj.
- t=0;
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
- tr=0;
- for (size_t k=0; k<ni_test; k++) {
- tr+=gsl_matrix_get (KiKj, k, t*ni_test+k);
+ t = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = i; j < n_vc; j++) {
+ tr = 0;
+ for (size_t k = 0; k < ni_test; k++) {
+ tr += gsl_matrix_get(KiKj, k, t * ni_test + k);
}
- gsl_vector_set (trKiKj, t, tr);
+ gsl_vector_set(trKiKj, t, tr);
t++;
}
- tr=0;
- for (size_t k=0; k<ni_test; k++) {
- tr+=gsl_matrix_get (G, k, i*ni_test+k);
+ tr = 0;
+ for (size_t k = 0; k < ni_test; k++) {
+ tr += gsl_matrix_get(G, k, i * ni_test + k);
}
- gsl_vector_set (trKi, i, tr);
+ gsl_vector_set(trKi, i, tr);
}
// Compute V.
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
- t_ij=GetabIndex (i+1, j+1, n_vc-2);
- for (size_t l=0; l<n_vc+1; l++) {
- for (size_t m=0; m<n_vc+1; m++) {
- if (l!=n_vc && m!=n_vc) {
- t_il=GetabIndex (i+1, l+1, n_vc-2);
- t_jm=GetabIndex (j+1, m+1, n_vc-2);
- t_lm=GetabIndex (l+1, m+1, n_vc-2);
- tr=0;
- for (size_t k=0; k<ni_test; k++) {
- gsl_vector_const_view KiKl_row=
- gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
- gsl_vector_const_view KiKl_col=
- gsl_matrix_const_column (KiKj, t_il*ni_test+k);
- gsl_vector_const_view KjKm_row=
- gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test);
- gsl_vector_const_view KjKm_col=
- gsl_matrix_const_column (KiKj, t_jm*ni_test+k);
-
- gsl_vector_const_view Kl_row=
- gsl_matrix_const_subrow (G, k, l*ni_test, ni_test);
- gsl_vector_const_view Km_row=
- gsl_matrix_const_subrow (G, k, m*ni_test, ni_test);
-
- if (i<=l && j<=m) {
- gsl_blas_ddot (&KiKl_row.vector, &KjKm_col.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d);
- tr-=r*d;
- } else if (i<=l && j>m) {
- gsl_blas_ddot (&KiKl_row.vector, &KjKm_row.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d);
- tr-=r*d;
- } else if (i>l && j<=m) {
- gsl_blas_ddot (&KiKl_col.vector, &KjKm_col.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d);
- tr-=r*d;
- } else {
- gsl_blas_ddot (&KiKl_col.vector, &KjKm_row.vector, &d);
- tr+=d;
- gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d);
- tr-=r*d;
- gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d);
- tr-=r*d;
- }
- }
-
- tr+=r*r*gsl_vector_get (trKiKj, t_lm);
- } else if (l!=n_vc && m==n_vc) {
- t_il=GetabIndex (i+1, l+1, n_vc-2);
- t_jl=GetabIndex (j+1, l+1, n_vc-2);
- tr=0;
- for (size_t k=0; k<ni_test; k++) {
- gsl_vector_const_view KiKl_row=
- gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
- gsl_vector_const_view KiKl_col=
- gsl_matrix_const_column (KiKj, t_il*ni_test+k);
- gsl_vector_const_view Kj_row=
- gsl_matrix_const_subrow (G, k, j*ni_test, ni_test);
-
- if (i<=l) {
- gsl_blas_ddot (&KiKl_row.vector, &Kj_row.vector, &d);
- tr+=d;
- } else {
- gsl_blas_ddot (&KiKl_col.vector, &Kj_row.vector, &d);
- tr+=d;
- }
- }
- tr+=-r*gsl_vector_get (trKiKj, t_il) -
- r*gsl_vector_get (trKiKj, t_jl)+r*r*gsl_vector_get (trKi, l);
- } else if (l==n_vc && m!=n_vc) {
- t_jm=GetabIndex (j+1, m+1, n_vc-2);
- t_im=GetabIndex (i+1, m+1, n_vc-2);
- tr=0;
- for (size_t k=0; k<ni_test; k++) {
- gsl_vector_const_view KjKm_row=
- gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test);
- gsl_vector_const_view KjKm_col=
- gsl_matrix_const_column (KiKj, t_jm*ni_test+k);
- gsl_vector_const_view Ki_row=
- gsl_matrix_const_subrow (G, k, i*ni_test, ni_test);
-
- if (j<=m) {
- gsl_blas_ddot (&KjKm_row.vector, &Ki_row.vector, &d);
- tr+=d;
- } else {
- gsl_blas_ddot (&KjKm_col.vector, &Ki_row.vector, &d);
- tr+=d;
- }
- }
- tr+=-r*gsl_vector_get (trKiKj, t_im) -
- r*gsl_vector_get (trKiKj, t_jm)+r*r*gsl_vector_get (trKi, m);
- } else {
- tr=gsl_vector_get (trKiKj, t_ij) -
- r*gsl_vector_get (trKi, i) -
- r*gsl_vector_get (trKi, j)+r*r*(double)(ni_test-1);
- }
-
- gsl_matrix_set (V, l, t_ij*(n_vc+1)+m, tr);
- }
- }
- }
- }
-
- gsl_matrix_scale (V, 1.0/pow((double)ni_test, 2) );
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = i; j < n_vc; j++) {
+ t_ij = GetabIndex(i + 1, j + 1, n_vc - 2);
+ for (size_t l = 0; l < n_vc + 1; l++) {
+ for (size_t m = 0; m < n_vc + 1; m++) {
+ if (l != n_vc && m != n_vc) {
+ t_il = GetabIndex(i + 1, l + 1, n_vc - 2);
+ t_jm = GetabIndex(j + 1, m + 1, n_vc - 2);
+ t_lm = GetabIndex(l + 1, m + 1, n_vc - 2);
+ tr = 0;
+ for (size_t k = 0; k < ni_test; k++) {
+ gsl_vector_const_view KiKl_row =
+ gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test);
+ gsl_vector_const_view KiKl_col =
+ gsl_matrix_const_column(KiKj, t_il * ni_test + k);
+ gsl_vector_const_view KjKm_row =
+ gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test);
+ gsl_vector_const_view KjKm_col =
+ gsl_matrix_const_column(KiKj, t_jm * ni_test + k);
+
+ gsl_vector_const_view Kl_row =
+ gsl_matrix_const_subrow(G, k, l * ni_test, ni_test);
+ gsl_vector_const_view Km_row =
+ gsl_matrix_const_subrow(G, k, m * ni_test, ni_test);
+
+ if (i <= l && j <= m) {
+ gsl_blas_ddot(&KiKl_row.vector, &KjKm_col.vector, &d);
+ tr += d;
+ gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d);
+ tr -= r * d;
+ gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d);
+ tr -= r * d;
+ } else if (i <= l && j > m) {
+ gsl_blas_ddot(&KiKl_row.vector, &KjKm_row.vector, &d);
+ tr += d;
+ gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d);
+ tr -= r * d;
+ gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d);
+ tr -= r * d;
+ } else if (i > l && j <= m) {
+ gsl_blas_ddot(&KiKl_col.vector, &KjKm_col.vector, &d);
+ tr += d;
+ gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d);
+ tr -= r * d;
+ gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d);
+ tr -= r * d;
+ } else {
+ gsl_blas_ddot(&KiKl_col.vector, &KjKm_row.vector, &d);
+ tr += d;
+ gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d);
+ tr -= r * d;
+ gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d);
+ tr -= r * d;
+ }
+ }
+
+ tr += r * r * gsl_vector_get(trKiKj, t_lm);
+ } else if (l != n_vc && m == n_vc) {
+ t_il = GetabIndex(i + 1, l + 1, n_vc - 2);
+ t_jl = GetabIndex(j + 1, l + 1, n_vc - 2);
+ tr = 0;
+ for (size_t k = 0; k < ni_test; k++) {
+ gsl_vector_const_view KiKl_row =
+ gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test);
+ gsl_vector_const_view KiKl_col =
+ gsl_matrix_const_column(KiKj, t_il * ni_test + k);
+ gsl_vector_const_view Kj_row =
+ gsl_matrix_const_subrow(G, k, j * ni_test, ni_test);
+
+ if (i <= l) {
+ gsl_blas_ddot(&KiKl_row.vector, &Kj_row.vector, &d);
+ tr += d;
+ } else {
+ gsl_blas_ddot(&KiKl_col.vector, &Kj_row.vector, &d);
+ tr += d;
+ }
+ }
+ tr += -r * gsl_vector_get(trKiKj, t_il) -
+ r * gsl_vector_get(trKiKj, t_jl) +
+ r * r * gsl_vector_get(trKi, l);
+ } else if (l == n_vc && m != n_vc) {
+ t_jm = GetabIndex(j + 1, m + 1, n_vc - 2);
+ t_im = GetabIndex(i + 1, m + 1, n_vc - 2);
+ tr = 0;
+ for (size_t k = 0; k < ni_test; k++) {
+ gsl_vector_const_view KjKm_row =
+ gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test);
+ gsl_vector_const_view KjKm_col =
+ gsl_matrix_const_column(KiKj, t_jm * ni_test + k);
+ gsl_vector_const_view Ki_row =
+ gsl_matrix_const_subrow(G, k, i * ni_test, ni_test);
+
+ if (j <= m) {
+ gsl_blas_ddot(&KjKm_row.vector, &Ki_row.vector, &d);
+ tr += d;
+ } else {
+ gsl_blas_ddot(&KjKm_col.vector, &Ki_row.vector, &d);
+ tr += d;
+ }
+ }
+ tr += -r * gsl_vector_get(trKiKj, t_im) -
+ r * gsl_vector_get(trKiKj, t_jm) +
+ r * r * gsl_vector_get(trKi, m);
+ } else {
+ tr = gsl_vector_get(trKiKj, t_ij) - r * gsl_vector_get(trKi, i) -
+ r * gsl_vector_get(trKi, j) + r * r * (double)(ni_test - 1);
+ }
+
+ gsl_matrix_set(V, l, t_ij * (n_vc + 1) + m, tr);
+ }
+ }
+ }
+ }
+
+ gsl_matrix_scale(V, 1.0 / pow((double)ni_test, 2));
gsl_matrix_free(KiKj);
gsl_vector_free(trKiKj);
@@ -1530,21 +1573,21 @@ void compKtoV (const gsl_matrix *G, gsl_matrix *V) {
}
// Perform Jacknife sampling for variance of S.
-void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A,
- const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) {
- size_t n_vc=Svar->size1, ni_test=A->size1, n_cvt=W->size2;
+void JackknifeAKtoS(const gsl_matrix *W, const gsl_matrix *A,
+ const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) {
+ size_t n_vc = Svar->size1, ni_test = A->size1, n_cvt = W->size2;
- vector<vector<vector<double> > > trAK, sumAK;
- vector<vector<double> > sumA, sumK, trA, trK, sA, sK;
+ vector<vector<vector<double>>> trAK, sumAK;
+ vector<vector<double>> sumA, sumK, trA, trK, sA, sK;
vector<double> vec_tmp;
double di, dj, d, m, v;
// Initialize and set all elements to zero.
- for (size_t i=0; i<ni_test; i++) {
+ for (size_t i = 0; i < ni_test; i++) {
vec_tmp.push_back(0);
}
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
sumA.push_back(vec_tmp);
sumK.push_back(vec_tmp);
trA.push_back(vec_tmp);
@@ -1553,82 +1596,93 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A,
sK.push_back(vec_tmp);
}
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
trAK.push_back(sumK);
sumAK.push_back(sumK);
}
// Run jackknife.
- for (size_t i=0; i<n_vc; i++) {
- for (size_t l=0; l<ni_test; l++) {
- for (size_t k=0; k<ni_test; k++) {
- di=gsl_matrix_get(A, l, k+ni_test*i);
- dj=gsl_matrix_get(K, l, k+ni_test*i);
-
- for (size_t t=0; t<ni_test; t++) {
- if (t==l || t==k) {continue;}
- sumA[i][t]+=di;
- sumK[i][t]+=dj;
- if (l==k) {trA[i][t]+=di; trK[i][t]+=dj;}
- }
- sA[i][l]+=di;
- sK[i][l]+=dj;
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t l = 0; l < ni_test; l++) {
+ for (size_t k = 0; k < ni_test; k++) {
+ di = gsl_matrix_get(A, l, k + ni_test * i);
+ dj = gsl_matrix_get(K, l, k + ni_test * i);
+
+ for (size_t t = 0; t < ni_test; t++) {
+ if (t == l || t == k) {
+ continue;
+ }
+ sumA[i][t] += di;
+ sumK[i][t] += dj;
+ if (l == k) {
+ trA[i][t] += di;
+ trK[i][t] += dj;
+ }
+ }
+ sA[i][l] += di;
+ sK[i][l] += dj;
}
}
- for (size_t t=0; t<ni_test; t++) {
- sumA[i][t]/=(double)(ni_test-1);
- sumK[i][t]/=(double)(ni_test-1);
+ for (size_t t = 0; t < ni_test; t++) {
+ sumA[i][t] /= (double)(ni_test - 1);
+ sumK[i][t] /= (double)(ni_test - 1);
}
}
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=0; j<n_vc; j++) {
- for (size_t l=0; l<ni_test; l++) {
- for (size_t k=0; k<ni_test; k++) {
- di=gsl_matrix_get(A, l, k+ni_test*i);
- dj=gsl_matrix_get(K, l, k+ni_test*j);
- d=di*dj;
-
- for (size_t t=0; t<ni_test; t++) {
- if (t==l || t==k) {continue;}
- trAK[i][j][t]+=d;
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = 0; j < n_vc; j++) {
+ for (size_t l = 0; l < ni_test; l++) {
+ for (size_t k = 0; k < ni_test; k++) {
+ di = gsl_matrix_get(A, l, k + ni_test * i);
+ dj = gsl_matrix_get(K, l, k + ni_test * j);
+ d = di * dj;
+
+ for (size_t t = 0; t < ni_test; t++) {
+ if (t == l || t == k) {
+ continue;
+ }
+ trAK[i][j][t] += d;
}
- }
+ }
- for (size_t t=0; t<ni_test; t++) {
- if (t==l) {continue;}
- di=gsl_matrix_get(A, l, t+ni_test*i);
- dj=gsl_matrix_get(K, l, t+ni_test*j);
+ for (size_t t = 0; t < ni_test; t++) {
+ if (t == l) {
+ continue;
+ }
+ di = gsl_matrix_get(A, l, t + ni_test * i);
+ dj = gsl_matrix_get(K, l, t + ni_test * j);
- sumAK[i][j][t]+=(sA[i][l]-di)*(sK[j][l]-dj);
- }
+ sumAK[i][j][t] += (sA[i][l] - di) * (sK[j][l] - dj);
+ }
}
- for (size_t t=0; t<ni_test; t++) {
- sumAK[i][j][t]/=(double)(ni_test-1);
+ for (size_t t = 0; t < ni_test; t++) {
+ sumAK[i][j][t] /= (double)(ni_test - 1);
}
- m=0; v=0;
- for (size_t t=0; t<ni_test; t++) {
- d=trAK[i][j][t]-2*sumAK[i][j][t]+sumA[i][t]*sumK[j][t];
- if ( (trA[i][t]-sumA[i][t])==0 || (trK[j][t]-sumK[j][t])==0) {
- d=0;
- } else {
- d/=(trA[i][t]-sumA[i][t])*(trK[j][t]-sumK[j][t]);
- d-=1/(double)(ni_test-n_cvt-1);
- }
- m+=d; v+=d*d;
+ m = 0;
+ v = 0;
+ for (size_t t = 0; t < ni_test; t++) {
+ d = trAK[i][j][t] - 2 * sumAK[i][j][t] + sumA[i][t] * sumK[j][t];
+ if ((trA[i][t] - sumA[i][t]) == 0 || (trK[j][t] - sumK[j][t]) == 0) {
+ d = 0;
+ } else {
+ d /= (trA[i][t] - sumA[i][t]) * (trK[j][t] - sumK[j][t]);
+ d -= 1 / (double)(ni_test - n_cvt - 1);
+ }
+ m += d;
+ v += d * d;
}
- m/=(double)ni_test;
- v/=(double)ni_test;
- v-=m*m;
- v*=(double)(ni_test-1);
- gsl_matrix_set (Svar, i, j, v);
- if (n_cvt==1) {
- d=gsl_matrix_get (S, i, j);
- d=(double)ni_test*d-(double)(ni_test-1)*m;
- gsl_matrix_set (S, i, j, d);
+ m /= (double)ni_test;
+ v /= (double)ni_test;
+ v -= m * m;
+ v *= (double)(ni_test - 1);
+ gsl_matrix_set(Svar, i, j, v);
+ if (n_cvt == 1) {
+ d = gsl_matrix_get(S, i, j);
+ d = (double)ni_test * d - (double)(ni_test - 1) * m;
+ gsl_matrix_set(S, i, j, d);
}
}
}
@@ -1638,561 +1692,590 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A,
// Compute the d by d S matrix with its d by d variance matrix of
// Svar, and the d+1 by d(d+1) matrix of Q for V(q).
-void PARAM::CalcS (const map<string, double> &mapRS2wA,
- const map<string, double> &mapRS2wK,
- const gsl_matrix *W, gsl_matrix *A,
- gsl_matrix *K, gsl_matrix *S,
- gsl_matrix *Svar, gsl_vector *ns) {
+void PARAM::CalcS(const map<string, double> &mapRS2wA,
+ const map<string, double> &mapRS2wK, const gsl_matrix *W,
+ gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar,
+ gsl_vector *ns) {
string file_str;
- gsl_matrix_set_zero (S);
- gsl_matrix_set_zero (Svar);
- gsl_vector_set_zero (ns);
+ gsl_matrix_set_zero(S);
+ gsl_matrix_set_zero(Svar);
+ gsl_vector_set_zero(ns);
// Compute the kinship matrix G for multiple categories; these
// matrices are not centered, for convienence of Jacknife sampling.
- if (!file_bfile.empty() ) {
- file_str=file_bfile+".bed";
- if (mapRS2wA.size()==0) {
- if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK,
- mapRS2cat, snpInfo, W, K, ns)==false) {
- error=true;
+ if (!file_bfile.empty()) {
+ file_str = file_bfile + ".bed";
+ if (mapRS2wA.size() == 0) {
+ if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK,
+ mapRS2cat, snpInfo, W, K, ns) == false) {
+ error = true;
}
} else {
- if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA,
- mapRS2cat, snpInfo, W, A, ns)==false) {
- error=true;
+ if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA,
+ mapRS2cat, snpInfo, W, A, ns) == false) {
+ error = true;
}
}
} else if (!file_geno.empty()) {
- file_str=file_geno;
- if (mapRS2wA.size()==0) {
- if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp,
- mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) {
- error=true;
+ file_str = file_geno;
+ if (mapRS2wA.size() == 0) {
+ if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK,
+ mapRS2cat, snpInfo, W, K, ns) == false) {
+ error = true;
}
} else {
- if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp,
- mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) {
- error=true;
+ if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA,
+ mapRS2cat, snpInfo, W, A, ns) == false) {
+ error = true;
}
}
- } else if (!file_mbfile.empty() ){
- if (mapRS2wA.size()==0) {
- if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
- mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {
- error=true;
+ } else if (!file_mbfile.empty()) {
+ if (mapRS2wA.size() == 0) {
+ if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
+ mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) {
+ error = true;
}
} else {
- if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
- mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {
- error=true;
+ if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
+ mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) {
+ error = true;
}
}
} else if (!file_mgeno.empty()) {
- if (mapRS2wA.size()==0) {
- if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
- mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {
- error=true;
+ if (mapRS2wA.size() == 0) {
+ if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
+ mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) {
+ error = true;
}
} else {
- if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
- mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {
- error=true;
+ if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
+ mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) {
+ error = true;
}
}
}
- if (mapRS2wA.size()==0) {
- gsl_matrix_memcpy (A, K);
+ if (mapRS2wA.size() == 0) {
+ gsl_matrix_memcpy(A, K);
}
// Center and scale every kinship matrix inside G.
- for (size_t i=0; i<n_vc; i++) {
- gsl_matrix_view Ksub=gsl_matrix_submatrix(K,0,i*ni_test,ni_test,ni_test);
+ for (size_t i = 0; i < n_vc; i++) {
+ gsl_matrix_view Ksub =
+ gsl_matrix_submatrix(K, 0, i * ni_test, ni_test, ni_test);
CenterMatrix(&Ksub.matrix);
ScaleMatrix(&Ksub.matrix);
- gsl_matrix_view Asub=gsl_matrix_submatrix(A,0,i*ni_test,ni_test,ni_test);
+ gsl_matrix_view Asub =
+ gsl_matrix_submatrix(A, 0, i * ni_test, ni_test, ni_test);
CenterMatrix(&Asub.matrix);
ScaleMatrix(&Asub.matrix);
}
// Cased on G, compute S.
- compAKtoS (A, K, W->size2, S);
+ compAKtoS(A, K, W->size2, S);
// Compute Svar and update S with Jacknife.
- JackknifeAKtoS (W, A, K, S, Svar);
+ JackknifeAKtoS(W, A, K, S, Svar);
return;
}
-void PARAM::WriteVector (const gsl_vector *q, const gsl_vector *s,
- const size_t n_total, const string suffix) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".";
- file_str+=suffix;
- file_str+=".txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
+void PARAM::WriteVector(const gsl_vector *q, const gsl_vector *s,
+ const size_t n_total, const string suffix) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".";
+ file_str += suffix;
+ file_str += ".txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
- outfile.precision(10);
+ outfile.precision(10);
- for (size_t i=0; i<q->size; ++i) {
- outfile<<gsl_vector_get (q, i)<<endl;
- }
+ for (size_t i = 0; i < q->size; ++i) {
+ outfile << gsl_vector_get(q, i) << endl;
+ }
- for (size_t i=0; i<s->size; ++i) {
- outfile<<gsl_vector_get (s, i)<<endl;
- }
+ for (size_t i = 0; i < s->size; ++i) {
+ outfile << gsl_vector_get(s, i) << endl;
+ }
- outfile<<n_total<<endl;
+ outfile << n_total << endl;
- outfile.close();
- outfile.clear();
- return;
+ outfile.close();
+ outfile.clear();
+ return;
}
-void PARAM::WriteVar (const string suffix) {
+void PARAM::WriteVar(const string suffix) {
string file_str, rs;
- file_str=path_out+"/"+file_out;
- file_str+=".";
- file_str+=suffix;
- file_str+=".txt.gz";
-
- ogzstream outfile (file_str.c_str(), ogzstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- outfile.precision(10);
-
- if (mindicator_snp.size()!=0) {
- for (size_t t=0; t<mindicator_snp.size(); t++) {
- indicator_snp=mindicator_snp[t];
- for (size_t i=0; i<indicator_snp.size(); i++) {
- if (indicator_snp[i]==0) {continue;}
- rs=snpInfo[i].rs_number;
- outfile<<rs<<endl;
- }
- }
- } else {
- for (size_t i=0; i<indicator_snp.size(); i++) {
- if (indicator_snp[i]==0) {continue;}
- rs=snpInfo[i].rs_number;
- outfile<<rs<<endl;
- }
- }
-
- outfile.close();
- outfile.clear();
- return;
-}
+ file_str = path_out + "/" + file_out;
+ file_str += ".";
+ file_str += suffix;
+ file_str += ".txt.gz";
+
+ ogzstream outfile(file_str.c_str(), ogzstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ outfile.precision(10);
+
+ if (mindicator_snp.size() != 0) {
+ for (size_t t = 0; t < mindicator_snp.size(); t++) {
+ indicator_snp = mindicator_snp[t];
+ for (size_t i = 0; i < indicator_snp.size(); i++) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+ rs = snpInfo[i].rs_number;
+ outfile << rs << endl;
+ }
+ }
+ } else {
+ for (size_t i = 0; i < indicator_snp.size(); i++) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+ rs = snpInfo[i].rs_number;
+ outfile << rs << endl;
+ }
+ }
-void PARAM::WriteMatrix (const gsl_matrix *matrix_U, const string suffix) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".";
- file_str+=suffix;
- file_str+=".txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- outfile.precision(10);
-
- for (size_t i=0; i<matrix_U->size1; ++i) {
- for (size_t j=0; j<matrix_U->size2; ++j) {
- outfile<<gsl_matrix_get (matrix_U, i, j)<<"\t";
- }
- outfile<<endl;
- }
-
- outfile.close();
- outfile.clear();
- return;
+ outfile.close();
+ outfile.clear();
+ return;
}
-void PARAM::WriteVector (const gsl_vector *vector_D, const string suffix) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".";
- file_str+=suffix;
- file_str+=".txt";
+void PARAM::WriteMatrix(const gsl_matrix *matrix_U, const string suffix) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".";
+ file_str += suffix;
+ file_str += ".txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ outfile.precision(10);
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
+ for (size_t i = 0; i < matrix_U->size1; ++i) {
+ for (size_t j = 0; j < matrix_U->size2; ++j) {
+ outfile << gsl_matrix_get(matrix_U, i, j) << "\t";
+ }
+ outfile << endl;
+ }
+
+ outfile.close();
+ outfile.clear();
+ return;
+}
+
+void PARAM::WriteVector(const gsl_vector *vector_D, const string suffix) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".";
+ file_str += suffix;
+ file_str += ".txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
- outfile.precision(10);
+ outfile.precision(10);
- for (size_t i=0; i<vector_D->size; ++i) {
- outfile<<gsl_vector_get (vector_D, i)<<endl;
- }
+ for (size_t i = 0; i < vector_D->size; ++i) {
+ outfile << gsl_vector_get(vector_D, i) << endl;
+ }
- outfile.close();
- outfile.clear();
- return;
+ outfile.close();
+ outfile.clear();
+ return;
}
-void PARAM::CheckCvt () {
- if (indicator_cvt.size()==0) {return;}
-
- size_t ci_test=0;
-
- gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;}
- for (size_t j=0; j<n_cvt; ++j) {
- gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
- }
- ci_test++;
- }
-
- size_t flag_ipt=0;
- double v_min, v_max;
- set<size_t> set_remove;
-
- // Check if any columns is an intercept.
- for (size_t i=0; i<W->size2; i++) {
- gsl_vector_view w_col=gsl_matrix_column (W, i);
- gsl_vector_minmax (&w_col.vector, &v_min, &v_max);
- if (v_min==v_max) {flag_ipt=1; set_remove.insert (i);}
- }
-
- // Add an intecept term if needed.
- if (n_cvt==set_remove.size()) {
- indicator_cvt.clear();
- n_cvt=1;
- } else if (flag_ipt==0) {
- cout<<"no intecept term is found in the cvt file. "<<
- "a column of 1s is added."<<endl;
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0 || indicator_cvt[i]==0) {
- continue;
- }
- cvt[i].push_back(1.0);
- }
-
- n_cvt++;
- } else {}
-
- gsl_matrix_free(W);
-
- return;
+void PARAM::CheckCvt() {
+ if (indicator_cvt.size() == 0) {
+ return;
+ }
+
+ size_t ci_test = 0;
+
+ gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
+
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) {
+ continue;
+ }
+ for (size_t j = 0; j < n_cvt; ++j) {
+ gsl_matrix_set(W, ci_test, j, (cvt)[i][j]);
+ }
+ ci_test++;
+ }
+
+ size_t flag_ipt = 0;
+ double v_min, v_max;
+ set<size_t> set_remove;
+
+ // Check if any columns is an intercept.
+ for (size_t i = 0; i < W->size2; i++) {
+ gsl_vector_view w_col = gsl_matrix_column(W, i);
+ gsl_vector_minmax(&w_col.vector, &v_min, &v_max);
+ if (v_min == v_max) {
+ flag_ipt = 1;
+ set_remove.insert(i);
+ }
+ }
+
+ // Add an intecept term if needed.
+ if (n_cvt == set_remove.size()) {
+ indicator_cvt.clear();
+ n_cvt = 1;
+ } else if (flag_ipt == 0) {
+ cout << "no intecept term is found in the cvt file. "
+ << "a column of 1s is added." << endl;
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) {
+ continue;
+ }
+ cvt[i].push_back(1.0);
+ }
+
+ n_cvt++;
+ } else {
+ }
+
+ gsl_matrix_free(W);
+
+ return;
}
// Post-process phenotypes and covariates.
-void PARAM::ProcessCvtPhen () {
-
- // Convert indicator_pheno to indicator_idv.
- int k=1;
- indicator_idv.clear();
- for (size_t i=0; i<indicator_pheno.size(); i++) {
- k=1;
- for (size_t j=0; j<indicator_pheno[i].size(); j++) {
- if (indicator_pheno[i][j]==0) {k=0;}
- }
- indicator_idv.push_back(k);
- }
-
- // Remove individuals with missing covariates.
- if ((indicator_cvt).size()!=0) {
- for (vector<int>::size_type i=0;
- i<(indicator_idv).size();
- ++i) {
- indicator_idv[i]*=indicator_cvt[i];
- }
- }
-
- // Remove individuals with missing gxe variables.
- if ((indicator_gxe).size()!=0) {
- for (vector<int>::size_type i=0;
- i<(indicator_idv).size();
- ++i) {
- indicator_idv[i]*=indicator_gxe[i];
- }
- }
-
- // Remove individuals with missing residual weights.
- if ((indicator_weight).size()!=0) {
- for (vector<int>::size_type i=0;
- i<(indicator_idv).size();
- ++i) {
- indicator_idv[i]*=indicator_weight[i];
- }
- }
-
- // Obtain ni_test.
- ni_test=0;
- for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
- ni_test++;
- }
-
- // If subsample number is set, perform a random sub-sampling
- // to determine the subsampled ids.
- if (ni_subsample!=0) {
- if (ni_test<ni_subsample) {
- cout<<"error! number of subsamples is less than number of"<<
- "analyzed individuals. "<<endl;
- } else {
-
- // Set up random environment.
- gsl_rng_env_setup();
- gsl_rng *gsl_r;
- const gsl_rng_type * gslType;
- gslType = gsl_rng_default;
- if (randseed<0) {
- time_t rawtime;
- time (&rawtime);
- tm * ptm = gmtime (&rawtime);
-
- randseed = (unsigned)
- (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec);
- }
- gsl_r = gsl_rng_alloc(gslType);
- gsl_rng_set(gsl_r, randseed);
-
- // From ni_test, sub-sample ni_subsample.
- vector<size_t> a, b;
- for (size_t i=0; i<ni_subsample; i++) {
- a.push_back(0);
- }
- for (size_t i=0; i<ni_test; i++) {
- b.push_back(i);
- }
-
- gsl_ran_choose (gsl_r, static_cast<void*>(&a[0]), ni_subsample,
- static_cast<void*>(&b[0]),ni_test,sizeof (size_t));
-
- // Re-set indicator_idv and ni_test.
- int j=0;
- for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
- if(find(a.begin(), a.end(), j) == a.end()) {
- indicator_idv[i]=0;
- }
- j++;
- }
- ni_test=ni_subsample;
- }
- }
-
- // Check ni_test.
- if (ni_test==0 && a_mode!=15) {
- error=true;
- cout<<"error! number of analyzed individuals equals 0. "<<endl;
- return;
- }
-
- // Check covariates to see if they are correlated with each
- // other, and to see if the intercept term is included.
- // After getting ni_test.
- // Add or remove covariates.
- if (indicator_cvt.size()!=0) {
- CheckCvt();
- } else {
- vector<double> cvt_row;
- cvt_row.push_back(1);
-
- for (vector<int>::size_type i=0;
- i<(indicator_idv).size();
- ++i) {
- indicator_cvt.push_back(1);
- cvt.push_back(cvt_row);
- }
- }
-
- return;
+// Build indicator_idv (the per-individual inclusion mask) and ni_test, then
+// make sure a covariate matrix exists.
+//
+// Steps:
+//  1. An individual is kept only if every entry of its indicator_pheno row
+//     is non-zero.
+//  2. The mask is multiplied by indicator_cvt, indicator_gxe and
+//     indicator_weight whenever those vectors are non-empty.
+//  3. ni_test is set to the number of kept individuals.
+//  4. If ni_subsample != 0, a random subset of ni_subsample kept individuals
+//     is retained (seeded from randseed, or from the wall clock when
+//     randseed < 0, in which case randseed is overwritten).
+//  5. If nobody is left and a_mode != 15, error is set and the function
+//     returns.
+//  6. CheckCvt() is called when covariates were read; otherwise a single
+//     covariate column of 1s is created for every individual.
+void PARAM::ProcessCvtPhen() {
+
+  // Convert indicator_pheno to indicator_idv.
+  int k = 1;
+  indicator_idv.clear();
+  for (size_t i = 0; i < indicator_pheno.size(); i++) {
+    k = 1;
+    for (size_t j = 0; j < indicator_pheno[i].size(); j++) {
+      if (indicator_pheno[i][j] == 0) {
+        k = 0;
+      }
+    }
+    indicator_idv.push_back(k);
+  }
+
+  // Remove individuals with missing covariates.
+  if ((indicator_cvt).size() != 0) {
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_idv[i] *= indicator_cvt[i];
+    }
+  }
+
+  // Remove individuals with missing gxe variables.
+  if ((indicator_gxe).size() != 0) {
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_idv[i] *= indicator_gxe[i];
+    }
+  }
+
+  // Remove individuals with missing residual weights.
+  if ((indicator_weight).size() != 0) {
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_idv[i] *= indicator_weight[i];
+    }
+  }
+
+  // Obtain ni_test.
+  ni_test = 0;
+  for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+    if (indicator_idv[i] == 0) {
+      continue;
+    }
+    ni_test++;
+  }
+
+  // If subsample number is set, perform a random sub-sampling
+  // to determine the subsampled ids.
+  if (ni_subsample != 0) {
+    if (ni_test < ni_subsample) {
+      // The request cannot be satisfied: more subsamples were asked for
+      // than there are analyzed individuals. Report it and keep everyone.
+      cout << "error! number of subsamples is greater than number of "
+           << "analyzed individuals. " << endl;
+    } else {
+
+      // Set up random environment.
+      gsl_rng_env_setup();
+      gsl_rng *gsl_r;
+      const gsl_rng_type *gslType;
+      gslType = gsl_rng_default;
+      if (randseed < 0) {
+        time_t rawtime;
+        time(&rawtime);
+        tm *ptm = gmtime(&rawtime);
+
+        randseed = (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 +
+                              ptm->tm_sec);
+      }
+      gsl_r = gsl_rng_alloc(gslType);
+      gsl_rng_set(gsl_r, randseed);
+
+      // From ni_test, sub-sample ni_subsample.
+      // a receives the chosen positions, b holds the candidates 0..ni_test-1.
+      vector<size_t> a(ni_subsample, 0), b;
+      b.reserve(ni_test);
+      for (size_t i = 0; i < ni_test; i++) {
+        b.push_back(i);
+      }
+
+      gsl_ran_choose(gsl_r, static_cast<void *>(&a[0]), ni_subsample,
+                     static_cast<void *>(&b[0]), ni_test, sizeof(size_t));
+
+      // The generator is only needed for this single draw; release it so it
+      // is not leaked.
+      gsl_rng_free(gsl_r);
+
+      // Re-set indicator_idv and ni_test.
+      // gsl_ran_choose keeps the chosen items in the same relative order as
+      // the source, and b is ascending, so a is ascending and can be
+      // binary-searched instead of scanned linearly.
+      size_t j = 0; // Position of the current individual among the kept ones.
+      for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+        if (indicator_idv[i] == 0) {
+          continue;
+        }
+        if (!binary_search(a.begin(), a.end(), j)) {
+          indicator_idv[i] = 0;
+        }
+        j++;
+      }
+      ni_test = ni_subsample;
+    }
+  }
+
+  // Check ni_test.
+  if (ni_test == 0 && a_mode != 15) {
+    error = true;
+    cout << "error! number of analyzed individuals equals 0. " << endl;
+    return;
+  }
+
+  // Check covariates to see if they are correlated with each
+  // other, and to see if the intercept term is included.
+  // After getting ni_test.
+  // Add or remove covariates.
+  if (indicator_cvt.size() != 0) {
+    CheckCvt();
+  } else {
+    // No covariate file: give every individual a single covariate of 1.
+    vector<double> cvt_row;
+    cvt_row.push_back(1);
+
+    for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
+      indicator_cvt.push_back(1);
+      cvt.push_back(cvt_row);
+    }
+  }
+
+  return;
}
-void PARAM::CopyCvt (gsl_matrix *W) {
- size_t ci_test=0;
+// Copy the covariates of the analyzed individuals into W.
+// An individual is copied only when both indicator_idv[i] and
+// indicator_cvt[i] are non-zero; copied individuals fill consecutive rows
+// of W (row counter ci_test), n_cvt values per row.
+// NOTE(review): W is assumed to be allocated by the caller with enough rows
+// and n_cvt columns -- nothing here checks its size; confirm at callers.
+void PARAM::CopyCvt(gsl_matrix *W) {
+ size_t ci_test = 0;
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;}
- for (size_t j=0; j<n_cvt; ++j) {
- gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
- }
- ci_test++;
- }
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) {
+ continue;
+ }
+ for (size_t j = 0; j < n_cvt; ++j) {
+ gsl_matrix_set(W, ci_test, j, (cvt)[i][j]);
+ }
+ ci_test++;
+ }
- return;
+ return;
}
-void PARAM::CopyGxe (gsl_vector *env) {
- size_t ci_test=0;
+// Copy the gxe value of each analyzed individual into env.
+// An individual is copied only when both indicator_idv[i] and
+// indicator_gxe[i] are non-zero; values are written to consecutive
+// positions of env starting at 0.
+// NOTE(review): env is assumed to be allocated by the caller with at least
+// as many elements as copied individuals -- not checked here.
+void PARAM::CopyGxe(gsl_vector *env) {
+ size_t ci_test = 0;
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0 || indicator_gxe[i]==0) {continue;}
- gsl_vector_set (env, ci_test, gxe[i]);
- ci_test++;
- }
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0 || indicator_gxe[i] == 0) {
+ continue;
+ }
+ gsl_vector_set(env, ci_test, gxe[i]);
+ ci_test++;
+ }
- return;
+ return;
}
-void PARAM::CopyWeight (gsl_vector *w) {
- size_t ci_test=0;
+// Copy the residual weight of each analyzed individual into w.
+// An individual is copied only when both indicator_idv[i] and
+// indicator_weight[i] are non-zero; values are written to consecutive
+// positions of w starting at 0.
+// NOTE(review): w is assumed to be allocated by the caller with at least
+// as many elements as copied individuals -- not checked here.
+void PARAM::CopyWeight(gsl_vector *w) {
+ size_t ci_test = 0;
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0 || indicator_weight[i]==0) {continue;}
- gsl_vector_set (w, ci_test, weight[i]);
- ci_test++;
- }
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0 || indicator_weight[i] == 0) {
+ continue;
+ }
+ gsl_vector_set(w, ci_test, weight[i]);
+ ci_test++;
+ }
- return;
+ return;
}
// If flag=0, then use indicator_idv to load W and Y;
// else, use indicator_cvt to load them.
+// Single-phenotype overload: for every selected individual, the first
+// phenotype column (pheno[i][0]) is written to y and its n_cvt covariates
+// to the matching row of W. Selected individuals fill consecutive rows
+// starting at 0.
+// NOTE(review): W and y are assumed to be allocated by the caller with
+// enough rows for the chosen selection -- not checked here.
-void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag) {
- size_t ci_test=0;
+void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag) {
+ size_t ci_test = 0;
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (flag==0) {
- if (indicator_idv[i]==0) {continue;}
- } else {
- if (indicator_cvt[i]==0) {continue;}
- }
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (flag == 0) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+ } else {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+ }
- gsl_vector_set (y, ci_test, (pheno)[i][0]);
+ gsl_vector_set(y, ci_test, (pheno)[i][0]);
- for (size_t j=0; j<n_cvt; ++j) {
- gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
- }
- ci_test++;
- }
+ for (size_t j = 0; j < n_cvt; ++j) {
+ gsl_matrix_set(W, ci_test, j, (cvt)[i][j]);
+ }
+ ci_test++;
+ }
- return;
+ return;
}
// If flag=0, then use indicator_idv to load W and Y;
// else, use indicator_cvt to load them.
+// Multi-phenotype overload: for every selected individual, its n_ph
+// phenotypes are written to a row of Y and its n_cvt covariates to the
+// same row of W. Selected individuals fill consecutive rows starting at 0.
+// NOTE(review): W and Y are assumed to be allocated by the caller with
+// enough rows for the chosen selection -- not checked here.
-void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag) {
- size_t ci_test=0;
-
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (flag==0) {
- if (indicator_idv[i]==0) {continue;}
- } else {
- if (indicator_cvt[i]==0) {continue;}
- }
-
- for (size_t j=0; j<n_ph; ++j) {
- gsl_matrix_set (Y, ci_test, j, (pheno)[i][j]);
- }
- for (size_t j=0; j<n_cvt; ++j) {
- gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
- }
-
- ci_test++;
- }
-
- return;
+void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag) {
+ size_t ci_test = 0;
+
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (flag == 0) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+ } else {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+ }
+
+ for (size_t j = 0; j < n_ph; ++j) {
+ gsl_matrix_set(Y, ci_test, j, (pheno)[i][j]);
+ }
+ for (size_t j = 0; j < n_cvt; ++j) {
+ gsl_matrix_set(W, ci_test, j, (cvt)[i][j]);
+ }
+
+ ci_test++;
+ }
+
+ return;
}
-void PARAM::CopyRead (gsl_vector *log_N) {
- size_t ci_test=0;
+// Write log(vec_read[i]) for every individual with indicator_idv[i] != 0
+// into consecutive positions of log_N, starting at 0.
+// NOTE(review): indicator_read is not consulted here, and log() of a
+// non-positive read count is not guarded -- confirm inputs are positive.
+void PARAM::CopyRead(gsl_vector *log_N) {
+ size_t ci_test = 0;
- for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
- gsl_vector_set (log_N, ci_test, log(vec_read[i]) );
- ci_test++;
- }
+ for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+ gsl_vector_set(log_N, ci_test, log(vec_read[i]));
+ ci_test++;
+ }
- return;
+ return;
}
-void PARAM::ObtainWeight (const set<string> &setSnps_beta,
- map<string, double> &mapRS2wK) {
+void PARAM::ObtainWeight(const set<string> &setSnps_beta,
+ map<string, double> &mapRS2wK) {
mapRS2wK.clear();
vector<double> wsum, wcount;
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
wsum.push_back(0.0);
wcount.push_back(0.0);
}
string rs;
- if (msnpInfo.size()==0) {
- for (size_t i=0; i<snpInfo.size(); i++) {
- if (indicator_snp[i]==0) {continue;}
-
- rs=snpInfo[i].rs_number;
- if ( (setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) &&
- (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) &&
- (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) &&
- (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
- if (mapRS2wsnp.size()!=0) {
- mapRS2wK[rs]=mapRS2wsnp[rs];
- if (mapRS2cat.size()==0) {
- wsum[0]+=mapRS2wsnp[rs];
- } else {
- wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs];
- }
- wcount[0]++;
- } else {
- mapRS2wK[rs]=1;
- }
+ if (msnpInfo.size() == 0) {
+ for (size_t i = 0; i < snpInfo.size(); i++) {
+ if (indicator_snp[i] == 0) {
+ continue;
}
+ rs = snpInfo[i].rs_number;
+ if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) &&
+ (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) &&
+ (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) &&
+ (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
+ if (mapRS2wsnp.size() != 0) {
+ mapRS2wK[rs] = mapRS2wsnp[rs];
+ if (mapRS2cat.size() == 0) {
+ wsum[0] += mapRS2wsnp[rs];
+ } else {
+ wsum[mapRS2cat[rs]] += mapRS2wsnp[rs];
+ }
+ wcount[0]++;
+ } else {
+ mapRS2wK[rs] = 1;
+ }
+ }
}
} else {
- for (size_t t=0; t<msnpInfo.size(); t++) {
- snpInfo=msnpInfo[t];
- indicator_snp=mindicator_snp[t];
-
- for (size_t i=0; i<snpInfo.size(); i++) {
- if (indicator_snp[i]==0) {continue;}
-
- rs=snpInfo[i].rs_number;
- if ((setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) &&
- (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) &&
- (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) &&
- (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
- if (mapRS2wsnp.size()!=0) {
- mapRS2wK[rs]=mapRS2wsnp[rs];
- if (mapRS2cat.size()==0) {
- wsum[0]+=mapRS2wsnp[rs];
- } else {
- wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs];
- }
- wcount[0]++;
- } else {
- mapRS2wK[rs]=1;
- }
- }
- }
- }
- }
-
- if (mapRS2wsnp.size()!=0) {
- for (size_t i=0; i<n_vc; i++) {
- wsum[i]/=wcount[i];
- }
-
- for (map<string, double>::iterator it=mapRS2wK.begin();
- it!=mapRS2wK.end();
- ++it) {
- if (mapRS2cat.size()==0) {
- it->second/=wsum[0];
+ for (size_t t = 0; t < msnpInfo.size(); t++) {
+ snpInfo = msnpInfo[t];
+ indicator_snp = mindicator_snp[t];
+
+ for (size_t i = 0; i < snpInfo.size(); i++) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
+
+ rs = snpInfo[i].rs_number;
+ if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) &&
+ (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) &&
+ (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) &&
+ (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
+ if (mapRS2wsnp.size() != 0) {
+ mapRS2wK[rs] = mapRS2wsnp[rs];
+ if (mapRS2cat.size() == 0) {
+ wsum[0] += mapRS2wsnp[rs];
+ } else {
+ wsum[mapRS2cat[rs]] += mapRS2wsnp[rs];
+ }
+ wcount[0]++;
+ } else {
+ mapRS2wK[rs] = 1;
+ }
+ }
+ }
+ }
+ }
+
+ if (mapRS2wsnp.size() != 0) {
+ for (size_t i = 0; i < n_vc; i++) {
+ wsum[i] /= wcount[i];
+ }
+
+ for (map<string, double>::iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ if (mapRS2cat.size() == 0) {
+ it->second /= wsum[0];
} else {
- it->second/=wsum[mapRS2cat[it->first]];
+ it->second /= wsum[mapRS2cat[it->first]];
}
}
}
@@ -2201,54 +2284,52 @@ void PARAM::ObtainWeight (const set<string> &setSnps_beta,
// If pve_flag == 0, pve is used unchanged; if pve_flag == 1, pve is treated
// as 0 when pve <= 0 and as 1 when pve >= 1.
-void PARAM::UpdateWeight (const size_t pve_flag,
- const map<string, double> &mapRS2wK,
- const size_t ni_test, const gsl_vector *ns,
- map<string, double> &mapRS2wA) {
+void PARAM::UpdateWeight(const size_t pve_flag,
+ const map<string, double> &mapRS2wK,
+ const size_t ni_test, const gsl_vector *ns,
+ map<string, double> &mapRS2wA) {
double d;
vector<double> wsum, wcount;
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
wsum.push_back(0.0);
wcount.push_back(0.0);
}
- for (map<string, double>::const_iterator it=mapRS2wK.begin();
- it!=mapRS2wK.end();
- ++it) {
- d=1;
- for (size_t i=0; i<n_vc; i++) {
- if (v_pve[i]>=1 && pve_flag==1) {
- d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i];
- } else if (v_pve[i]<=0 && pve_flag==1) {
- d+=0;
+ for (map<string, double>::const_iterator it = mapRS2wK.begin();
+ it != mapRS2wK.end(); ++it) {
+ d = 1;
+ for (size_t i = 0; i < n_vc; i++) {
+ if (v_pve[i] >= 1 && pve_flag == 1) {
+ d += (double)ni_test / gsl_vector_get(ns, i) * mapRS2wcat[it->first][i];
+ } else if (v_pve[i] <= 0 && pve_flag == 1) {
+ d += 0;
} else {
- d+=(double)ni_test/gsl_vector_get(ns, i)*
- mapRS2wcat[it->first][i]*v_pve[i];
+ d += (double)ni_test / gsl_vector_get(ns, i) *
+ mapRS2wcat[it->first][i] * v_pve[i];
}
}
- mapRS2wA[it->first]=1/(d*d);
+ mapRS2wA[it->first] = 1 / (d * d);
- if (mapRS2cat.size()==0) {
- wsum[0]+=mapRS2wA[it->first];
+ if (mapRS2cat.size() == 0) {
+ wsum[0] += mapRS2wA[it->first];
wcount[0]++;
} else {
- wsum[mapRS2cat[it->first]]+=mapRS2wA[it->first];
+ wsum[mapRS2cat[it->first]] += mapRS2wA[it->first];
wcount[mapRS2cat[it->first]]++;
}
}
- for (size_t i=0; i<n_vc; i++) {
- wsum[i]/=wcount[i];
+ for (size_t i = 0; i < n_vc; i++) {
+ wsum[i] /= wcount[i];
}
- for (map<string, double>::iterator it=mapRS2wA.begin();
- it!=mapRS2wA.end();
- ++it) {
- if (mapRS2cat.size()==0) {
- it->second/=wsum[0];
+ for (map<string, double>::iterator it = mapRS2wA.begin();
+ it != mapRS2wA.end(); ++it) {
+ if (mapRS2cat.size() == 0) {
+ it->second /= wsum[0];
} else {
- it->second/=wsum[mapRS2cat[it->first]];
+ it->second /= wsum[mapRS2cat[it->first]];
}
}
return;
@@ -2256,61 +2337,64 @@ void PARAM::UpdateWeight (const size_t pve_flag,
// This function updates indicator_snp and saves z-scores and other
// values into vectors.
-void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA,
- const map<string, string> &mapRS2A1,
- const map<string, double> &mapRS2z,
- gsl_vector *w, gsl_vector *z,
- vector<size_t> &vec_cat) {
- gsl_vector_set_zero (w);
- gsl_vector_set_zero (z);
+void PARAM::UpdateSNPnZ(const map<string, double> &mapRS2wA,
+ const map<string, string> &mapRS2A1,
+ const map<string, double> &mapRS2z, gsl_vector *w,
+ gsl_vector *z, vector<size_t> &vec_cat) {
+ gsl_vector_set_zero(w);
+ gsl_vector_set_zero(z);
vec_cat.clear();
string rs, a1;
- size_t c=0;
- if (msnpInfo.size()==0) {
- for (size_t i=0; i<snpInfo.size(); i++) {
- if (indicator_snp[i]==0) {continue;}
-
- rs=snpInfo[i].rs_number;
- a1=snpInfo[i].a_minor;
-
- if (mapRS2wA.count(rs)!=0) {
- if (a1==mapRS2A1.at(rs)) {
- gsl_vector_set (z, c, mapRS2z.at(rs) );
- } else {
- gsl_vector_set (z, c, -1*mapRS2z.at(rs) );
- }
- vec_cat.push_back(mapRS2cat.at(rs) );
- gsl_vector_set (w, c, mapRS2wA.at(rs) );
-
- c++;
- } else {
- indicator_snp[i]=0;
+ size_t c = 0;
+ if (msnpInfo.size() == 0) {
+ for (size_t i = 0; i < snpInfo.size(); i++) {
+ if (indicator_snp[i] == 0) {
+ continue;
}
- }
- } else {
- for (size_t t=0; t<msnpInfo.size(); t++) {
- snpInfo=msnpInfo[t];
- for (size_t i=0; i<snpInfo.size(); i++) {
- if (mindicator_snp[t][i]==0) {continue;}
+ rs = snpInfo[i].rs_number;
+ a1 = snpInfo[i].a_minor;
- rs=snpInfo[i].rs_number;
- a1=snpInfo[i].a_minor;
+ if (mapRS2wA.count(rs) != 0) {
+ if (a1 == mapRS2A1.at(rs)) {
+ gsl_vector_set(z, c, mapRS2z.at(rs));
+ } else {
+ gsl_vector_set(z, c, -1 * mapRS2z.at(rs));
+ }
+ vec_cat.push_back(mapRS2cat.at(rs));
+ gsl_vector_set(w, c, mapRS2wA.at(rs));
- if (mapRS2wA.count(rs)!=0) {
- if (a1==mapRS2A1.at(rs)) {
- gsl_vector_set (z, c, mapRS2z.at(rs) );
- } else {
- gsl_vector_set (z, c, -1*mapRS2z.at(rs) );
- }
- vec_cat.push_back(mapRS2cat.at(rs) );
- gsl_vector_set (w, c, mapRS2wA.at(rs) );
+ c++;
+ } else {
+ indicator_snp[i] = 0;
+ }
+ }
+ } else {
+ for (size_t t = 0; t < msnpInfo.size(); t++) {
+ snpInfo = msnpInfo[t];
+
+ for (size_t i = 0; i < snpInfo.size(); i++) {
+ if (mindicator_snp[t][i] == 0) {
+ continue;
+ }
+
+ rs = snpInfo[i].rs_number;
+ a1 = snpInfo[i].a_minor;
+
+ if (mapRS2wA.count(rs) != 0) {
+ if (a1 == mapRS2A1.at(rs)) {
+ gsl_vector_set(z, c, mapRS2z.at(rs));
+ } else {
+ gsl_vector_set(z, c, -1 * mapRS2z.at(rs));
+ }
+ vec_cat.push_back(mapRS2cat.at(rs));
+ gsl_vector_set(w, c, mapRS2wA.at(rs));
- c++;
- } else {
- mindicator_snp[t][i]=0;
- }
+ c++;
+ } else {
+ mindicator_snp[t][i] = 0;
+ }
}
}
}
@@ -2320,30 +2404,34 @@ void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA,
// This function updates indicator_snp (or mindicator_snp): SNPs whose
// rs number is not a key of mapRS2wA are marked 0 (ignored).
-void PARAM::UpdateSNP (const map<string, double> &mapRS2wA) {
+void PARAM::UpdateSNP(const map<string, double> &mapRS2wA) {
string rs;
- if (msnpInfo.size()==0) {
- for (size_t i=0; i<snpInfo.size(); i++) {
- if (indicator_snp[i]==0) {continue;}
+ if (msnpInfo.size() == 0) {
+ for (size_t i = 0; i < snpInfo.size(); i++) {
+ if (indicator_snp[i] == 0) {
+ continue;
+ }
- rs=snpInfo[i].rs_number;
+ rs = snpInfo[i].rs_number;
- if (mapRS2wA.count(rs)==0) {
- indicator_snp[i]=0;
+ if (mapRS2wA.count(rs) == 0) {
+ indicator_snp[i] = 0;
}
}
} else {
- for (size_t t=0; t<msnpInfo.size(); t++) {
- snpInfo=msnpInfo[t];
+ for (size_t t = 0; t < msnpInfo.size(); t++) {
+ snpInfo = msnpInfo[t];
- for (size_t i=0; i<mindicator_snp[t].size(); i++) {
- if (mindicator_snp[t][i]==0) {continue;}
+ for (size_t i = 0; i < mindicator_snp[t].size(); i++) {
+ if (mindicator_snp[t][i] == 0) {
+ continue;
+ }
- rs=snpInfo[i].rs_number;
+ rs = snpInfo[i].rs_number;
- if (mapRS2wA.count(rs)==0) {
- mindicator_snp[t][i]=0;
- }
+ if (mapRS2wA.count(rs) == 0) {
+ mindicator_snp[t][i] = 0;
+ }
}
}
}
diff --git a/src/param.h b/src/param.h
index f58da53..33e2431 100644
--- a/src/param.h
+++ b/src/param.h
@@ -19,340 +19,336 @@
#ifndef __PARAM_H__
#define __PARAM_H__
-#include <vector>
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
#include <map>
#include <set>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
+#include <vector>
using namespace std;
+// Per-SNP record; PARAM keeps these in snpInfo and msnpInfo.
+// rs_number is the identifier used as the key of the mapRS2* maps, and
+// a_minor is the allele compared against mapRS2A1 in PARAM::UpdateSNPnZ.
+// NOTE(review): the remaining uncommented members (chr, cM, base_position,
+// a_major, n_miss, missingness, maf) are presumably chromosome, genetic
+// position, base-pair position, major allele, missing-genotype count,
+// missing rate and minor allele frequency -- confirm where they are filled.
class SNPINFO {
public:
- string chr;
- string rs_number;
- double cM;
- long int base_position;
- string a_minor;
- string a_major;
- size_t n_miss;
- double missingness;
- double maf;
- size_t n_idv; // Number of non-missing individuals.
- size_t n_nb; // Number of neighbours on the right hand side.
- size_t file_position; // SNP location in file.
+ string chr;
+ string rs_number;
+ double cM;
+ long int base_position;
+ string a_minor;
+ string a_major;
+ size_t n_miss;
+ double missingness;
+ double maf;
+ size_t n_idv; // Number of non-missing individuals.
+ size_t n_nb; // Number of neighbours on the right hand side.
+ size_t file_position; // SNP location in file.
};
// Results for LMM.
+// Plain value holder: the effect estimate and its standard error, the REML
+// and MLE estimates of lambda, and the p values of the Wald, likelihood
+// ratio and score tests. No member is initialized by the class itself.
class SUMSTAT {
public:
- double beta; // REML estimator for beta.
- double se; // SE for beta.
- double lambda_remle; // REML estimator for lambda.
- double lambda_mle; // MLE estimator for lambda.
- double p_wald; // p value from a Wald test.
- double p_lrt; // p value from a likelihood ratio test.
- double p_score; // p value from a score test.
+ double beta; // REML estimator for beta.
+ double se; // SE for beta.
+ double lambda_remle; // REML estimator for lambda.
+ double lambda_mle; // MLE estimator for lambda.
+ double p_wald; // p value from a Wald test.
+ double p_lrt; // p value from a likelihood ratio test.
+ double p_score; // p value from a score test.
};
// Results for mvLMM.
+// Multi-phenotype counterpart of SUMSTAT: beta is a vector, and the
+// Vg / Ve / Vbeta estimates are stored as flat vectors.
+// NOTE(review): "right half" presumably means the upper triangle of the
+// symmetric matrix flattened row by row -- confirm the element order where
+// these vectors are filled.
class MPHSUMSTAT {
public:
- vector<double> v_beta; // REML estimator for beta.
- double p_wald; // p value from a Wald test.
- double p_lrt; // p value from a likelihood ratio test.
- double p_score; // p value from a score test.
- vector<double> v_Vg; // Estimator for Vg, right half.
- vector<double> v_Ve; // Estimator for Ve, right half.
- vector<double> v_Vbeta; // Estimator for Vbeta, right half.
+ vector<double> v_beta; // REML estimator for beta.
+ double p_wald; // p value from a Wald test.
+ double p_lrt; // p value from a likelihood ratio test.
+ double p_score; // p value from a score test.
+ vector<double> v_Vg; // Estimator for Vg, right half.
+ vector<double> v_Ve; // Estimator for Ve, right half.
+ vector<double> v_Vbeta; // Estimator for Vbeta, right half.
};
// Hyper-parameters for BSLMM.
+// Plain value holder; PARAM stores one instance as cHyp_initial.
+// NOTE(review): members are undocumented here. h, rho and logp presumably
+// are the quantities whose priors PARAM describes as "Priors for h",
+// "Priors for rho" and "Priors for log(pi)", and n_gamma the "number of
+// gammas" bounded by PARAM::s_min / s_max -- confirm in bslmm.cpp.
class HYPBSLMM {
public:
- double h;
- double pve;
- double rho;
- double pge;
- double logp;
- size_t n_gamma;
+ double h;
+ double pve;
+ double rho;
+ double pge;
+ double logp;
+ size_t n_gamma;
};
// Header class.
+// Plain value holder with one size_t per named field plus coln, the number
+// of columns; catc_col and catd_col are sets of size_t.
+// NOTE(review): each *_col member presumably stores the position of that
+// field in the header line of an input file -- the indexing base and the
+// value used for "column absent" are not visible here; confirm in io.cpp.
class HEADER {
public:
- size_t rs_col;
- size_t chr_col;
- size_t pos_col;
- size_t cm_col;
- size_t a1_col;
- size_t a0_col;
- size_t z_col;
- size_t beta_col;
- size_t sebeta_col;
- size_t chisq_col;
- size_t p_col;
- size_t n_col;
- size_t nmis_col;
- size_t nobs_col;
- size_t ncase_col;
- size_t ncontrol_col;
- size_t af_col;
- size_t var_col;
- size_t ws_col;
- size_t cor_col;
- size_t coln; // Number of columns.
- set<size_t> catc_col;
- set<size_t> catd_col;
+ size_t rs_col;
+ size_t chr_col;
+ size_t pos_col;
+ size_t cm_col;
+ size_t a1_col;
+ size_t a0_col;
+ size_t z_col;
+ size_t beta_col;
+ size_t sebeta_col;
+ size_t chisq_col;
+ size_t p_col;
+ size_t n_col;
+ size_t nmis_col;
+ size_t nobs_col;
+ size_t ncase_col;
+ size_t ncontrol_col;
+ size_t af_col;
+ size_t var_col;
+ size_t ws_col;
+ size_t cor_col;
+ size_t coln; // Number of columns.
+ set<size_t> catc_col;
+ set<size_t> catd_col;
};
class PARAM {
public:
- // IO-related parameters.
- bool mode_silence;
- int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
- int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
- vector<size_t> p_column; // Which phenotype column needs analysis.
- size_t d_pace; // Display pace
-
- string file_bfile, file_mbfile;
- string file_geno, file_mgeno;
- string file_pheno;
- string file_anno; // Optional.
- string file_gxe; // Optional.
- string file_cvt; // Optional.
- string file_cat, file_mcat;
- string file_catc, file_mcatc;
- string file_var;
- string file_beta;
- string file_cor;
- string file_kin, file_mk;
- string file_ku, file_kd;
- string file_study, file_mstudy;
- string file_ref, file_mref;
- string file_weight, file_wsnp, file_wcat;
- string file_out;
- string file_bf, file_hyp;
- string path_out;
-
- string file_epm; // Estimated parameter file.
- string file_ebv; // Estimated breeding value file.
- string file_log; // Log file containing mean estimate.
- string file_read; // File containing total number of reads.
- string file_gene; // Gene expression file.
- string file_snps; // File containing analyzed SNPs or genes.
-
- // WJA added.
- string file_oxford;
-
- // QC-related parameters.
- double miss_level;
- double maf_level;
- double hwe_level;
- double r2_level;
-
- // LMM-related parameters.
- double l_min;
- double l_max;
- size_t n_region;
- double l_mle_null, l_remle_null;
- double logl_mle_H0, logl_remle_H0;
- double pve_null, pve_se_null, pve_total, se_pve_total;
- double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null;
- vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
- vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
- vector<double> VVe_mle_null;
- vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
- vector<double> se_beta_mle_null;
- double p_nr;
- double em_prec, nr_prec;
- size_t em_iter, nr_iter;
- size_t crt;
- double pheno_mean; // Phenotype mean from BSLMM fitting or prediction.
-
- // For fitting multiple variance components.
- // The first 3 are of size (n_vc), and the next 2 are of size n_vc+1.
- bool noconstrain;
- vector<double> v_traceG;
- vector<double> v_pve;
- vector<double> v_se_pve;
-
- vector<double> v_sigma2;
- vector<double> v_se_sigma2;
- vector<double> v_enrich;
- vector<double> v_se_enrich;
- vector<double> v_beta;
- vector<double> v_se_beta;
-
- // BSLMM/MCMC-related parameters.
- double h_min, h_max, h_scale; // Priors for h.
- double rho_min, rho_max, rho_scale; // Priors for rho.
- double logp_min, logp_max, logp_scale; // Priors for log(pi).
- size_t h_ngrid, rho_ngrid;
- size_t s_min, s_max; // Min & max. number of gammas.
- size_t w_step; // # warm up/burn in iter.
- size_t s_step; // # sampling iterations.
- size_t r_pace; // Record pace.
- size_t w_pace; // Write pace.
- size_t n_accept; // Number of acceptance.
- size_t n_mh; // # MH steps in each iter.
- double geo_mean; // Mean of geometric dist.
- long int randseed;
- double trace_G;
-
- HYPBSLMM cHyp_initial;
-
- // VARCOV-related parameters.
- double window_cm;
- size_t window_bp;
- size_t window_ns;
-
- // vc-related parameters.
- size_t n_block;
-
- // Summary statistics.
- bool error;
-
- // Number of individuals.
- size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
-
- // Number of observed and missing phenotypes.
- size_t np_obs, np_miss;
-
- // Number of SNPs.
- size_t ns_total, ns_test, ns_study, ns_ref;
-
- size_t ng_total, ng_test; // Number of genes.
- size_t ni_control, ni_case; // Number of controls and number of cases.
- size_t ni_subsample; // Number of subsampled individuals.
- size_t n_cvt; // Number of covariates.
- size_t n_cat; // Number of continuous categories.
- size_t n_ph; // Number of phenotypes.
- size_t n_vc; // Number of variance components
- // (including the diagonal matrix).
- double time_total; // Record total time.
- double time_G; // Time spent on reading files the
- // second time and calculate K.
- double time_eigen; // Time spent on eigen-decomposition.
- double time_UtX; // Time spent on calculating UX and Uy.
- double time_UtZ; // Time calculating UtZ for probit BSLMM.
- double time_opt; // Time on optimization iterations/MCMC.
- double time_Omega; // Time spent on calculating Omega.
- double time_hyp; // Time sampling hyperparameters in PMM.
- double time_Proposal; // Time spent on constructing the
- // proposal distribution (i.e. the
- // initial LMM or LM analysis).
-
- // Data.
- // Vector recording all phenotypes (NA replaced with -9).
- vector<vector<double> > pheno;
-
- // Vector recording all covariates (NA replaced with -9).
- vector<vector<double> > cvt;
-
- // Vector recording all covariates (NA replaced with -9).
- vector<double> gxe;
-
- // Vector recording weights for the individuals, which is
- // useful for animal breeding studies.
- vector<double> weight;
-
- // Matrix recording when a phenotype is missing for an
- // individual; 0 missing, 1 available.
- vector<vector<int> > indicator_pheno;
-
- // Indicator for individuals (phenotypes): 0 missing, 1
- // available for analysis
- vector<int> indicator_idv;
-
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
-
- // Sequence indicator for SNPs: 0 ignored because of (a) maf,
- // (b) miss, (c) non-poly; 1 available for analysis.
- vector< vector<int> > mindicator_snp;
-
- // Indicator for covariates: 0 missing, 1 available for
- // analysis.
- vector<int> indicator_cvt;
-
- // Indicator for gxe: 0 missing, 1 available for analysis.
- vector<int> indicator_gxe;
-
- // Indicator for weight: 0 missing, 1 available for analysis.
- vector<int> indicator_weight;
-
- // Indicator for estimated breeding value file: 0 missing, 1
- // available for analysis.
- vector<int> indicator_bv;
-
- // Indicator for read file: 0 missing, 1 available for analysis.
- vector<int> indicator_read;
- vector<double> vec_read; // Total number of reads.
- vector<double> vec_bv; // Breeding values.
- vector<size_t> est_column;
-
- map<string, int> mapID2num; // Map small ID to number, 0 to n-1.
- map<string, string> mapRS2chr; // Map rs# to chromosome location.
- map<string, long int> mapRS2bp; // Map rs# to base position.
- map<string, double> mapRS2cM; // Map rs# to cM.
- map<string, double> mapRS2est; // Map rs# to parameters.
- map<string, size_t> mapRS2cat; // Map rs# to category number.
- map<string, vector<double> > mapRS2catc; // Map rs# to cont. cat's.
- map<string, double> mapRS2wsnp; // Map rs# to SNP weights.
- map<string, vector<double> > mapRS2wcat; // Map rs# to SNP cat weights.
-
- vector<SNPINFO> snpInfo; // Record SNP information.
- vector< vector<SNPINFO> > msnpInfo; // Record SNP information.
- set<string> setSnps; // Set of snps for analysis.
-
- // Constructor.
- PARAM();
-
- // Functions.
- void ReadFiles ();
- void CheckParam ();
- void CheckData ();
- void PrintSummary ();
- void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K,
- const bool calc_K);
- void ReadGenotypes (vector<vector<unsigned char> > &Xt,
- gsl_matrix *K, const bool calc_K);
- void CheckCvt ();
- void CopyCvt (gsl_matrix *W);
- void CopyA (size_t flag, gsl_matrix *A);
- void CopyGxe (gsl_vector *gxe);
- void CopyWeight (gsl_vector *w);
- void ProcessCvtPhen();
- void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag);
- void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag);
- void CalcKin (gsl_matrix *matrix_kin);
- void CalcS (const map<string, double> &mapRS2wA,
- const map<string, double> &mapRS2wK,
- const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K,
- gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns);
- void WriteVector (const gsl_vector *q, const gsl_vector *s,
- const size_t n_total, const string suffix);
- void WriteVar (const string suffix);
- void WriteMatrix (const gsl_matrix *matrix_U, const string suffix);
- void WriteVector (const gsl_vector *vector_D, const string suffix);
- void CopyRead (gsl_vector *log_N);
- void ObtainWeight (const set<string> &setSnps_beta, map<string,
- double> &mapRS2wK);
- void UpdateWeight (const size_t pve_flag,
- const map<string,double> &mapRS2wK,
- const size_t ni_test, const gsl_vector *ns,
- map<string, double> &mapRS2wA);
- void UpdateSNPnZ (const map<string, double> &mapRS2wA,
- const map<string, string> &mapRS2A1,
- const map<string, double> &mapRS2z,
- gsl_vector *w, gsl_vector *z,
- vector<size_t> &vec_cat);
- void UpdateSNP (const map<string, double> &mapRS2wA);
+ // IO-related parameters.
+ bool mode_silence;
+ int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
+ int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
+ vector<size_t> p_column; // Which phenotype column needs analysis.
+ size_t d_pace; // Display pace
+
+ string file_bfile, file_mbfile;
+ string file_geno, file_mgeno;
+ string file_pheno;
+ string file_anno; // Optional.
+ string file_gxe; // Optional.
+ string file_cvt; // Optional.
+ string file_cat, file_mcat;
+ string file_catc, file_mcatc;
+ string file_var;
+ string file_beta;
+ string file_cor;
+ string file_kin, file_mk;
+ string file_ku, file_kd;
+ string file_study, file_mstudy;
+ string file_ref, file_mref;
+ string file_weight, file_wsnp, file_wcat;
+ string file_out;
+ string file_bf, file_hyp;
+ string path_out;
+
+ string file_epm; // Estimated parameter file.
+ string file_ebv; // Estimated breeding value file.
+ string file_log; // Log file containing mean estimate.
+ string file_read; // File containing total number of reads.
+ string file_gene; // Gene expression file.
+ string file_snps; // File containing analyzed SNPs or genes.
+
+ // WJA added.
+ string file_oxford;
+
+ // QC-related parameters.
+ double miss_level;
+ double maf_level;
+ double hwe_level;
+ double r2_level;
+
+ // LMM-related parameters.
+ double l_min;
+ double l_max;
+ size_t n_region;
+ double l_mle_null, l_remle_null;
+ double logl_mle_H0, logl_remle_H0;
+ double pve_null, pve_se_null, pve_total, se_pve_total;
+ double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null;
+ vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
+ vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
+ vector<double> VVe_mle_null;
+ vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
+ vector<double> se_beta_mle_null;
+ double p_nr;
+ double em_prec, nr_prec;
+ size_t em_iter, nr_iter;
+ size_t crt;
+ double pheno_mean; // Phenotype mean from BSLMM fitting or prediction.
+
+ // For fitting multiple variance components.
+ // The first 3 are of size (n_vc), and the next 2 are of size n_vc+1.
+ bool noconstrain;
+ vector<double> v_traceG;
+ vector<double> v_pve;
+ vector<double> v_se_pve;
+
+ vector<double> v_sigma2;
+ vector<double> v_se_sigma2;
+ vector<double> v_enrich;
+ vector<double> v_se_enrich;
+ vector<double> v_beta;
+ vector<double> v_se_beta;
+
+ // BSLMM/MCMC-related parameters.
+ double h_min, h_max, h_scale; // Priors for h.
+ double rho_min, rho_max, rho_scale; // Priors for rho.
+ double logp_min, logp_max, logp_scale; // Priors for log(pi).
+ size_t h_ngrid, rho_ngrid;
+ size_t s_min, s_max; // Min & max. number of gammas.
+ size_t w_step; // # warm up/burn in iter.
+ size_t s_step; // # sampling iterations.
+ size_t r_pace; // Record pace.
+ size_t w_pace; // Write pace.
+ size_t n_accept; // Number of acceptance.
+ size_t n_mh; // # MH steps in each iter.
+ double geo_mean; // Mean of geometric dist.
+ long int randseed;
+ double trace_G;
+
+ HYPBSLMM cHyp_initial;
+
+ // VARCOV-related parameters.
+ double window_cm;
+ size_t window_bp;
+ size_t window_ns;
+
+ // vc-related parameters.
+ size_t n_block;
+
+ // Summary statistics.
+ bool error;
+
+ // Number of individuals.
+ size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
+
+ // Number of observed and missing phenotypes.
+ size_t np_obs, np_miss;
+
+ // Number of SNPs.
+ size_t ns_total, ns_test, ns_study, ns_ref;
+
+ size_t ng_total, ng_test; // Number of genes.
+ size_t ni_control, ni_case; // Number of controls and number of cases.
+ size_t ni_subsample; // Number of subsampled individuals.
+ size_t n_cvt; // Number of covariates.
+ size_t n_cat; // Number of continuous categories.
+ size_t n_ph; // Number of phenotypes.
+ size_t n_vc; // Number of variance components
+ // (including the diagonal matrix).
+ double time_total; // Record total time.
+ double time_G; // Time spent on reading files the
+ // second time and calculate K.
+ double time_eigen; // Time spent on eigen-decomposition.
+ double time_UtX; // Time spent on calculating UX and Uy.
+ double time_UtZ; // Time calculating UtZ for probit BSLMM.
+ double time_opt; // Time on optimization iterations/MCMC.
+ double time_Omega; // Time spent on calculating Omega.
+ double time_hyp; // Time sampling hyperparameters in PMM.
+ double time_Proposal; // Time spent on constructing the
+ // proposal distribution (i.e. the
+ // initial LMM or LM analysis).
+
+ // Data.
+ // Vector recording all phenotypes (NA replaced with -9).
+ vector<vector<double>> pheno;
+
+ // Vector recording all covariates (NA replaced with -9).
+ vector<vector<double>> cvt;
+
+ // Vector recording all covariates (NA replaced with -9).
+ vector<double> gxe;
+
+ // Vector recording weights for the individuals, which is
+ // useful for animal breeding studies.
+ vector<double> weight;
+
+ // Matrix recording when a phenotype is missing for an
+ // individual; 0 missing, 1 available.
+ vector<vector<int>> indicator_pheno;
+
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis
+ vector<int> indicator_idv;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<vector<int>> mindicator_snp;
+
+ // Indicator for covariates: 0 missing, 1 available for
+ // analysis.
+ vector<int> indicator_cvt;
+
+ // Indicator for gxe: 0 missing, 1 available for analysis.
+ vector<int> indicator_gxe;
+
+ // Indicator for weight: 0 missing, 1 available for analysis.
+ vector<int> indicator_weight;
+
+ // Indicator for estimated breeding value file: 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_bv;
+
+ // Indicator for read file: 0 missing, 1 available for analysis.
+ vector<int> indicator_read;
+ vector<double> vec_read; // Total number of reads.
+ vector<double> vec_bv; // Breeding values.
+ vector<size_t> est_column;
+
+ map<string, int> mapID2num; // Map small ID to number, 0 to n-1.
+ map<string, string> mapRS2chr; // Map rs# to chromosome location.
+ map<string, long int> mapRS2bp; // Map rs# to base position.
+ map<string, double> mapRS2cM; // Map rs# to cM.
+ map<string, double> mapRS2est; // Map rs# to parameters.
+ map<string, size_t> mapRS2cat; // Map rs# to category number.
+ map<string, vector<double>> mapRS2catc; // Map rs# to cont. cat's.
+ map<string, double> mapRS2wsnp; // Map rs# to SNP weights.
+ map<string, vector<double>> mapRS2wcat; // Map rs# to SNP cat weights.
+
+ vector<SNPINFO> snpInfo; // Record SNP information.
+ vector<vector<SNPINFO>> msnpInfo; // Record SNP information.
+ set<string> setSnps; // Set of snps for analysis.
+
+ // Constructor.
+ PARAM();
+
+ // Functions.
+ void ReadFiles();
+ void CheckParam();
+ void CheckData();
+ void PrintSummary();
+ void ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+ void ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
+ const bool calc_K);
+ void CheckCvt();
+ void CopyCvt(gsl_matrix *W);
+ void CopyA(size_t flag, gsl_matrix *A);
+ void CopyGxe(gsl_vector *gxe);
+ void CopyWeight(gsl_vector *w);
+ void ProcessCvtPhen();
+ void CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag);
+ void CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag);
+ void CalcKin(gsl_matrix *matrix_kin);
+ void CalcS(const map<string, double> &mapRS2wA,
+ const map<string, double> &mapRS2wK, const gsl_matrix *W,
+ gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar,
+ gsl_vector *ns);
+ void WriteVector(const gsl_vector *q, const gsl_vector *s,
+ const size_t n_total, const string suffix);
+ void WriteVar(const string suffix);
+ void WriteMatrix(const gsl_matrix *matrix_U, const string suffix);
+ void WriteVector(const gsl_vector *vector_D, const string suffix);
+ void CopyRead(gsl_vector *log_N);
+ void ObtainWeight(const set<string> &setSnps_beta,
+ map<string, double> &mapRS2wK);
+ void UpdateWeight(const size_t pve_flag, const map<string, double> &mapRS2wK,
+ const size_t ni_test, const gsl_vector *ns,
+ map<string, double> &mapRS2wA);
+ void UpdateSNPnZ(const map<string, double> &mapRS2wA,
+ const map<string, string> &mapRS2A1,
+ const map<string, double> &mapRS2z, gsl_vector *w,
+ gsl_vector *z, vector<size_t> &vec_cat);
+ void UpdateSNP(const map<string, double> &mapRS2wA);
};
-size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt);
+size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);
#endif
-
diff --git a/src/prdt.cpp b/src/prdt.cpp
index b29d150..3e7c004 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -16,527 +16,537 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
-#include <sstream>
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+#include <bitset>
+#include <cmath>
#include <fstream>
-#include <string>
#include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
+#include <sstream>
#include <stdio.h>
#include <stdlib.h>
-#include <cmath>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
-#include "gsl/gsl_blas.h"
+#include <string>
+#include <vector>
-#include "io.h"
-#include "lapack.h"
#include "gzstream.h"
#include "io.h"
-#include "prdt.h"
+#include "io.h"
+#include "lapack.h"
#include "mathfunc.h"
+#include "prdt.h"
using namespace std;
-void PRDT::CopyFromParam (PARAM &cPar) {
- a_mode=cPar.a_mode;
- d_pace=cPar.d_pace;
+void PRDT::CopyFromParam(PARAM &cPar) {
+ a_mode = cPar.a_mode;
+ d_pace = cPar.d_pace;
- file_bfile=cPar.file_bfile;
- file_geno=cPar.file_geno;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
- indicator_pheno=cPar.indicator_pheno;
- indicator_cvt=cPar.indicator_cvt;
- indicator_idv=cPar.indicator_idv;
+ indicator_pheno = cPar.indicator_pheno;
+ indicator_cvt = cPar.indicator_cvt;
+ indicator_idv = cPar.indicator_idv;
- snpInfo=cPar.snpInfo;
- mapRS2est=cPar.mapRS2est;
+ snpInfo = cPar.snpInfo;
+ mapRS2est = cPar.mapRS2est;
- time_eigen=0;
+ time_eigen = 0;
- n_ph=cPar.n_ph;
- np_obs=cPar.np_obs;
- np_miss=cPar.np_miss;
- ns_total=cPar.ns_total;
- ns_test=0;
+ n_ph = cPar.n_ph;
+ np_obs = cPar.np_obs;
+ np_miss = cPar.np_miss;
+ ns_total = cPar.ns_total;
+ ns_test = 0;
- return;
+ return;
}
-void PRDT::CopyToParam (PARAM &cPar) {
- cPar.ns_test=ns_test;
- cPar.time_eigen=time_eigen;
+void PRDT::CopyToParam(PARAM &cPar) {
+ cPar.ns_test = ns_test;
+ cPar.time_eigen = time_eigen;
- return;
+ return;
}
-void PRDT::WriteFiles (gsl_vector *y_prdt) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".";
- file_str+="prdt";
- file_str+=".txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- size_t ci_test=0;
- for (size_t i=0; i<indicator_idv.size(); i++) {
- if (indicator_idv[i]==1) {
- outfile<<"NA"<<endl;
- } else {
- outfile<<gsl_vector_get (y_prdt, ci_test)<<endl;
- ci_test++;
- }
- }
-
- outfile.close();
- outfile.clear();
- return;
+void PRDT::WriteFiles(gsl_vector *y_prdt) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".";
+ file_str += "prdt";
+ file_str += ".txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ size_t ci_test = 0;
+ for (size_t i = 0; i < indicator_idv.size(); i++) {
+ if (indicator_idv[i] == 1) {
+ outfile << "NA" << endl;
+ } else {
+ outfile << gsl_vector_get(y_prdt, ci_test) << endl;
+ ci_test++;
+ }
+ }
+
+ outfile.close();
+ outfile.clear();
+ return;
}
-void PRDT::WriteFiles (gsl_matrix *Y_full) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".prdt.txt";
-
- ofstream outfile (file_str.c_str(), ofstream::out);
- if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- size_t ci_test=0;
- for (size_t i=0; i<indicator_cvt.size(); i++) {
- if (indicator_cvt[i]==0) {
- outfile<<"NA"<<endl;
- } else {
- for (size_t j=0; j<Y_full->size2; j++) {
- outfile << gsl_matrix_get(Y_full,ci_test,j) <<
- "\t";
- }
- outfile<<endl;
- ci_test++;
- }
- }
-
- outfile.close();
- outfile.clear();
- return;
+void PRDT::WriteFiles(gsl_matrix *Y_full) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".prdt.txt";
+
+ ofstream outfile(file_str.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ size_t ci_test = 0;
+ for (size_t i = 0; i < indicator_cvt.size(); i++) {
+ if (indicator_cvt[i] == 0) {
+ outfile << "NA" << endl;
+ } else {
+ for (size_t j = 0; j < Y_full->size2; j++) {
+ outfile << gsl_matrix_get(Y_full, ci_test, j) << "\t";
+ }
+ outfile << endl;
+ ci_test++;
+ }
+ }
+
+ outfile.close();
+ outfile.clear();
+ return;
}
-void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
- size_t ni_test=u_hat->size, ni_total=G->size1;
-
- gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test);
- gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test);
- gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
- gsl_vector *eval=gsl_vector_alloc (ni_test);
- gsl_vector *Utu=gsl_vector_alloc (ni_test);
- gsl_vector *w=gsl_vector_alloc (ni_total);
- gsl_permutation *pmt=gsl_permutation_alloc (ni_test);
-
- //center matrix G based on indicator_idv
- for (size_t i=0; i<ni_total; i++) {
- gsl_vector_set(w, i, indicator_idv[i]);
- }
- CenterMatrix(G, w);
-
- //obtain Koo and Kfo
- size_t o_i=0, o_j=0;
- double d;
- for (size_t i=0; i<indicator_idv.size(); i++) {
- o_j=0;
- for (size_t j=0; j<indicator_idv.size(); j++) {
- d=gsl_matrix_get(G, i, j);
- if (indicator_idv[i]==1 && indicator_idv[j]==1) {
- gsl_matrix_set(Goo, o_i, o_j, d);
- }
- if (indicator_idv[i]==0 && indicator_idv[j]==1) {
- gsl_matrix_set(Gfo, i-o_i, o_j, d);
- }
- if (indicator_idv[j]==1) {o_j++;}
- }
- if (indicator_idv[i]==1) {o_i++;}
- }
-
- //matrix operations to get u_prdt
- cout<<"Start Eigen-Decomposition..."<<endl;
- clock_t time_start=clock();
- EigenDecomp (Goo, U, eval, 0);
- for (size_t i=0; i<eval->size; i++) {
- if (gsl_vector_get(eval,i)<1e-10) {
- gsl_vector_set(eval, i, 0);
- }
- }
-
- time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
- gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu);
- for (size_t i=0; i<eval->size; i++) {
- d=gsl_vector_get(eval, i);
- if (d!=0) {
- d=gsl_vector_get(Utu, i)/d;
- gsl_vector_set(Utu, i, d);
- }
- }
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval);
- gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
-
- // Free matrices.
- gsl_matrix_free(Goo);
- gsl_matrix_free(Gfo);
- gsl_matrix_free(U);
- gsl_vector_free(eval);
- gsl_vector_free(Utu);
- gsl_vector_free(w);
- gsl_permutation_free(pmt);
-
- return;
+void PRDT::AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
+ size_t ni_test = u_hat->size, ni_total = G->size1;
+
+ gsl_matrix *Goo = gsl_matrix_alloc(ni_test, ni_test);
+ gsl_matrix *Gfo = gsl_matrix_alloc(ni_total - ni_test, ni_test);
+ gsl_matrix *U = gsl_matrix_alloc(ni_test, ni_test);
+ gsl_vector *eval = gsl_vector_alloc(ni_test);
+ gsl_vector *Utu = gsl_vector_alloc(ni_test);
+ gsl_vector *w = gsl_vector_alloc(ni_total);
+ gsl_permutation *pmt = gsl_permutation_alloc(ni_test);
+
+ // center matrix G based on indicator_idv
+ for (size_t i = 0; i < ni_total; i++) {
+ gsl_vector_set(w, i, indicator_idv[i]);
+ }
+ CenterMatrix(G, w);
+
+ // obtain Koo and Kfo
+ size_t o_i = 0, o_j = 0;
+ double d;
+ for (size_t i = 0; i < indicator_idv.size(); i++) {
+ o_j = 0;
+ for (size_t j = 0; j < indicator_idv.size(); j++) {
+ d = gsl_matrix_get(G, i, j);
+ if (indicator_idv[i] == 1 && indicator_idv[j] == 1) {
+ gsl_matrix_set(Goo, o_i, o_j, d);
+ }
+ if (indicator_idv[i] == 0 && indicator_idv[j] == 1) {
+ gsl_matrix_set(Gfo, i - o_i, o_j, d);
+ }
+ if (indicator_idv[j] == 1) {
+ o_j++;
+ }
+ }
+ if (indicator_idv[i] == 1) {
+ o_i++;
+ }
+ }
+
+ // matrix operations to get u_prdt
+ cout << "Start Eigen-Decomposition..." << endl;
+ clock_t time_start = clock();
+ EigenDecomp(Goo, U, eval, 0);
+ for (size_t i = 0; i < eval->size; i++) {
+ if (gsl_vector_get(eval, i) < 1e-10) {
+ gsl_vector_set(eval, i, 0);
+ }
+ }
+
+ time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+ gsl_blas_dgemv(CblasTrans, 1.0, U, u_hat, 0.0, Utu);
+ for (size_t i = 0; i < eval->size; i++) {
+ d = gsl_vector_get(eval, i);
+ if (d != 0) {
+ d = gsl_vector_get(Utu, i) / d;
+ gsl_vector_set(Utu, i, d);
+ }
+ }
+ gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, eval);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
+
+ // Free matrices.
+ gsl_matrix_free(Goo);
+ gsl_matrix_free(Gfo);
+ gsl_matrix_free(U);
+ gsl_vector_free(eval);
+ gsl_vector_free(Utu);
+ gsl_vector_free(w);
+ gsl_permutation_free(pmt);
+
+ return;
}
-void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return;
- }
-
- string line;
- char *ch_ptr;
- string rs;
-
- size_t n_miss, n_train_nomiss, c_phen;
- double geno, x_mean, x_train_mean, effect_size;
-
- gsl_vector *x=gsl_vector_alloc (y_prdt->size);
- gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size);
-
- ns_test=0;
-
- // Start reading genotypes and analyze.
- for (size_t t=0; t<ns_total; ++t) {
- !safeGetline(infile, line).eof();
- if (t%d_pace==0 || t==(ns_total-1)) {
- ProgressBar ("Reading SNPs ", t, ns_total-1);
- }
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- rs=ch_ptr;
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- if (mapRS2est.count(rs)==0) {
- continue;
- } else {
- effect_size=mapRS2est[rs];
- }
-
- x_mean=0.0;
- c_phen=0;
- n_miss=0;
- x_train_mean=0;
- n_train_nomiss=0;
-
- gsl_vector_set_zero(x_miss);
-
- for (size_t i=0; i<indicator_idv.size(); ++i) {
- ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==1) {
- if (strcmp(ch_ptr, "NA")!=0) {
- geno=atof(ch_ptr);
- x_train_mean+=geno;
- n_train_nomiss++;
- }
- } else {
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- } else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean+=geno;
- }
- c_phen++;
- }
- }
-
- if (x->size==n_miss) {
- cout << "snp " << rs << " has missing genotype for all " <<
- "individuals and will be ignored." << endl;
- continue;}
-
-
- x_mean/=(double)(x->size-n_miss);
- x_train_mean/=(double)(n_train_nomiss);
-
-
- for (size_t i=0; i<x->size; ++i) {
- geno=gsl_vector_get(x, i);
- if (gsl_vector_get (x_miss, i)==0) {
- gsl_vector_set(x, i, x_mean-x_train_mean);
- } else {
- gsl_vector_set(x, i, geno-x_train_mean);
- }
- }
-
- gsl_vector_scale (x, effect_size);
- gsl_vector_add (y_prdt, x);
-
- ns_test++;
- }
- cout<<endl;
-
- gsl_vector_free (x);
- gsl_vector_free (x_miss);
-
- infile.close();
- infile.clear();
-
- return;
+void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return;
+ }
+
+ string line;
+ char *ch_ptr;
+ string rs;
+
+ size_t n_miss, n_train_nomiss, c_phen;
+ double geno, x_mean, x_train_mean, effect_size;
+
+ gsl_vector *x = gsl_vector_alloc(y_prdt->size);
+ gsl_vector *x_miss = gsl_vector_alloc(y_prdt->size);
+
+ ns_test = 0;
+
+ // Start reading genotypes and analyze.
+ for (size_t t = 0; t < ns_total; ++t) {
+ !safeGetline(infile, line).eof();
+ if (t % d_pace == 0 || t == (ns_total - 1)) {
+ ProgressBar("Reading SNPs ", t, ns_total - 1);
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ rs = ch_ptr;
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ if (mapRS2est.count(rs) == 0) {
+ continue;
+ } else {
+ effect_size = mapRS2est[rs];
+ }
+
+ x_mean = 0.0;
+ c_phen = 0;
+ n_miss = 0;
+ x_train_mean = 0;
+ n_train_nomiss = 0;
+
+ gsl_vector_set_zero(x_miss);
+
+ for (size_t i = 0; i < indicator_idv.size(); ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ if (indicator_idv[i] == 1) {
+ if (strcmp(ch_ptr, "NA") != 0) {
+ geno = atof(ch_ptr);
+ x_train_mean += geno;
+ n_train_nomiss++;
+ }
+ } else {
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(x_miss, c_phen, 0.0);
+ n_miss++;
+ } else {
+ geno = atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean += geno;
+ }
+ c_phen++;
+ }
+ }
+
+ if (x->size == n_miss) {
+ cout << "snp " << rs << " has missing genotype for all "
+ << "individuals and will be ignored." << endl;
+ continue;
+ }
+
+ x_mean /= (double)(x->size - n_miss);
+ x_train_mean /= (double)(n_train_nomiss);
+
+ for (size_t i = 0; i < x->size; ++i) {
+ geno = gsl_vector_get(x, i);
+ if (gsl_vector_get(x_miss, i) == 0) {
+ gsl_vector_set(x, i, x_mean - x_train_mean);
+ } else {
+ gsl_vector_set(x, i, geno - x_train_mean);
+ }
+ }
+
+ gsl_vector_scale(x, effect_size);
+ gsl_vector_add(y_prdt, x);
+
+ ns_test++;
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
-void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
- string file_bed=file_bfile+".bed";
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return;
- }
-
- char ch[1];
- bitset<8> b;
- string rs;
-
- size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
- double geno, x_mean, x_train_mean, effect_size;
-
- gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-
- // Calculate n_bit and c, the number of bit for each SNP.
- if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;}
- else {n_bit=indicator_idv.size()/4+1; }
-
- // Print the first 3 magic numbers.
- for (size_t i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- ns_test=0;
-
- for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
- if (t%d_pace==0 || t==snpInfo.size()-1) {
- ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
- }
-
- rs=snpInfo[t].rs_number;
-
- if (mapRS2est.count(rs)==0) {
- continue;
- } else {
- effect_size=mapRS2est[rs];
- }
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- x_mean=0.0;
- n_miss=0;
- ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0;
- for (size_t i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) &&
- ci_total==indicator_idv.size()) {
- break;
- }
- if (indicator_idv[ci_total]==1) {
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- x_train_mean+=2.0;
- n_train_nomiss++;
- }
- else {
- x_train_mean+=1.0;
- n_train_nomiss++;
- }
- }
- else {
- if (b[2*j+1]==1) {
- n_train_nomiss++;
- }
- else {}
- }
- } else {
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(x,ci_test,2);
- x_mean+=2.0;
- }
- else {
- gsl_vector_set(x,ci_test,1);
- x_mean+=1.0;
- }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(x,ci_test,0);
- }
- else {
- gsl_vector_set(x,ci_test,-9);
- n_miss++;
- }
- }
- ci_test++;
- }
- ci_total++;
-
- }
- }
-
- if (x->size==n_miss) {
- cout << "snp " << rs << " has missing genotype for all " <<
- "individuals and will be ignored."<<endl;
- continue;
- }
-
- x_mean/=(double)(x->size-n_miss);
- x_train_mean/=(double)(n_train_nomiss);
-
- for (size_t i=0; i<x->size; ++i) {
- geno=gsl_vector_get(x, i);
- if (geno==-9) {
- gsl_vector_set(x, i, x_mean-x_train_mean);
- } else {
- gsl_vector_set(x, i, geno-x_train_mean);
- }
- }
-
- gsl_vector_scale (x, effect_size);
- gsl_vector_add (y_prdt, x);
-
- ns_test++;
- }
- cout<<endl;
-
- gsl_vector_free (x);
-
- infile.close();
- infile.clear();
-
- return;
+void PRDT::AnalyzePlink(gsl_vector *y_prdt) {
+ string file_bed = file_bfile + ".bed";
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return;
+ }
+
+ char ch[1];
+ bitset<8> b;
+ string rs;
+
+ size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
+ double geno, x_mean, x_train_mean, effect_size;
+
+ gsl_vector *x = gsl_vector_alloc(y_prdt->size);
+
+ // Calculate n_bit and c, the number of bit for each SNP.
+ if (indicator_idv.size() % 4 == 0) {
+ n_bit = indicator_idv.size() / 4;
+ } else {
+ n_bit = indicator_idv.size() / 4 + 1;
+ }
+
+ // Print the first 3 magic numbers.
+ for (size_t i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ ns_test = 0;
+
+ for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
+ if (t % d_pace == 0 || t == snpInfo.size() - 1) {
+ ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ }
+
+ rs = snpInfo[t].rs_number;
+
+ if (mapRS2est.count(rs) == 0) {
+ continue;
+ } else {
+ effect_size = mapRS2est[rs];
+ }
+
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ x_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0;
+ ci_test = 0;
+ x_train_mean = 0;
+ n_train_nomiss = 0;
+ for (size_t i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == indicator_idv.size()) {
+ break;
+ }
+ if (indicator_idv[ci_total] == 1) {
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ x_train_mean += 2.0;
+ n_train_nomiss++;
+ } else {
+ x_train_mean += 1.0;
+ n_train_nomiss++;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ n_train_nomiss++;
+ } else {
+ }
+ }
+ } else {
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(x, ci_test, 2);
+ x_mean += 2.0;
+ } else {
+ gsl_vector_set(x, ci_test, 1);
+ x_mean += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(x, ci_test, 0);
+ } else {
+ gsl_vector_set(x, ci_test, -9);
+ n_miss++;
+ }
+ }
+ ci_test++;
+ }
+ ci_total++;
+ }
+ }
+
+ if (x->size == n_miss) {
+ cout << "snp " << rs << " has missing genotype for all "
+ << "individuals and will be ignored." << endl;
+ continue;
+ }
+
+ x_mean /= (double)(x->size - n_miss);
+ x_train_mean /= (double)(n_train_nomiss);
+
+ for (size_t i = 0; i < x->size; ++i) {
+ geno = gsl_vector_get(x, i);
+ if (geno == -9) {
+ gsl_vector_set(x, i, x_mean - x_train_mean);
+ } else {
+ gsl_vector_set(x, i, geno - x_train_mean);
+ }
+ }
+
+ gsl_vector_scale(x, effect_size);
+ gsl_vector_add(y_prdt, x);
+
+ ns_test++;
+ }
+ cout << endl;
+
+ gsl_vector_free(x);
+
+ infile.close();
+ infile.clear();
+
+ return;
}
// Predict missing phenotypes using ridge regression.
// Y_hat contains fixed effects
-void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
- gsl_matrix *Y_full) {
- gsl_vector *y_obs=gsl_vector_alloc (np_obs);
- gsl_vector *y_miss=gsl_vector_alloc (np_miss);
- gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs);
- gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs);
- gsl_vector *Hiy=gsl_vector_alloc (np_obs);
-
- size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0;
-
- // Obtain H_oo, H_mo.
- c_obs1=0; c_miss1=0;
- for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) {
- if (indicator_cvt[i1]==0) {continue;}
- for (vector<int>::size_type j1=0; j1<n_ph; ++j1) {
-
- c_obs2=0; c_miss2=0;
- for (vector<int>::size_type i2=0;
- i2<indicator_pheno.size(); ++i2) {
- if (indicator_cvt[i2]==0) {continue;}
- for (vector<int>::size_type j2=0;
- j2<n_ph; j2++) {
-
- if (indicator_pheno[i2][j2]==1) {
- if (indicator_pheno[i1][j1]==1) {
- gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
- } else {
- gsl_matrix_set (H_mo, c_miss1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
- }
- c_obs2++;
- } else {
- c_miss2++;
- }
- }
- }
-
- if (indicator_pheno[i1][j1]==1) {
- c_obs1++;
- } else {
- c_miss1++;
- }
- }
-
- }
-
- // Do LU decomposition of H_oo.
- int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (np_obs);
- LUDecomp (H_oo, pmt, &sig);
-
- // Obtain y_obs=y_full-y_hat.
- // Add the fixed effects part to y_miss: y_miss=y_hat.
- c_obs1=0; c_miss1=0;
- for (vector<int>::size_type i=0;
- i<indicator_pheno.size(); ++i) {
- if (indicator_cvt[i]==0) {continue;}
-
- for (vector<int>::size_type j=0; j<n_ph; ++j) {
- if (indicator_pheno[i][j]==1) {
- gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) );
- c_obs1++;
- } else {
- gsl_vector_set (y_miss, c_miss1, gsl_matrix_get (Y_hat, i, j) );
- c_miss1++;
- }
- }
- }
-
- LUSolve (H_oo, pmt, y_obs, Hiy);
-
- gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
-
- // Put back predicted y_miss to Y_full.
- c_miss1=0;
- for (vector<int>::size_type i=0;
- i<indicator_pheno.size(); ++i) {
- if (indicator_cvt[i]==0) {continue;}
-
- for (vector<int>::size_type j=0; j<n_ph; ++j) {
- if (indicator_pheno[i][j]==0) {
- gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) );
- c_miss1++;
- }
- }
- }
-
- // Free matrices.
- gsl_vector_free(y_obs);
- gsl_vector_free(y_miss);
- gsl_matrix_free(H_oo);
- gsl_matrix_free(H_mo);
- gsl_vector_free(Hiy);
-
- return;
+void PRDT::MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H,
+ gsl_matrix *Y_full) {
+ gsl_vector *y_obs = gsl_vector_alloc(np_obs);
+ gsl_vector *y_miss = gsl_vector_alloc(np_miss);
+ gsl_matrix *H_oo = gsl_matrix_alloc(np_obs, np_obs);
+ gsl_matrix *H_mo = gsl_matrix_alloc(np_miss, np_obs);
+ gsl_vector *Hiy = gsl_vector_alloc(np_obs);
+
+ size_t c_obs1 = 0, c_obs2 = 0, c_miss1 = 0, c_miss2 = 0;
+
+ // Obtain H_oo, H_mo.
+ c_obs1 = 0;
+ c_miss1 = 0;
+ for (vector<int>::size_type i1 = 0; i1 < indicator_pheno.size(); ++i1) {
+ if (indicator_cvt[i1] == 0) {
+ continue;
+ }
+ for (vector<int>::size_type j1 = 0; j1 < n_ph; ++j1) {
+
+ c_obs2 = 0;
+ c_miss2 = 0;
+ for (vector<int>::size_type i2 = 0; i2 < indicator_pheno.size(); ++i2) {
+ if (indicator_cvt[i2] == 0) {
+ continue;
+ }
+ for (vector<int>::size_type j2 = 0; j2 < n_ph; j2++) {
+
+ if (indicator_pheno[i2][j2] == 1) {
+ if (indicator_pheno[i1][j1] == 1) {
+ gsl_matrix_set(
+ H_oo, c_obs1, c_obs2,
+ gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2));
+ } else {
+ gsl_matrix_set(
+ H_mo, c_miss1, c_obs2,
+ gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2));
+ }
+ c_obs2++;
+ } else {
+ c_miss2++;
+ }
+ }
+ }
+
+ if (indicator_pheno[i1][j1] == 1) {
+ c_obs1++;
+ } else {
+ c_miss1++;
+ }
+ }
+ }
+
+ // Do LU decomposition of H_oo.
+ int sig;
+ gsl_permutation *pmt = gsl_permutation_alloc(np_obs);
+ LUDecomp(H_oo, pmt, &sig);
+
+ // Obtain y_obs=y_full-y_hat.
+ // Add the fixed effects part to y_miss: y_miss=y_hat.
+ c_obs1 = 0;
+ c_miss1 = 0;
+ for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+
+ for (vector<int>::size_type j = 0; j < n_ph; ++j) {
+ if (indicator_pheno[i][j] == 1) {
+ gsl_vector_set(y_obs, c_obs1, gsl_matrix_get(Y_full, i, j) -
+ gsl_matrix_get(Y_hat, i, j));
+ c_obs1++;
+ } else {
+ gsl_vector_set(y_miss, c_miss1, gsl_matrix_get(Y_hat, i, j));
+ c_miss1++;
+ }
+ }
+ }
+
+ LUSolve(H_oo, pmt, y_obs, Hiy);
+
+ gsl_blas_dgemv(CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
+
+ // Put back predicted y_miss to Y_full.
+ c_miss1 = 0;
+ for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) {
+ if (indicator_cvt[i] == 0) {
+ continue;
+ }
+
+ for (vector<int>::size_type j = 0; j < n_ph; ++j) {
+ if (indicator_pheno[i][j] == 0) {
+ gsl_matrix_set(Y_full, i, j, gsl_vector_get(y_miss, c_miss1));
+ c_miss1++;
+ }
+ }
+ }
+
+ // Free matrices.
+ gsl_vector_free(y_obs);
+ gsl_vector_free(y_miss);
+ gsl_matrix_free(H_oo);
+ gsl_matrix_free(H_mo);
+ gsl_vector_free(Hiy);
+
+ return;
}
-
-
diff --git a/src/prdt.h b/src/prdt.h
index 0939b36..571fdb8 100644
--- a/src/prdt.h
+++ b/src/prdt.h
@@ -19,58 +19,50 @@
#ifndef __PRDT_H__
#define __PRDT_H__
-#include <vector>
-#include <map>
-#include <string.h>
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
#include "param.h"
+#include <map>
+#include <string.h>
+#include <vector>
using namespace std;
class PRDT {
public:
- // IO-related parameters.
- size_t a_mode;
- size_t d_pace;
-
- string file_bfile;
- string file_geno;
- string file_out;
- string path_out;
-
- vector<vector<int> > indicator_pheno;
- vector<int> indicator_cvt;
- vector<int> indicator_idv;
- vector<SNPINFO> snpInfo;
- map<string, double> mapRS2est;
-
- size_t n_ph;
- size_t np_obs, np_miss;
- size_t ns_total;
- size_t ns_test;
-
- double time_eigen;
-
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
- void WriteFiles (gsl_vector *y_prdt);
- void WriteFiles (gsl_matrix *Y_full);
- void AddBV (gsl_matrix *G, const gsl_vector *u_hat,
- gsl_vector *y_prdt);
- void AnalyzeBimbam (gsl_vector *y_prdt);
- void AnalyzePlink (gsl_vector *y_prdt);
- void MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
- gsl_matrix *Y_full);
+ // IO-related parameters.
+ size_t a_mode;
+ size_t d_pace;
+
+ string file_bfile;
+ string file_geno;
+ string file_out;
+ string path_out;
+
+ vector<vector<int>> indicator_pheno;
+ vector<int> indicator_cvt;
+ vector<int> indicator_idv;
+ vector<SNPINFO> snpInfo;
+ map<string, double> mapRS2est;
+
+ size_t n_ph;
+ size_t np_obs, np_miss;
+ size_t ns_total;
+ size_t ns_test;
+
+ double time_eigen;
+
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+ void WriteFiles(gsl_vector *y_prdt);
+ void WriteFiles(gsl_matrix *Y_full);
+ void AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt);
+ void AnalyzeBimbam(gsl_vector *y_prdt);
+ void AnalyzePlink(gsl_vector *y_prdt);
+ void MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H,
+ gsl_matrix *Y_full);
};
#endif
-
-
-
-
-
-
-
diff --git a/src/varcov.cpp b/src/varcov.cpp
index 46b5bf8..0f87ba8 100644
--- a/src/varcov.cpp
+++ b/src/varcov.cpp
@@ -16,103 +16,126 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
+#include <bitset>
+#include <cmath>
+#include <cstring>
#include <fstream>
-#include <sstream>
-#include <string>
#include <iomanip>
-#include <bitset>
-#include <vector>
+#include <iostream>
#include <map>
#include <set>
-#include <cstring>
-#include <cmath>
+#include <sstream>
#include <stdio.h>
#include <stdlib.h>
+#include <string>
+#include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
-#include "lapack.h"
#include "gzstream.h"
-#include "param.h"
-#include "varcov.h"
#include "io.h"
+#include "lapack.h"
#include "mathfunc.h"
+#include "param.h"
+#include "varcov.h"
using namespace std;
-void VARCOV::CopyFromParam (PARAM &cPar) {
- d_pace=cPar.d_pace;
+void VARCOV::CopyFromParam(PARAM &cPar) {
+ d_pace = cPar.d_pace;
- file_bfile=cPar.file_bfile;
- file_geno=cPar.file_geno;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
- time_opt=0.0;
+ time_opt = 0.0;
- window_cm=cPar.window_cm;
- window_bp=cPar.window_bp;
- window_ns=cPar.window_ns;
+ window_cm = cPar.window_cm;
+ window_bp = cPar.window_bp;
+ window_ns = cPar.window_ns;
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
- snpInfo=cPar.snpInfo;
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
- return;
+ return;
}
-void VARCOV::CopyToParam (PARAM &cPar) {
- cPar.time_opt=time_opt;
- return;
+void VARCOV::CopyToParam(PARAM &cPar) {
+ cPar.time_opt = time_opt;
+ return;
}
-void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
- const vector<vector<double> > &Cov_mat) {
+void VARCOV::WriteCov(const int flag, const vector<SNPINFO> &snpInfo_sub,
+ const vector<vector<double>> &Cov_mat) {
string file_cov;
- file_cov=path_out+"/"+file_out;
- file_cov+=".cor.txt";
+ file_cov = path_out + "/" + file_out;
+ file_cov += ".cor.txt";
ofstream outfile;
- if (flag==0) {
- outfile.open (file_cov.c_str(), ofstream::out);
- if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;}
+ if (flag == 0) {
+ outfile.open(file_cov.c_str(), ofstream::out);
+ if (!outfile) {
+ cout << "error writing file: " << file_cov << endl;
+ return;
+ }
- outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"
- <<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"
- <<"\t"<<"af"<<"\t"<<"window_size"
- <<"\t"<<"var"<<"\t"<<"cor"<<endl;
+ outfile << "chr"
+ << "\t"
+ << "rs"
+ << "\t"
+ << "ps"
+ << "\t"
+ << "n_mis"
+ << "\t"
+ << "n_obs"
+ << "\t"
+ << "allele1"
+ << "\t"
+ << "allele0"
+ << "\t"
+ << "af"
+ << "\t"
+ << "window_size"
+ << "\t"
+ << "var"
+ << "\t"
+ << "cor" << endl;
} else {
- outfile.open (file_cov.c_str(), ofstream::app);
- if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;}
-
- for (size_t i=0; i<Cov_mat.size(); i++) {
- outfile << snpInfo_sub[i].chr << "\t" << snpInfo_sub[i].rs_number <<
- "\t" << snpInfo_sub[i].base_position << "\t" <<
- snpInfo_sub[i].n_miss << "\t" << snpInfo_sub[i].n_idv << "\t" <<
- snpInfo_sub[i].a_minor << "\t" << snpInfo_sub[i].a_major << "\t" <<
- fixed << setprecision(3) << snpInfo_sub[i].maf << "\t" <<
- Cov_mat[i].size()-1 << "\t";
- outfile<<scientific<<setprecision(6)<<Cov_mat[i][0]<<"\t";
-
- if (Cov_mat[i].size()==1) {
- outfile<<"NA";
+ outfile.open(file_cov.c_str(), ofstream::app);
+ if (!outfile) {
+ cout << "error writing file: " << file_cov << endl;
+ return;
+ }
+
+ for (size_t i = 0; i < Cov_mat.size(); i++) {
+ outfile << snpInfo_sub[i].chr << "\t" << snpInfo_sub[i].rs_number << "\t"
+ << snpInfo_sub[i].base_position << "\t" << snpInfo_sub[i].n_miss
+ << "\t" << snpInfo_sub[i].n_idv << "\t" << snpInfo_sub[i].a_minor
+ << "\t" << snpInfo_sub[i].a_major << "\t" << fixed
+ << setprecision(3) << snpInfo_sub[i].maf << "\t"
+ << Cov_mat[i].size() - 1 << "\t";
+ outfile << scientific << setprecision(6) << Cov_mat[i][0] << "\t";
+
+ if (Cov_mat[i].size() == 1) {
+ outfile << "NA";
} else {
- for (size_t j=1; j<Cov_mat[i].size(); j++) {
- if (j==(Cov_mat[i].size()-1)) {
- outfile<<Cov_mat[i][j];
- } else {
- outfile<<Cov_mat[i][j]<<",";
- }
- }
+ for (size_t j = 1; j < Cov_mat[i].size(); j++) {
+ if (j == (Cov_mat[i].size() - 1)) {
+ outfile << Cov_mat[i][j];
+ } else {
+ outfile << Cov_mat[i][j] << ",";
+ }
+ }
}
- outfile<<endl;
+ outfile << endl;
}
}
@@ -121,18 +144,18 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
return;
}
-bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) {
- int c_chr=snpInfo1.chr.compare(snpInfo2.chr);
- long int c_bp=snpInfo1.base_position-snpInfo2.base_position;
+bool CompareSNPinfo(const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) {
+ int c_chr = snpInfo1.chr.compare(snpInfo2.chr);
+ long int c_bp = snpInfo1.base_position - snpInfo2.base_position;
- if(c_chr<0) {
+ if (c_chr < 0) {
return true;
- } else if (c_chr>0) {
+ } else if (c_chr > 0) {
return false;
} else {
- if (c_bp<0) {
+ if (c_bp < 0) {
return true;
- } else if (c_bp>0) {
+ } else if (c_bp > 0) {
return false;
} else {
return true;
@@ -140,64 +163,73 @@ bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) {
}
}
-
// Do not sort SNPs (because gzip files do not support random access)
// then calculate n_nb, the number of neighbours, for each SNP.
-void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort) {
- size_t t2=0, n_nb=0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (indicator_snp[t]==0) {continue;}
-
- if (snpInfo_sort[t].chr=="-9" ||
- (snpInfo_sort[t].cM==-9 && window_cm!=0) ||
- (snpInfo_sort[t].base_position==-9 && window_bp!=0) ) {
- snpInfo_sort[t].n_nb=0; continue;
+void VARCOV::CalcNB(vector<SNPINFO> &snpInfo_sort) {
+ size_t t2 = 0, n_nb = 0;
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ if (snpInfo_sort[t].chr == "-9" ||
+ (snpInfo_sort[t].cM == -9 && window_cm != 0) ||
+ (snpInfo_sort[t].base_position == -9 && window_bp != 0)) {
+ snpInfo_sort[t].n_nb = 0;
+ continue;
}
- if (t==indicator_snp.size()-1) {snpInfo_sort[t].n_nb=0; continue;}
+ if (t == indicator_snp.size() - 1) {
+ snpInfo_sort[t].n_nb = 0;
+ continue;
+ }
- t2=t+1; n_nb=0;
+ t2 = t + 1;
+ n_nb = 0;
- while (t2<indicator_snp.size() &&
- snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
- indicator_snp[t2]==0) {
+ while (t2 < indicator_snp.size() &&
+ snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
+ indicator_snp[t2] == 0) {
t2++;
}
- while (t2<indicator_snp.size() &&
- snpInfo_sort[t2].chr==snpInfo_sort[t].chr &&
- (snpInfo_sort[t2].cM-snpInfo_sort[t].cM<window_cm ||
- window_cm==0) &&
- (snpInfo_sort[t2].base_position-snpInfo_sort[t].base_position <
- window_bp || window_bp==0) && (n_nb<window_ns|| window_ns==0)) {
- t2++; n_nb++;
- while (t2<indicator_snp.size() &&
- snpInfo_sort[t2].chr==snpInfo_sort[t].chr &&
- indicator_snp[t2]==0) {
- t2++;
+ while (t2 < indicator_snp.size() &&
+ snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
+ (snpInfo_sort[t2].cM - snpInfo_sort[t].cM < window_cm ||
+ window_cm == 0) &&
+ (snpInfo_sort[t2].base_position - snpInfo_sort[t].base_position <
+ window_bp ||
+ window_bp == 0) &&
+ (n_nb < window_ns || window_ns == 0)) {
+ t2++;
+ n_nb++;
+ while (t2 < indicator_snp.size() &&
+ snpInfo_sort[t2].chr == snpInfo_sort[t].chr &&
+ indicator_snp[t2] == 0) {
+ t2++;
}
}
- snpInfo_sort[t].n_nb=n_nb;
+ snpInfo_sort[t].n_nb = n_nb;
}
return;
}
// Vector double is centered to have mean 0.
-void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) {
+void Calc_Cor(vector<vector<double>> &X_mat, vector<double> &cov_vec) {
cov_vec.clear();
double v1, v2, r;
- vector<double> x_vec=X_mat[0];
+ vector<double> x_vec = X_mat[0];
lapack_ddot(x_vec, x_vec, v1);
- cov_vec.push_back(v1/(double)x_vec.size() );
+ cov_vec.push_back(v1 / (double)x_vec.size());
- for (size_t i=1; i<X_mat.size(); i++) {
+ for (size_t i = 1; i < X_mat.size(); i++) {
lapack_ddot(X_mat[i], x_vec, r);
lapack_ddot(X_mat[i], X_mat[i], v2);
- r/=sqrt(v1*v2);
+ r /= sqrt(v1 * v2);
cov_vec.push_back(r);
}
@@ -214,10 +246,10 @@ void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) {
// window_size (which can vary if cM was used) read bimbam mean
// genotype file and calculate the covariance matrix for neighboring
// SNPs output values at 10000-SNP-interval.
-void VARCOV::AnalyzeBimbam () {
- igzstream infile (file_geno.c_str(), igzstream::in);
+void VARCOV::AnalyzeBimbam() {
+ igzstream infile(file_geno.c_str(), igzstream::in);
if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
+ cout << "error reading genotype file:" << file_geno << endl;
return;
}
@@ -225,58 +257,64 @@ void VARCOV::AnalyzeBimbam () {
vector<SNPINFO> snpInfo_sub;
CalcNB(snpInfo);
- size_t ni_test=0;
- for (size_t i=0; i<indicator_idv.size(); i++) {
- ni_test+=indicator_idv[i];
+ size_t ni_test = 0;
+ for (size_t i = 0; i < indicator_idv.size(); i++) {
+ ni_test += indicator_idv[i];
}
- gsl_vector *geno=gsl_vector_alloc (ni_test);
+ gsl_vector *geno = gsl_vector_alloc(ni_test);
double geno_mean;
vector<double> x_vec, cov_vec;
- vector<vector<double> > X_mat, Cov_mat;
+ vector<vector<double>> X_mat, Cov_mat;
- for (size_t i=0; i<ni_test; i++) {
+ for (size_t i = 0; i < ni_test; i++) {
x_vec.push_back(0);
}
- WriteCov (0, snpInfo_sub, Cov_mat);
+ WriteCov(0, snpInfo_sub, Cov_mat);
- size_t t2=0, inc;
- int n_nb=0;
+ size_t t2 = 0, inc;
+ int n_nb = 0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%d_pace==0 || t==(indicator_snp.size()-1))
- {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
- if (indicator_snp[t]==0) {continue;}
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (t % d_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
- if (X_mat.size()==0) {
- n_nb=snpInfo[t].n_nb+1;
+ if (X_mat.size() == 0) {
+ n_nb = snpInfo[t].n_nb + 1;
} else {
- n_nb=snpInfo[t].n_nb-n_nb+1;
+ n_nb = snpInfo[t].n_nb - n_nb + 1;
}
- for (int i=0; i<n_nb; i++) {
- if (X_mat.size()==0) {t2=t;}
+ for (int i = 0; i < n_nb; i++) {
+ if (X_mat.size() == 0) {
+ t2 = t;
+ }
// Read a line of the snp is filtered out.
- inc=0;
- while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
- t2++; inc++;
+ inc = 0;
+ while (t2 < indicator_snp.size() && indicator_snp[t2] == 0) {
+ t2++;
+ inc++;
}
- Bimbam_ReadOneSNP (inc, indicator_idv, infile, geno, geno_mean);
- gsl_vector_add_constant (geno, -1.0*geno_mean);
+ Bimbam_ReadOneSNP(inc, indicator_idv, infile, geno, geno_mean);
+ gsl_vector_add_constant(geno, -1.0 * geno_mean);
- for (size_t j=0; j<geno->size; j++) {
- x_vec[j]=gsl_vector_get(geno, j);
+ for (size_t j = 0; j < geno->size; j++) {
+ x_vec[j] = gsl_vector_get(geno, j);
}
X_mat.push_back(x_vec);
t2++;
}
- n_nb=snpInfo[t].n_nb;
+ n_nb = snpInfo[t].n_nb;
Calc_Cor(X_mat, cov_vec);
Cov_mat.push_back(cov_vec);
@@ -285,15 +323,15 @@ void VARCOV::AnalyzeBimbam () {
X_mat.erase(X_mat.begin());
// Write out var/cov values.
- if (Cov_mat.size()==10000) {
- WriteCov (1, snpInfo_sub, Cov_mat);
+ if (Cov_mat.size() == 10000) {
+ WriteCov(1, snpInfo_sub, Cov_mat);
Cov_mat.clear();
snpInfo_sub.clear();
}
}
- if (Cov_mat.size()!=0) {
- WriteCov (1, snpInfo_sub, Cov_mat);
+ if (Cov_mat.size() != 0) {
+ WriteCov(1, snpInfo_sub, Cov_mat);
Cov_mat.clear();
snpInfo_sub.clear();
}
@@ -306,68 +344,76 @@ void VARCOV::AnalyzeBimbam () {
return;
}
-void VARCOV::AnalyzePlink () {
- string file_bed=file_bfile+".bed";
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+void VARCOV::AnalyzePlink() {
+ string file_bed = file_bfile + ".bed";
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return;
+ }
// Calculate the number of right-hand-side neighbours for each SNP.
vector<SNPINFO> snpInfo_sub;
CalcNB(snpInfo);
- size_t ni_test=0;
- for (size_t i=0; i<indicator_idv.size(); i++) {
- ni_test+=indicator_idv[i];
+ size_t ni_test = 0;
+ for (size_t i = 0; i < indicator_idv.size(); i++) {
+ ni_test += indicator_idv[i];
}
- gsl_vector *geno=gsl_vector_alloc (ni_test);
+ gsl_vector *geno = gsl_vector_alloc(ni_test);
double geno_mean;
vector<double> x_vec, cov_vec;
- vector<vector<double> > X_mat, Cov_mat;
+ vector<vector<double>> X_mat, Cov_mat;
- for (size_t i=0; i<ni_test; i++) {
+ for (size_t i = 0; i < ni_test; i++) {
x_vec.push_back(0);
}
- WriteCov (0, snpInfo_sub, Cov_mat);
+ WriteCov(0, snpInfo_sub, Cov_mat);
- size_t t2=0, inc;
- int n_nb=0;
+ size_t t2 = 0, inc;
+ int n_nb = 0;
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%d_pace==0 || t==(indicator_snp.size()-1))
- {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
- if (indicator_snp[t]==0) {continue;}
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (t % d_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
- if (X_mat.size()==0) {
- n_nb=snpInfo[t].n_nb+1;
+ if (X_mat.size() == 0) {
+ n_nb = snpInfo[t].n_nb + 1;
} else {
- n_nb=snpInfo[t].n_nb-n_nb+1;
+ n_nb = snpInfo[t].n_nb - n_nb + 1;
}
- for (int i=0; i<n_nb; i++) {
- if (X_mat.size()==0) {t2=t;}
+ for (int i = 0; i < n_nb; i++) {
+ if (X_mat.size() == 0) {
+ t2 = t;
+ }
// Read a line if the SNP is filtered out.
- inc=0;
- while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
- t2++;
- inc++;
+ inc = 0;
+ while (t2 < indicator_snp.size() && indicator_snp[t2] == 0) {
+ t2++;
+ inc++;
}
- Plink_ReadOneSNP (t2, indicator_idv, infile, geno, geno_mean);
- gsl_vector_add_constant (geno, -1.0*geno_mean);
+ Plink_ReadOneSNP(t2, indicator_idv, infile, geno, geno_mean);
+ gsl_vector_add_constant(geno, -1.0 * geno_mean);
- for (size_t j=0; j<geno->size; j++) {
- x_vec[j]=gsl_vector_get(geno, j);
+ for (size_t j = 0; j < geno->size; j++) {
+ x_vec[j] = gsl_vector_get(geno, j);
}
X_mat.push_back(x_vec);
t2++;
}
- n_nb=snpInfo[t].n_nb;
+ n_nb = snpInfo[t].n_nb;
Calc_Cor(X_mat, cov_vec);
Cov_mat.push_back(cov_vec);
@@ -376,15 +422,15 @@ void VARCOV::AnalyzePlink () {
X_mat.erase(X_mat.begin());
// Write out var/cov values.
- if (Cov_mat.size()==10000) {
- WriteCov (1, snpInfo_sub, Cov_mat);
+ if (Cov_mat.size() == 10000) {
+ WriteCov(1, snpInfo_sub, Cov_mat);
Cov_mat.clear();
snpInfo_sub.clear();
}
}
- if (Cov_mat.size()!=0) {
- WriteCov (1, snpInfo_sub, Cov_mat);
+ if (Cov_mat.size() != 0) {
+ WriteCov(1, snpInfo_sub, Cov_mat);
Cov_mat.clear();
snpInfo_sub.clear();
}
diff --git a/src/varcov.h b/src/varcov.h
index 4a1eb3a..47b4f9d 100644
--- a/src/varcov.h
+++ b/src/varcov.h
@@ -19,45 +19,43 @@
#ifndef __VARCOV_H__
#define __VARCOV_H__
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
#include "io.h"
+#include "param.h"
using namespace std;
class VARCOV {
public:
- // IO-related parameters.
- string file_out;
- string path_out;
- string file_geno;
- string file_bfile;
- int d_pace;
-
- vector<int> indicator_idv;
- vector<int> indicator_snp;
-
- vector<SNPINFO> snpInfo;
-
- double time_opt;
-
- // Class-specific parameters.
- double window_cm;
- size_t window_bp;
- size_t window_ns;
-
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
- void CalcNB (vector<SNPINFO> &snpInfo_sort);
- void WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
- const vector<vector<double> > &Cov_mat);
- void AnalyzeBimbam ();
- void AnalyzePlink ();
+ // IO-related parameters.
+ string file_out;
+ string path_out;
+ string file_geno;
+ string file_bfile;
+ int d_pace;
+
+ vector<int> indicator_idv;
+ vector<int> indicator_snp;
+
+ vector<SNPINFO> snpInfo;
+
+ double time_opt;
+
+ // Class-specific parameters.
+ double window_cm;
+ size_t window_bp;
+ size_t window_ns;
+
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+ void CalcNB(vector<SNPINFO> &snpInfo_sort);
+ void WriteCov(const int flag, const vector<SNPINFO> &snpInfo_sub,
+ const vector<vector<double>> &Cov_mat);
+ void AnalyzeBimbam();
+ void AnalyzePlink();
};
#endif
-
-
diff --git a/src/vc.cpp b/src/vc.cpp
index e8ccece..b5f36c0 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -16,216 +16,216 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
-#include <iomanip>
+#include <bitset>
#include <cmath>
+#include <cstring>
+#include <iomanip>
#include <iostream>
+#include <map>
+#include <set>
#include <stdio.h>
#include <stdlib.h>
-#include <bitset>
-#include <vector>
-#include <set>
-#include <map>
#include <string>
-#include <cstring>
+#include <vector>
-#include "gsl/gsl_vector.h"
-#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_linalg.h"
#include "gsl/gsl_blas.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
#include "gsl/gsl_cdf.h"
-#include "gsl/gsl_multiroots.h"
#include "gsl/gsl_min.h"
+#include "gsl/gsl_multiroots.h"
#include "Eigen/Dense"
-#include "param.h"
-#include "io.h"
-#include "lapack.h"
#include "eigenlib.h"
#include "gzstream.h"
-#include "mathfunc.h"
+#include "io.h"
+#include "lapack.h"
#include "lmm.h"
+#include "mathfunc.h"
+#include "param.h"
#include "vc.h"
using namespace std;
using namespace Eigen;
// In this file, X, Y are already transformed (i.e. UtX and UtY).
-void VC::CopyFromParam (PARAM &cPar) {
- a_mode=cPar.a_mode;
+void VC::CopyFromParam(PARAM &cPar) {
+ a_mode = cPar.a_mode;
- file_cat=cPar.file_cat;
- file_beta=cPar.file_beta;
- file_cor=cPar.file_cor;
+ file_cat = cPar.file_cat;
+ file_beta = cPar.file_beta;
+ file_cor = cPar.file_cor;
- setSnps=cPar.setSnps;
+ setSnps = cPar.setSnps;
- file_out=cPar.file_out;
- path_out=cPar.path_out;
+ file_out = cPar.file_out;
+ path_out = cPar.path_out;
- time_UtX=0.0;
- time_opt=0.0;
+ time_UtX = 0.0;
+ time_opt = 0.0;
- v_traceG=cPar.v_traceG;
+ v_traceG = cPar.v_traceG;
- ni_total=cPar.ni_total;
- ns_total=cPar.ns_total;
- ns_test=cPar.ns_test;
+ ni_total = cPar.ni_total;
+ ns_total = cPar.ns_total;
+ ns_test = cPar.ns_test;
- crt=cPar.crt;
- window_cm=cPar.window_cm;
- window_bp=cPar.window_bp;
- window_ns=cPar.window_ns;
+ crt = cPar.crt;
+ window_cm = cPar.window_cm;
+ window_bp = cPar.window_bp;
+ window_ns = cPar.window_ns;
- n_vc=cPar.n_vc;
+ n_vc = cPar.n_vc;
return;
}
-void VC::CopyToParam (PARAM &cPar) {
- cPar.time_UtX=time_UtX;
- cPar.time_opt=time_opt;
+void VC::CopyToParam(PARAM &cPar) {
+ cPar.time_UtX = time_UtX;
+ cPar.time_opt = time_opt;
- cPar.v_pve=v_pve;
- cPar.v_se_pve=v_se_pve;
- cPar.v_sigma2=v_sigma2;
- cPar.v_se_sigma2=v_se_sigma2;
- cPar.pve_total=pve_total;
- cPar.se_pve_total=se_pve_total;
- cPar.v_traceG=v_traceG;
+ cPar.v_pve = v_pve;
+ cPar.v_se_pve = v_se_pve;
+ cPar.v_sigma2 = v_sigma2;
+ cPar.v_se_sigma2 = v_se_sigma2;
+ cPar.pve_total = pve_total;
+ cPar.se_pve_total = se_pve_total;
+ cPar.v_traceG = v_traceG;
- cPar.v_beta=v_beta;
- cPar.v_se_beta=v_se_beta;
+ cPar.v_beta = v_beta;
+ cPar.v_se_beta = v_se_beta;
- cPar.ni_total=ni_total;
- cPar.ns_total=ns_total;
- cPar.ns_test=ns_test;
+ cPar.ni_total = ni_total;
+ cPar.ns_total = ns_total;
+ cPar.ns_test = ns_test;
- cPar.n_vc=n_vc;
+ cPar.n_vc = n_vc;
- return;
+ return;
}
-void VC::WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec,
- const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
- const gsl_matrix *Svar_mat) {
- string file_str;
- file_str=path_out+"/"+file_out;
- file_str+=".qvec.txt";
-
- ofstream outfile_q (file_str.c_str(), ofstream::out);
- if (!outfile_q) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- for (size_t i=0; i<s_vec->size; i++) {
- outfile_q<<gsl_vector_get(s_vec, i)<<endl;
- }
- for (size_t i=0; i<q_vec->size; i++) {
- outfile_q<<gsl_vector_get(q_vec, i)<<endl;
- }
- for (size_t i=0; i<qvar_vec->size; i++) {
- outfile_q<<gsl_vector_get(qvar_vec, i)<<endl;
- }
-
- outfile_q.clear();
- outfile_q.close();
-
- file_str=path_out+"/"+file_out;
- file_str+=".smat.txt";
-
- ofstream outfile_s (file_str.c_str(), ofstream::out);
- if (!outfile_s) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
- return;
- }
-
- for (size_t i=0; i<S_mat->size1; i++) {
- for (size_t j=0; j<S_mat->size2; j++) {
- outfile_s<<gsl_matrix_get(S_mat, i, j)<<"\t";
- }
- outfile_s<<endl;
- }
- for (size_t i=0; i<Svar_mat->size1; i++) {
- for (size_t j=0; j<Svar_mat->size2; j++) {
- outfile_s<<gsl_matrix_get(Svar_mat, i, j)<<"\t";
- }
- outfile_s<<endl;
- }
-
- outfile_s.clear();
- outfile_s.close();
-
- return;
+void VC::WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec,
+ const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
+ const gsl_matrix *Svar_mat) {
+ string file_str;
+ file_str = path_out + "/" + file_out;
+ file_str += ".qvec.txt";
+
+ ofstream outfile_q(file_str.c_str(), ofstream::out);
+ if (!outfile_q) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ for (size_t i = 0; i < s_vec->size; i++) {
+ outfile_q << gsl_vector_get(s_vec, i) << endl;
+ }
+ for (size_t i = 0; i < q_vec->size; i++) {
+ outfile_q << gsl_vector_get(q_vec, i) << endl;
+ }
+ for (size_t i = 0; i < qvar_vec->size; i++) {
+ outfile_q << gsl_vector_get(qvar_vec, i) << endl;
+ }
+
+ outfile_q.clear();
+ outfile_q.close();
+
+ file_str = path_out + "/" + file_out;
+ file_str += ".smat.txt";
+
+ ofstream outfile_s(file_str.c_str(), ofstream::out);
+ if (!outfile_s) {
+ cout << "error writing file: " << file_str.c_str() << endl;
+ return;
+ }
+
+ for (size_t i = 0; i < S_mat->size1; i++) {
+ for (size_t j = 0; j < S_mat->size2; j++) {
+ outfile_s << gsl_matrix_get(S_mat, i, j) << "\t";
+ }
+ outfile_s << endl;
+ }
+ for (size_t i = 0; i < Svar_mat->size1; i++) {
+ for (size_t j = 0; j < Svar_mat->size2; j++) {
+ outfile_s << gsl_matrix_get(Svar_mat, i, j) << "\t";
+ }
+ outfile_s << endl;
+ }
+
+ outfile_s.clear();
+ outfile_s.close();
+
+ return;
}
-void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
- size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1, n_cvt=(p->W)->size2;
+void UpdateParam(const gsl_vector *log_sigma2, VC_PARAM *p) {
+ size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1, n_cvt = (p->W)->size2;
- gsl_matrix *K_temp=gsl_matrix_alloc(n1, n1);
- gsl_matrix *HiW=gsl_matrix_alloc(n1, n_cvt);
- gsl_matrix *WtHiW=gsl_matrix_alloc(n_cvt, n_cvt);
- gsl_matrix *WtHiWi=gsl_matrix_alloc(n_cvt, n_cvt);
- gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1);
+ gsl_matrix *K_temp = gsl_matrix_alloc(n1, n1);
+ gsl_matrix *HiW = gsl_matrix_alloc(n1, n_cvt);
+ gsl_matrix *WtHiW = gsl_matrix_alloc(n_cvt, n_cvt);
+ gsl_matrix *WtHiWi = gsl_matrix_alloc(n_cvt, n_cvt);
+ gsl_matrix *WtHiWiWtHi = gsl_matrix_alloc(n_cvt, n1);
double sigma2;
// Calculate H = \sum_i^{k+1} \sigma_i^2 K_i.
- gsl_matrix_set_zero (p->P);
- for (size_t i=0; i<n_vc+1; i++) {
- if (i==n_vc) {
- gsl_matrix_set_identity (K_temp);
+ gsl_matrix_set_zero(p->P);
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ if (i == n_vc) {
+ gsl_matrix_set_identity(K_temp);
} else {
- gsl_matrix_const_view K_sub=
- gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
- gsl_matrix_memcpy (K_temp, &K_sub.matrix);
+ gsl_matrix_const_view K_sub =
+ gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
+ gsl_matrix_memcpy(K_temp, &K_sub.matrix);
}
// When unconstrained, update on sigma2 instead of log_sigma2.
if (p->noconstrain) {
- sigma2=gsl_vector_get (log_sigma2, i);
+ sigma2 = gsl_vector_get(log_sigma2, i);
} else {
- sigma2=exp(gsl_vector_get (log_sigma2, i) );
+ sigma2 = exp(gsl_vector_get(log_sigma2, i));
}
gsl_matrix_scale(K_temp, sigma2);
- gsl_matrix_add (p->P, K_temp);
+ gsl_matrix_add(p->P, K_temp);
}
// Calculate H^{-1}.
eigenlib_invert(p->P);
- eigenlib_dgemm ("N", "N", 1.0, p->P, p->W, 0.0, HiW);
- eigenlib_dgemm ("T", "N", 1.0, p->W, HiW, 0.0, WtHiW);
+ eigenlib_dgemm("N", "N", 1.0, p->P, p->W, 0.0, HiW);
+ eigenlib_dgemm("T", "N", 1.0, p->W, HiW, 0.0, WtHiW);
eigenlib_invert(WtHiW);
gsl_matrix_memcpy(WtHiWi, WtHiW);
- eigenlib_dgemm ("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
- eigenlib_dgemm ("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
+ eigenlib_dgemm("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
+ eigenlib_dgemm("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
// Calculate Py, KPy, PKPy.
gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
double d;
- for (size_t i=0; i<n_vc+1; i++) {
- gsl_vector_view KPy=gsl_matrix_column (p->KPy_mat, i);
- gsl_vector_view PKPy=gsl_matrix_column (p->PKPy_mat, i);
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ gsl_vector_view KPy = gsl_matrix_column(p->KPy_mat, i);
+ gsl_vector_view PKPy = gsl_matrix_column(p->PKPy_mat, i);
- if (i==n_vc) {
- gsl_vector_memcpy (&KPy.vector, p->Py);
+ if (i == n_vc) {
+ gsl_vector_memcpy(&KPy.vector, p->Py);
} else {
- gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
+ gsl_matrix_const_view K_sub =
+ gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1);
// Seems to be important to use gsl dgemv here instead of
// eigenlib_dgemv; otherwise.
- gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0,
- &KPy.vector);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector);
}
gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector);
@@ -233,64 +233,64 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
// When phenotypes are not normalized well, then some values in
// the following matrix maybe NaN; change that to 0; this seems to
// only happen when eigenlib_dgemv was used above.
- for (size_t j=0; j<p->KPy_mat->size1; j++) {
- d=gsl_matrix_get (p->KPy_mat, j, i);
+ for (size_t j = 0; j < p->KPy_mat->size1; j++) {
+ d = gsl_matrix_get(p->KPy_mat, j, i);
if (std::isnan(d)) {
- gsl_matrix_set (p->KPy_mat, j, i, 0);
- cout<<"nan appears in "<<i<<" "<<j<<endl;
+ gsl_matrix_set(p->KPy_mat, j, i, 0);
+ cout << "nan appears in " << i << " " << j << endl;
}
- d=gsl_matrix_get (p->PKPy_mat, j, i);
+ d = gsl_matrix_get(p->PKPy_mat, j, i);
if (std::isnan(d)) {
- gsl_matrix_set (p->PKPy_mat, j, i, 0);
- cout<<"nan appears in "<<i<<" "<<j<<endl;
+ gsl_matrix_set(p->PKPy_mat, j, i, 0);
+ cout << "nan appears in " << i << " " << j << endl;
}
}
}
- gsl_matrix_free (K_temp);
- gsl_matrix_free (HiW);
- gsl_matrix_free (WtHiW);
- gsl_matrix_free (WtHiWi);
- gsl_matrix_free (WtHiWiWtHi);
+ gsl_matrix_free(K_temp);
+ gsl_matrix_free(HiW);
+ gsl_matrix_free(WtHiW);
+ gsl_matrix_free(WtHiWi);
+ gsl_matrix_free(WtHiWiWtHi);
return;
}
// Below are functions for AI algorithm.
-int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
- VC_PARAM *p=(VC_PARAM *) params;
+int LogRL_dev1(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
+ VC_PARAM *p = (VC_PARAM *)params;
- size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1;
+ size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
double tr, d;
// Update parameters.
- UpdateParam (log_sigma2, p);
+ UpdateParam(log_sigma2, p);
// Calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy.
- for (size_t i=0; i<n_vc+1; i++) {
- if (i==n_vc) {
- tr=0;
- for (size_t l=0; l<n1; l++) {
- tr+=gsl_matrix_get (p->P, l, l);
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ if (i == n_vc) {
+ tr = 0;
+ for (size_t l = 0; l < n1; l++) {
+ tr += gsl_matrix_get(p->P, l, l);
}
} else {
- tr=0;
- for (size_t l=0; l<n1; l++) {
- gsl_vector_view P_row=gsl_matrix_row (p->P, l);
- gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l);
- gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
- tr+=d;
+ tr = 0;
+ for (size_t l = 0; l < n1; l++) {
+ gsl_vector_view P_row = gsl_matrix_row(p->P, l);
+ gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
+ gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
+ tr += d;
}
}
- gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
+ gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
if (p->noconstrain) {
- d=(-0.5*tr+0.5*d);
+ d = (-0.5 * tr + 0.5 * d);
} else {
- d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i));
+ d = (-0.5 * tr + 0.5 * d) * exp(gsl_vector_get(log_sigma2, i));
}
gsl_vector_set(dev1, i, d);
@@ -299,324 +299,354 @@ int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) {
return GSL_SUCCESS;
}
-int LogRL_dev2 (const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) {
- VC_PARAM *p=(VC_PARAM *) params;
+int LogRL_dev2(const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) {
+ VC_PARAM *p = (VC_PARAM *)params;
- size_t n_vc=log_sigma2->size-1;
+ size_t n_vc = log_sigma2->size - 1;
double d, sigma2_i, sigma2_j;
// Update parameters.
- UpdateParam (log_sigma2, p);
+ UpdateParam(log_sigma2, p);
// Calculate dev2 = 0.5(yPKPKPy).
- for (size_t i=0; i<n_vc+1; i++) {
- gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
if (p->noconstrain) {
- sigma2_i=gsl_vector_get(log_sigma2, i);
+ sigma2_i = gsl_vector_get(log_sigma2, i);
} else {
- sigma2_i=exp(gsl_vector_get(log_sigma2, i));
+ sigma2_i = exp(gsl_vector_get(log_sigma2, i));
}
- for (size_t j=i; j<n_vc+1; j++) {
- gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j);
+ for (size_t j = i; j < n_vc + 1; j++) {
+ gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
if (p->noconstrain) {
- sigma2_j=gsl_vector_get(log_sigma2, j);
- d*=-0.5;
+ sigma2_j = gsl_vector_get(log_sigma2, j);
+ d *= -0.5;
} else {
- sigma2_j=exp(gsl_vector_get(log_sigma2, j));
- d*=-0.5*sigma2_i*sigma2_j;
+ sigma2_j = exp(gsl_vector_get(log_sigma2, j));
+ d *= -0.5 * sigma2_i * sigma2_j;
}
gsl_matrix_set(dev2, i, j, d);
- if (j!=i) {gsl_matrix_set(dev2, j, i, d);}
+ if (j != i) {
+ gsl_matrix_set(dev2, j, i, d);
+ }
}
}
- gsl_matrix_memcpy (p->Hessian, dev2);
+ gsl_matrix_memcpy(p->Hessian, dev2);
return GSL_SUCCESS;
}
-int LogRL_dev12 (const gsl_vector *log_sigma2, void *params,
- gsl_vector *dev1, gsl_matrix *dev2) {
- VC_PARAM *p=(VC_PARAM *) params;
+int LogRL_dev12(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1,
+ gsl_matrix *dev2) {
+ VC_PARAM *p = (VC_PARAM *)params;
- size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1;
+ size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1;
double tr, d, sigma2_i, sigma2_j;
// Update parameters.
- UpdateParam (log_sigma2, p);
+ UpdateParam(log_sigma2, p);
- for (size_t i=0; i<n_vc+1; i++) {
- if (i==n_vc) {
- tr=0;
- for (size_t l=0; l<n1; l++) {
- tr+=gsl_matrix_get (p->P, l, l);
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ if (i == n_vc) {
+ tr = 0;
+ for (size_t l = 0; l < n1; l++) {
+ tr += gsl_matrix_get(p->P, l, l);
}
} else {
- tr=0;
- for (size_t l=0; l<n1; l++) {
- gsl_vector_view P_row=gsl_matrix_row (p->P, l);
- gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l);
- gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
- tr+=d;
+ tr = 0;
+ for (size_t l = 0; l < n1; l++) {
+ gsl_vector_view P_row = gsl_matrix_row(p->P, l);
+ gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l);
+ gsl_blas_ddot(&P_row.vector, &K_col.vector, &d);
+ tr += d;
}
}
- gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
+ gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i);
gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
if (p->noconstrain) {
- sigma2_i=gsl_vector_get(log_sigma2, i);
- d=(-0.5*tr+0.5*d);
+ sigma2_i = gsl_vector_get(log_sigma2, i);
+ d = (-0.5 * tr + 0.5 * d);
} else {
- sigma2_i=exp(gsl_vector_get(log_sigma2, i));
- d=(-0.5*tr+0.5*d)*sigma2_i;
+ sigma2_i = exp(gsl_vector_get(log_sigma2, i));
+ d = (-0.5 * tr + 0.5 * d) * sigma2_i;
}
gsl_vector_set(dev1, i, d);
- for (size_t j=i; j<n_vc+1; j++) {
- gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j);
+ for (size_t j = i; j < n_vc + 1; j++) {
+ gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j);
gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
if (p->noconstrain) {
- sigma2_j=gsl_vector_get(log_sigma2, j);
- d*=-0.5;
+ sigma2_j = gsl_vector_get(log_sigma2, j);
+ d *= -0.5;
} else {
- sigma2_j=exp(gsl_vector_get(log_sigma2, j));
- d*=-0.5*sigma2_i*sigma2_j;
+ sigma2_j = exp(gsl_vector_get(log_sigma2, j));
+ d *= -0.5 * sigma2_i * sigma2_j;
}
gsl_matrix_set(dev2, i, j, d);
- if (j!=i) {gsl_matrix_set(dev2, j, i, d);}
+ if (j != i) {
+ gsl_matrix_set(dev2, j, i, d);
+ }
}
-
}
- gsl_matrix_memcpy (p->Hessian, dev2);
+ gsl_matrix_memcpy(p->Hessian, dev2);
return GSL_SUCCESS;
}
// Read header to determine which column contains which item.
-bool ReadHeader_vc (const string &line, HEADER &header) {
- string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID",
- "rsid","RSID"};
- set<string> rs_set(rs_ptr, rs_ptr+10);
- string chr_ptr[]={"chr","CHR"};
- set<string> chr_set(chr_ptr, chr_ptr+2);
- string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION",
- "bp", "BP"};
- set<string> pos_set(pos_ptr, pos_ptr+8);
- string cm_ptr[]={"cm","CM"};
- set<string> cm_set(cm_ptr, cm_ptr+2);
- string a1_ptr[]={"a1","A1","allele1","ALLELE1"};
- set<string> a1_set(a1_ptr, a1_ptr+4);
- string a0_ptr[]={"a0","A0","allele0","ALLELE0"};
- set<string> a0_set(a0_ptr, a0_ptr+4);
-
- string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
- set<string> z_set(z_ptr, z_ptr+6);
- string beta_ptr[]={"beta","BETA","b","B"};
- set<string> beta_set(beta_ptr, beta_ptr+4);
- string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
- set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
- string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
- set<string> chisq_set(chisq_ptr, chisq_ptr+4);
- string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
- set<string> p_set(p_ptr, p_ptr+6);
-
- string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
- set<string> n_set(n_ptr, n_ptr+6);
- string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
- set<string> nmis_set(nmis_ptr, nmis_ptr+6);
- string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
- set<string> nobs_set(nobs_ptr, nobs_ptr+4);
-
- string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq",
- "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"};
- set<string> af_set(af_ptr, af_ptr+10);
- string var_ptr[]={"var","VAR"};
- set<string> var_set(var_ptr, var_ptr+2);
-
- string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
- set<string> ws_set(ws_ptr, ws_ptr+4);
- string cor_ptr[]={"cor","COR","r","R"};
- set<string> cor_set(cor_ptr, cor_ptr+4);
-
- header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0;
- header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0;
- header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0;
- header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0;
- header.cor_col=0; header.coln=0;
+bool ReadHeader_vc(const string &line, HEADER &header) {
+ string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps",
+ "SNPS", "snpid", "SNPID", "rsid", "RSID"};
+ set<string> rs_set(rs_ptr, rs_ptr + 10);
+ string chr_ptr[] = {"chr", "CHR"};
+ set<string> chr_set(chr_ptr, chr_ptr + 2);
+ string pos_ptr[] = {
+ "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"};
+ set<string> pos_set(pos_ptr, pos_ptr + 8);
+ string cm_ptr[] = {"cm", "CM"};
+ set<string> cm_set(cm_ptr, cm_ptr + 2);
+ string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1"};
+ set<string> a1_set(a1_ptr, a1_ptr + 4);
+ string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0"};
+ set<string> a0_set(a0_ptr, a0_ptr + 4);
+
+ string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"};
+ set<string> z_set(z_ptr, z_ptr + 6);
+ string beta_ptr[] = {"beta", "BETA", "b", "B"};
+ set<string> beta_set(beta_ptr, beta_ptr + 4);
+ string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"};
+ set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4);
+ string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"};
+ set<string> chisq_set(chisq_ptr, chisq_ptr + 4);
+ string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"};
+ set<string> p_set(p_ptr, p_ptr + 6);
+
+ string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"};
+ set<string> n_set(n_ptr, n_ptr + 6);
+ string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"};
+ set<string> nmis_set(nmis_ptr, nmis_ptr + 6);
+ string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"};
+ set<string> nobs_set(nobs_ptr, nobs_ptr + 4);
+
+ string af_ptr[] = {"af",
+ "AF",
+ "maf",
+ "MAF",
+ "f",
+ "F",
+ "allele_freq",
+ "ALLELE_FREQ",
+ "allele_frequency",
+ "ALLELE_FREQUENCY"};
+ set<string> af_set(af_ptr, af_ptr + 10);
+ string var_ptr[] = {"var", "VAR"};
+ set<string> var_set(var_ptr, var_ptr + 2);
+
+ string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"};
+ set<string> ws_set(ws_ptr, ws_ptr + 4);
+ string cor_ptr[] = {"cor", "COR", "r", "R"};
+ set<string> cor_set(cor_ptr, cor_ptr + 4);
+
+ header.rs_col = 0;
+ header.chr_col = 0;
+ header.pos_col = 0;
+ header.a1_col = 0;
+ header.a0_col = 0;
+ header.z_col = 0;
+ header.beta_col = 0;
+ header.sebeta_col = 0;
+ header.chisq_col = 0;
+ header.p_col = 0;
+ header.n_col = 0;
+ header.nmis_col = 0;
+ header.nobs_col = 0;
+ header.af_col = 0;
+ header.var_col = 0;
+ header.ws_col = 0;
+ header.cor_col = 0;
+ header.coln = 0;
char *ch_ptr;
string type;
- size_t n_error=0;
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- while (ch_ptr!=NULL) {
- type=ch_ptr;
- if (rs_set.count(type)!=0) {
- if (header.rs_col==0) {
- header.rs_col=header.coln+1;
+ size_t n_error = 0;
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ while (ch_ptr != NULL) {
+ type = ch_ptr;
+ if (rs_set.count(type) != 0) {
+ if (header.rs_col == 0) {
+ header.rs_col = header.coln + 1;
} else {
- cout<<"error! more than two rs columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two rs columns in the file." << endl;
+ n_error++;
}
- } else if (chr_set.count(type)!=0) {
- if (header.chr_col==0) {
- header.chr_col=header.coln+1;
+ } else if (chr_set.count(type) != 0) {
+ if (header.chr_col == 0) {
+ header.chr_col = header.coln + 1;
} else {
- cout<<"error! more than two chr columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two chr columns in the file." << endl;
+ n_error++;
}
- } else if (pos_set.count(type)!=0) {
- if (header.pos_col==0) {
- header.pos_col=header.coln+1;
+ } else if (pos_set.count(type) != 0) {
+ if (header.pos_col == 0) {
+ header.pos_col = header.coln + 1;
} else {
- cout<<"error! more than two pos columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two pos columns in the file." << endl;
+ n_error++;
}
- } else if (cm_set.count(type)!=0) {
- if (header.cm_col==0) {
- header.cm_col=header.coln+1;
+ } else if (cm_set.count(type) != 0) {
+ if (header.cm_col == 0) {
+ header.cm_col = header.coln + 1;
} else {
- cout<<"error! more than two cm columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two cm columns in the file." << endl;
+ n_error++;
}
- } else if (a1_set.count(type)!=0) {
- if (header.a1_col==0) {
- header.a1_col=header.coln+1;
+ } else if (a1_set.count(type) != 0) {
+ if (header.a1_col == 0) {
+ header.a1_col = header.coln + 1;
} else {
- cout<<"error! more than two allele1 columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two allele1 columns in the file." << endl;
+ n_error++;
}
- } else if (a0_set.count(type)!=0) {
- if (header.a0_col==0) {
- header.a0_col=header.coln+1;
+ } else if (a0_set.count(type) != 0) {
+ if (header.a0_col == 0) {
+ header.a0_col = header.coln + 1;
} else {
- cout<<"error! more than two allele0 columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two allele0 columns in the file." << endl;
+ n_error++;
}
- } else if (z_set.count(type)!=0) {
- if (header.z_col==0) {
- header.z_col=header.coln+1;
+ } else if (z_set.count(type) != 0) {
+ if (header.z_col == 0) {
+ header.z_col = header.coln + 1;
} else {
- cout<<"error! more than two z columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two z columns in the file." << endl;
+ n_error++;
}
- } else if (beta_set.count(type)!=0) {
- if (header.beta_col==0) {
- header.beta_col=header.coln+1;
+ } else if (beta_set.count(type) != 0) {
+ if (header.beta_col == 0) {
+ header.beta_col = header.coln + 1;
} else {
- cout<<"error! more than two beta columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two beta columns in the file." << endl;
+ n_error++;
}
- } else if (sebeta_set.count(type)!=0) {
- if (header.sebeta_col==0) {
- header.sebeta_col=header.coln+1;
+ } else if (sebeta_set.count(type) != 0) {
+ if (header.sebeta_col == 0) {
+ header.sebeta_col = header.coln + 1;
} else {
- cout<<"error! more than two se_beta columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two se_beta columns in the file." << endl;
+ n_error++;
}
- } else if (chisq_set.count(type)!=0) {
- if (header.chisq_col==0) {
- header.chisq_col=header.coln+1;
+ } else if (chisq_set.count(type) != 0) {
+ if (header.chisq_col == 0) {
+ header.chisq_col = header.coln + 1;
} else {
- cout<<"error! more than two z columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two z columns in the file." << endl;
+ n_error++;
}
- } else if (p_set.count(type)!=0) {
- if (header.p_col==0) {
- header.p_col=header.coln+1;
+ } else if (p_set.count(type) != 0) {
+ if (header.p_col == 0) {
+ header.p_col = header.coln + 1;
} else {
- cout<<"error! more than two p columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two p columns in the file." << endl;
+ n_error++;
}
- } else if (n_set.count(type)!=0) {
- if (header.n_col==0) {
- header.n_col=header.coln+1;
+ } else if (n_set.count(type) != 0) {
+ if (header.n_col == 0) {
+ header.n_col = header.coln + 1;
} else {
- cout<<"error! more than two n_total columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two n_total columns in the file." << endl;
+ n_error++;
}
- } else if (nmis_set.count(type)!=0) {
- if (header.nmis_col==0) {
- header.nmis_col=header.coln+1;
+ } else if (nmis_set.count(type) != 0) {
+ if (header.nmis_col == 0) {
+ header.nmis_col = header.coln + 1;
} else {
- cout<<"error! more than two n_mis columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two n_mis columns in the file." << endl;
+ n_error++;
}
- } else if (nobs_set.count(type)!=0) {
- if (header.nobs_col==0) {
- header.nobs_col=header.coln+1;
+ } else if (nobs_set.count(type) != 0) {
+ if (header.nobs_col == 0) {
+ header.nobs_col = header.coln + 1;
} else {
- cout<<"error! more than two n_obs columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two n_obs columns in the file." << endl;
+ n_error++;
}
- } else if (ws_set.count(type)!=0) {
- if (header.ws_col==0) {
- header.ws_col=header.coln+1;
+ } else if (ws_set.count(type) != 0) {
+ if (header.ws_col == 0) {
+ header.ws_col = header.coln + 1;
} else {
- cout<<"error! more than two window_size columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two window_size columns in the file." << endl;
+ n_error++;
}
- } else if (af_set.count(type)!=0) {
- if (header.af_col==0) {
- header.af_col=header.coln+1;
+ } else if (af_set.count(type) != 0) {
+ if (header.af_col == 0) {
+ header.af_col = header.coln + 1;
} else {
- cout<<"error! more than two af columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two af columns in the file." << endl;
+ n_error++;
}
- } else if (cor_set.count(type)!=0) {
- if (header.cor_col==0) {
- header.cor_col=header.coln+1;
+ } else if (cor_set.count(type) != 0) {
+ if (header.cor_col == 0) {
+ header.cor_col = header.coln + 1;
} else {
- cout<<"error! more than two cor columns in the file."<<endl;
- n_error++;
+ cout << "error! more than two cor columns in the file." << endl;
+ n_error++;
}
- } else {}
+ } else {
+ }
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
header.coln++;
}
- if (header.cor_col!=0 && header.cor_col!=header.coln) {
- cout<<"error! the cor column should be the last column."<<endl;
+ if (header.cor_col != 0 && header.cor_col != header.coln) {
+ cout << "error! the cor column should be the last column." << endl;
n_error++;
}
- if (header.rs_col==0) {
- if (header.chr_col!=0 && header.pos_col!=0) {
- cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
+ if (header.rs_col == 0) {
+ if (header.chr_col != 0 && header.pos_col != 0) {
+ cout << "missing an rs column. rs id will be replaced by chr:pos" << endl;
} else {
- cout<<"error! missing an rs column."<<endl; n_error++;
+ cout << "error! missing an rs column." << endl;
+ n_error++;
}
}
- if (n_error==0) {return true;} else {return false;}
+ if (n_error == 0) {
+ return true;
+ } else {
+ return false;
+ }
}
// Read cov file the first time, record mapRS2in, mapRS2var (in case
// var is not provided in the z file), store vec_n and vec_rs.
-void ReadFile_cor (const string &file_cor, const set<string> &setSnps,
- vector<string> &vec_rs, vector<size_t> &vec_n,
- vector<double> &vec_cm, vector<double> &vec_bp,
- map<string, size_t> &mapRS2in, map<string,
- double> &mapRS2var) {
+void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
+ vector<string> &vec_rs, vector<size_t> &vec_n,
+ vector<double> &vec_cm, vector<double> &vec_bp,
+ map<string, size_t> &mapRS2in,
+ map<string, double> &mapRS2var) {
vec_rs.clear();
vec_n.clear();
mapRS2in.clear();
mapRS2var.clear();
- igzstream infile (file_cor.c_str(), igzstream::in);
+ igzstream infile(file_cor.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open cov file: "<<file_cor<<endl;
+ cout << "error! fail to open cov file: " << file_cor << endl;
return;
}
@@ -624,88 +654,124 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps,
char *ch_ptr;
string rs, chr, a1, a0, pos, cm;
- double af=0, var_x=0, d_pos, d_cm;
- size_t n_total=0, n_mis=0, n_obs=0, ni_total=0;
- size_t ns_test=0, ns_total=0;
+ double af = 0, var_x = 0, d_pos, d_cm;
+ size_t n_total = 0, n_mis = 0, n_obs = 0, ni_total = 0;
+ size_t ns_test = 0, ns_total = 0;
HEADER header;
// Header.
!safeGetline(infile, line).eof();
- ReadHeader_vc (line, header);
+ ReadHeader_vc(line, header);
- if (header.n_col==0 ) {
- if (header.nobs_col==0 && header.nmis_col==0) {
- cout<<"error! missing sample size in the cor file."<<endl;
+ if (header.n_col == 0) {
+ if (header.nobs_col == 0 && header.nmis_col == 0) {
+ cout << "error! missing sample size in the cor file." << endl;
} else {
- cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+ cout << "total sample size will be replaced by obs/mis sample size."
+ << endl;
}
}
while (!safeGetline(infile, line).eof()) {
- //do not read cor values this time; upto col_n-1.
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- n_total=0; n_mis=0; n_obs=0; af=0; var_x=0; d_cm=0; d_pos=0;
- for (size_t i=0; i<header.coln-1; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
- if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
- if (header.pos_col!=0 && header.pos_col==i+1) {
- pos=ch_ptr; d_pos=atof(ch_ptr);
+ // do not read cor values this time; upto col_n-1.
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+ n_total = 0;
+ n_mis = 0;
+ n_obs = 0;
+ af = 0;
+ var_x = 0;
+ d_cm = 0;
+ d_pos = 0;
+ for (size_t i = 0; i < header.coln - 1; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ }
+ if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ }
+ if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ d_pos = atof(ch_ptr);
}
- if (header.cm_col!=0 && header.cm_col==i+1) {
- cm=ch_ptr; d_cm=atof(ch_ptr);
+ if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ d_cm = atof(ch_ptr);
+ }
+ if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ }
+ if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
}
- if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
- if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
- if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
- if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
- if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+ if (header.n_col != 0 && header.n_col == i + 1) {
+ n_total = atoi(ch_ptr);
+ }
+ if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+ n_mis = atoi(ch_ptr);
+ }
+ if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+ n_obs = atoi(ch_ptr);
+ }
- if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
- if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+ if (header.af_col != 0 && header.af_col == i + 1) {
+ af = atof(ch_ptr);
+ }
+ if (header.var_col != 0 && header.var_col == i + 1) {
+ var_x = atof(ch_ptr);
+ }
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
- if (header.rs_col==0) {
- rs=chr+":"+pos;
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
}
- if (header.n_col==0) {
- n_total=n_mis+n_obs;
+ if (header.n_col == 0) {
+ n_total = n_mis + n_obs;
}
// Record rs, n.
vec_rs.push_back(rs);
vec_n.push_back(n_total);
- if (d_cm>0) {vec_cm.push_back(d_cm);} else {vec_cm.push_back(d_cm);}
- if (d_pos>0) {vec_bp.push_back(d_pos);} else {vec_bp.push_back(d_pos);}
+ if (d_cm > 0) {
+ vec_cm.push_back(d_cm);
+ } else {
+ vec_cm.push_back(d_cm);
+ }
+ if (d_pos > 0) {
+ vec_bp.push_back(d_pos);
+ } else {
+ vec_bp.push_back(d_pos);
+ }
// Record mapRS2in and mapRS2var.
- if (setSnps.size()==0 || setSnps.count(rs)!=0) {
- if (mapRS2in.count(rs)==0) {
- mapRS2in[rs]=1;
+ if (setSnps.size() == 0 || setSnps.count(rs) != 0) {
+ if (mapRS2in.count(rs) == 0) {
+ mapRS2in[rs] = 1;
- if (header.var_col!=0) {
- mapRS2var[rs]=var_x;
- } else if (header.af_col!=0) {
- var_x=2.0*af*(1.0-af);
- mapRS2var[rs]=var_x;
- } else {}
+ if (header.var_col != 0) {
+ mapRS2var[rs] = var_x;
+ } else if (header.af_col != 0) {
+ var_x = 2.0 * af * (1.0 - af);
+ mapRS2var[rs] = var_x;
+ } else {
+ }
- ns_test++;
+ ns_test++;
} else {
- cout<<"error! more than one snp has the same id "<<rs<<
- " in cor file?"<<endl;
+ cout << "error! more than one snp has the same id " << rs
+ << " in cor file?" << endl;
}
}
// Record max pos.
- ni_total=max(ni_total, n_total);
+ ni_total = max(ni_total, n_total);
ns_total++;
}
@@ -717,19 +783,18 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps,
// Read beta file, store mapRS2var if var is provided here, calculate
// q and var_y.
-void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
- const map<string, size_t> &mapRS2cat,
- map<string, size_t> &mapRS2in,
- map<string, double> &mapRS2var,
- map<string, size_t> &mapRS2nsamp,
- gsl_vector *q_vec, gsl_vector *qvar_vec,
- gsl_vector *s_vec, size_t &ni_total,
- size_t &ns_total) {
+void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
+ const map<string, size_t> &mapRS2cat,
+ map<string, size_t> &mapRS2in,
+ map<string, double> &mapRS2var,
+ map<string, size_t> &mapRS2nsamp, gsl_vector *q_vec,
+ gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total,
+ size_t &ns_total) {
mapRS2nsamp.clear();
- igzstream infile (file_beta.c_str(), igzstream::in);
+ igzstream infile(file_beta.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open beta file: "<<file_beta<<endl;
+ cout << "error! fail to open beta file: " << file_beta << endl;
return;
}
@@ -738,13 +803,15 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
string type;
string rs, chr, a1, a0, pos, cm;
- double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
- size_t n_total=0, n_mis=0, n_obs=0;
- size_t ns_test=0;
- ns_total=0; ni_total=0;
+ double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0,
+ af = 0, var_x = 0;
+ size_t n_total = 0, n_mis = 0, n_obs = 0;
+ size_t ns_test = 0;
+ ns_total = 0;
+ ni_total = 0;
vector<double> vec_q, vec_qvar, vec_s;
- for (size_t i=0; i<q_vec->size; i++) {
+ for (size_t i = 0; i < q_vec->size; i++) {
vec_q.push_back(0.0);
vec_qvar.push_back(0.0);
vec_s.push_back(0.0);
@@ -753,122 +820,166 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
// Read header.
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_vc (line, header);
+ ReadHeader_vc(line, header);
- if (header.n_col==0 ) {
- if (header.nobs_col==0 && header.nmis_col==0) {
- cout<<"error! missing sample size in the beta file."<<endl;
+ if (header.n_col == 0) {
+ if (header.nobs_col == 0 && header.nmis_col == 0) {
+ cout << "error! missing sample size in the beta file." << endl;
} else {
- cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+ cout << "total sample size will be replaced by obs/mis sample size."
+ << endl;
}
}
- if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) &&
- header.chisq_col==0 && header.p_col==0) {
- cout<<"error! missing z scores in the beta file."<<endl;
+ if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) &&
+ header.chisq_col == 0 && header.p_col == 0) {
+ cout << "error! missing z scores in the beta file." << endl;
}
- if (header.af_col==0 && header.var_col==0 && mapRS2var.size()==0) {
- cout<<"error! missing allele frequency in the beta file."<<endl;
+ if (header.af_col == 0 && header.var_col == 0 && mapRS2var.size() == 0) {
+ cout << "error! missing allele frequency in the beta file." << endl;
}
while (!safeGetline(infile, line).eof()) {
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
- z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
- n_total=0; n_mis=0; n_obs=0; af=0; var_x=0;
- for (size_t i=0; i<header.coln; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
- if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
- if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
- if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
- if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
- if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+
+ z = 0;
+ beta = 0;
+ se_beta = 0;
+ chisq = 0;
+ pvalue = 0;
+ n_total = 0;
+ n_mis = 0;
+ n_obs = 0;
+ af = 0;
+ var_x = 0;
+ for (size_t i = 0; i < header.coln; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ }
+ if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ }
+ if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ }
+ if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ }
+ if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ }
+ if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
+ }
- if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
- if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
- if (header.sebeta_col!=0 && header.sebeta_col==i+1) {
- se_beta=atof(ch_ptr);
+ if (header.z_col != 0 && header.z_col == i + 1) {
+ z = atof(ch_ptr);
+ }
+ if (header.beta_col != 0 && header.beta_col == i + 1) {
+ beta = atof(ch_ptr);
+ }
+ if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
+ se_beta = atof(ch_ptr);
+ }
+ if (header.chisq_col != 0 && header.chisq_col == i + 1) {
+ chisq = atof(ch_ptr);
+ }
+ if (header.p_col != 0 && header.p_col == i + 1) {
+ pvalue = atof(ch_ptr);
}
- if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
- if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
- if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
- if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
- if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+ if (header.n_col != 0 && header.n_col == i + 1) {
+ n_total = atoi(ch_ptr);
+ }
+ if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+ n_mis = atoi(ch_ptr);
+ }
+ if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+ n_obs = atoi(ch_ptr);
+ }
- if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
- if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+ if (header.af_col != 0 && header.af_col == i + 1) {
+ af = atof(ch_ptr);
+ }
+ if (header.var_col != 0 && header.var_col == i + 1) {
+ var_x = atof(ch_ptr);
+ }
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
- if (header.rs_col==0) {
- rs=chr+":"+pos;
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
}
- if (header.n_col==0) {
- n_total=n_mis+n_obs;
+ if (header.n_col == 0) {
+ n_total = n_mis + n_obs;
}
// Both z values and beta/se_beta have directions, while
// chisq/pvalue do not.
- if (header.z_col!=0) {
- zsquare=z*z;
- } else if (header.beta_col!=0 && header.sebeta_col!=0) {
- z=beta/se_beta;
- zsquare=z*z;
- } else if (header.chisq_col!=0) {
- zsquare=chisq;
- } else if (header.p_col!=0) {
- zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
- } else {zsquare=0;}
+ if (header.z_col != 0) {
+ zsquare = z * z;
+ } else if (header.beta_col != 0 && header.sebeta_col != 0) {
+ z = beta / se_beta;
+ zsquare = z * z;
+ } else if (header.chisq_col != 0) {
+ zsquare = chisq;
+ } else if (header.p_col != 0) {
+ zsquare = gsl_cdf_chisq_Qinv(pvalue, 1);
+ } else {
+ zsquare = 0;
+ }
// If the snp is also present in cor file, then do calculations.
- if ((header.var_col!=0 || header.af_col!=0 || mapRS2var.count(rs)!=0) &&
- mapRS2in.count(rs)!=0 &&
- (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
- if (mapRS2in.at(rs)>1) {
- cout<<"error! more than one snp has the same id "<<rs<<
- " in beta file?"<<endl;
- break;
+ if ((header.var_col != 0 || header.af_col != 0 ||
+ mapRS2var.count(rs) != 0) &&
+ mapRS2in.count(rs) != 0 &&
+ (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) {
+ if (mapRS2in.at(rs) > 1) {
+ cout << "error! more than one snp has the same id " << rs
+ << " in beta file?" << endl;
+ break;
}
- if (header.var_col==0) {
- if (header.af_col!=0) {
- var_x=2.0*af*(1.0-af);
- } else {
- var_x=mapRS2var.at(rs);
- }
+ if (header.var_col == 0) {
+ if (header.af_col != 0) {
+ var_x = 2.0 * af * (1.0 - af);
+ } else {
+ var_x = mapRS2var.at(rs);
+ }
}
- if (flag_priorscale) {var_x=1;}
+ if (flag_priorscale) {
+ var_x = 1;
+ }
mapRS2in[rs]++;
- mapRS2var[rs]=var_x;
- mapRS2nsamp[rs]=n_total;
-
- if (mapRS2cat.size()!=0) {
- vec_q[mapRS2cat.at(rs) ]+=(zsquare-1.0)*var_x/(double)n_total;
- vec_s[mapRS2cat.at(rs) ]+=var_x;
- vec_qvar[mapRS2cat.at(rs) ]+=
- var_x*var_x/((double)n_total*(double)n_total);
+ mapRS2var[rs] = var_x;
+ mapRS2nsamp[rs] = n_total;
+
+ if (mapRS2cat.size() != 0) {
+ vec_q[mapRS2cat.at(rs)] += (zsquare - 1.0) * var_x / (double)n_total;
+ vec_s[mapRS2cat.at(rs)] += var_x;
+ vec_qvar[mapRS2cat.at(rs)] +=
+ var_x * var_x / ((double)n_total * (double)n_total);
} else {
- vec_q[0]+=(zsquare-1.0)*var_x/(double)n_total;
- vec_s[0]+=var_x;
- vec_qvar[0]+=var_x*var_x/((double)n_total*(double)n_total);
+ vec_q[0] += (zsquare - 1.0) * var_x / (double)n_total;
+ vec_s[0] += var_x;
+ vec_qvar[0] += var_x * var_x / ((double)n_total * (double)n_total);
}
- ni_total=max(ni_total, n_total);
+ ni_total = max(ni_total, n_total);
ns_test++;
}
ns_total++;
}
- for (size_t i=0; i<q_vec->size; i++) {
+ for (size_t i = 0; i < q_vec->size; i++) {
gsl_vector_set(q_vec, i, vec_q[i]);
- gsl_vector_set(qvar_vec, i, 2.0*vec_qvar[i]);
+ gsl_vector_set(qvar_vec, i, 2.0 * vec_qvar[i]);
gsl_vector_set(s_vec, i, vec_s[i]);
}
@@ -882,21 +993,20 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta,
// Look for rs, n_mis+n_obs, var, window_size, cov.
// If window_cm/bp/ns is provided, then use these max values to
// calibrate estimates.
-void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
- const vector<size_t> &vec_n, const vector<double> &vec_cm,
- const vector<double> &vec_bp,
- const map<string, size_t> &mapRS2cat,
- const map<string, size_t> &mapRS2in,
- const map<string, double> &mapRS2var,
- const map<string, size_t> &mapRS2nsamp,
- const size_t crt, const double &window_cm,
- const double &window_bp, const double &window_ns,
- gsl_matrix *S_mat, gsl_matrix *Svar_mat,
- gsl_vector *qvar_vec, size_t &ni_total,
- size_t &ns_total, size_t &ns_test, size_t &ns_pair) {
- igzstream infile (file_cor.c_str(), igzstream::in);
+void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
+ const vector<size_t> &vec_n, const vector<double> &vec_cm,
+ const vector<double> &vec_bp,
+ const map<string, size_t> &mapRS2cat,
+ const map<string, size_t> &mapRS2in,
+ const map<string, double> &mapRS2var,
+ const map<string, size_t> &mapRS2nsamp, const size_t crt,
+ const double &window_cm, const double &window_bp,
+ const double &window_ns, gsl_matrix *S_mat,
+ gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total,
+ size_t &ns_total, size_t &ns_test, size_t &ns_pair) {
+ igzstream infile(file_cor.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open cov file: "<<file_cor<<endl;
+ cout << "error! fail to open cov file: " << file_cor << endl;
return;
}
@@ -905,172 +1015,192 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
string rs1, rs2;
double d1, d2, d3, cor, var1, var2;
- size_t n_nb, nsamp1, nsamp2, n12, bin_size=10, bin;
+ size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin;
- vector<vector<double> > mat_S, mat_Svar, mat_tmp;
+ vector<vector<double>> mat_S, mat_Svar, mat_tmp;
vector<double> vec_qvar, vec_tmp;
- vector<vector<vector<double> > > mat3d_Sbin;
+ vector<vector<vector<double>>> mat3d_Sbin;
- for (size_t i=0; i<S_mat->size1; i++) {
+ for (size_t i = 0; i < S_mat->size1; i++) {
vec_qvar.push_back(0.0);
}
- for (size_t i=0; i<S_mat->size1; i++) {
+ for (size_t i = 0; i < S_mat->size1; i++) {
mat_S.push_back(vec_qvar);
mat_Svar.push_back(vec_qvar);
}
- for (size_t k=0; k<bin_size; k++) {
+ for (size_t k = 0; k < bin_size; k++) {
vec_tmp.push_back(0.0);
}
- for (size_t i=0; i<S_mat->size1; i++) {
+ for (size_t i = 0; i < S_mat->size1; i++) {
mat_tmp.push_back(vec_tmp);
}
- for (size_t i=0; i<S_mat->size1; i++) {
+ for (size_t i = 0; i < S_mat->size1; i++) {
mat3d_Sbin.push_back(mat_tmp);
}
string rs, chr, a1, a0, type, pos, cm;
- size_t n_total=0, n_mis=0, n_obs=0;
+ size_t n_total = 0, n_mis = 0, n_obs = 0;
double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm;
- ns_test=0; ns_total=0; ns_pair=0; ni_total=0;
+ ns_test = 0;
+ ns_total = 0;
+ ns_pair = 0;
+ ni_total = 0;
// Header.
HEADER header;
!safeGetline(infile, line).eof();
- ReadHeader_vc (line, header);
+ ReadHeader_vc(line, header);
while (!safeGetline(infile, line).eof()) {
// Do not read cor values this time; upto col_n-1.
- d_pos1=0; d_cm1=0;
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- for (size_t i=0; i<header.coln-1; i++) {
- if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
- if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
- if (header.pos_col!=0 && header.pos_col==i+1) {
- pos=ch_ptr;
- d_pos1=atof(ch_ptr);
+ d_pos1 = 0;
+ d_cm1 = 0;
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ for (size_t i = 0; i < header.coln - 1; i++) {
+ if (header.rs_col != 0 && header.rs_col == i + 1) {
+ rs = ch_ptr;
+ }
+ if (header.chr_col != 0 && header.chr_col == i + 1) {
+ chr = ch_ptr;
+ }
+ if (header.pos_col != 0 && header.pos_col == i + 1) {
+ pos = ch_ptr;
+ d_pos1 = atof(ch_ptr);
+ }
+ if (header.cm_col != 0 && header.cm_col == i + 1) {
+ cm = ch_ptr;
+ d_cm1 = atof(ch_ptr);
}
- if (header.cm_col!=0 && header.cm_col==i+1) {
- cm=ch_ptr;
- d_cm1=atof(ch_ptr);
+ if (header.a1_col != 0 && header.a1_col == i + 1) {
+ a1 = ch_ptr;
+ }
+ if (header.a0_col != 0 && header.a0_col == i + 1) {
+ a0 = ch_ptr;
}
- if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
- if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
- if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
- if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
- if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+ if (header.n_col != 0 && header.n_col == i + 1) {
+ n_total = atoi(ch_ptr);
+ }
+ if (header.nmis_col != 0 && header.nmis_col == i + 1) {
+ n_mis = atoi(ch_ptr);
+ }
+ if (header.nobs_col != 0 && header.nobs_col == i + 1) {
+ n_obs = atoi(ch_ptr);
+ }
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
}
- if (header.rs_col==0) {
- rs=chr+":"+pos;
+ if (header.rs_col == 0) {
+ rs = chr + ":" + pos;
}
- if (header.n_col==0) {
- n_total=n_mis+n_obs;
+ if (header.n_col == 0) {
+ n_total = n_mis + n_obs;
}
- rs1=rs;
-
- if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs1)!=0) &&
- mapRS2in.count(rs1)!=0 && mapRS2in.at(rs1)==2) {
- var1=mapRS2var.at(rs1);
- nsamp1=mapRS2nsamp.at(rs1);
- d2=var1*var1;
-
- if (mapRS2cat.size()!=0) {
- mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+=
- (1-1.0/(double)vec_n[ns_total])*d2;
- mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+=
- d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]);
- if (crt==1) {
- mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ][0]+=
- (1-1.0/(double)vec_n[ns_total])*d2;
- }
+ rs1 = rs;
+
+ if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs1) != 0) &&
+ mapRS2in.count(rs1) != 0 && mapRS2in.at(rs1) == 2) {
+ var1 = mapRS2var.at(rs1);
+ nsamp1 = mapRS2nsamp.at(rs1);
+ d2 = var1 * var1;
+
+ if (mapRS2cat.size() != 0) {
+ mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
+ (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+ mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] +=
+ d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
+ if (crt == 1) {
+ mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)][0] +=
+ (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+ }
} else {
- mat_S[0][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
- mat_Svar[0][0]+=
- d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]);
- if (crt==1) {
- mat3d_Sbin[0][0][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
- }
- }
-
- n_nb=0;
- while(ch_ptr!=NULL) {
- type=ch_ptr;
- if (type.compare("NA")!=0 && type.compare("na")!=0 &&
- type.compare("nan")!=0 && type.compare("-nan")!=0) {
- cor=atof(ch_ptr);
- rs2=vec_rs[ns_total+n_nb+1];
- d_pos2=vec_bp[ns_total+n_nb+1];
- d_cm2=vec_cm[ns_total+n_nb+1];
- d_pos=abs(d_pos2-d_pos1);
- d_cm=abs(d_cm2-d_cm1);
-
- if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs2)!=0) &&
- mapRS2in.count(rs2)!=0 && mapRS2in.at(rs2)==2) {
- var2=mapRS2var.at(rs2);
- nsamp2=mapRS2nsamp.at(rs2);
- d1=cor*cor-1.0/(double)min(vec_n[ns_total],
- vec_n[ns_total+n_nb+1]);
- d2=var1*var2;
- d3=cor*cor/((double)nsamp1*(double)nsamp2);
- n12=min(vec_n[ns_total], vec_n[ns_total+n_nb+1]);
-
- // Compute bin.
- if (crt==1) {
- if (window_cm!=0 && d_cm1!=0 && d_cm2!=0) {
- bin=min( (int)floor(d_cm/window_cm*bin_size), (int)bin_size);
- } else if (window_bp!=0 && d_pos1!=0 && d_pos2!=0) {
- bin=min( (int)floor(d_pos/window_bp*bin_size), (int)bin_size);
- } else if (window_ns!=0) {
- bin=min( (int)floor(((double)n_nb+1)/window_ns*bin_size),
- (int)bin_size);
- }
- }
-
- if (mapRS2cat.size()!=0) {
- if (mapRS2cat.at(rs1)==mapRS2cat.at(rs2)) {
- vec_qvar[mapRS2cat.at(rs1)]+=2*d3*d2;
- mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=2*d1*d2;
- mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=
- 2*d2*d2/((double)n12*(double)n12);
- if (crt==1) {
- mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+=
- 2*d1*d2;
- }
- } else {
- mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=d1*d2;
- mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=
- d2*d2/((double)n12*(double)n12);
- if (crt==1) {
- mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+=
- d1*d2;
- }
- }
- } else {
- vec_qvar[0]+=2*d3*d2;
- mat_S[0][0]+=2*d1*d2;
- mat_Svar[0][0]+=2*d2*d2/((double)n12*(double)n12);
-
- if (crt==1) {
- mat3d_Sbin[0][0][bin]+=2*d1*d2;
- }
- }
- ns_pair++;
- }
- }
-
- ch_ptr=strtok (NULL, " , \t");
- n_nb++;
- }
- ni_total=max(ni_total, n_total);
+ mat_S[0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+ mat_Svar[0][0] +=
+ d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]);
+ if (crt == 1) {
+ mat3d_Sbin[0][0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2;
+ }
+ }
+
+ n_nb = 0;
+ while (ch_ptr != NULL) {
+ type = ch_ptr;
+ if (type.compare("NA") != 0 && type.compare("na") != 0 &&
+ type.compare("nan") != 0 && type.compare("-nan") != 0) {
+ cor = atof(ch_ptr);
+ rs2 = vec_rs[ns_total + n_nb + 1];
+ d_pos2 = vec_bp[ns_total + n_nb + 1];
+ d_cm2 = vec_cm[ns_total + n_nb + 1];
+ d_pos = abs(d_pos2 - d_pos1);
+ d_cm = abs(d_cm2 - d_cm1);
+
+ if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs2) != 0) &&
+ mapRS2in.count(rs2) != 0 && mapRS2in.at(rs2) == 2) {
+ var2 = mapRS2var.at(rs2);
+ nsamp2 = mapRS2nsamp.at(rs2);
+ d1 = cor * cor -
+ 1.0 / (double)min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
+ d2 = var1 * var2;
+ d3 = cor * cor / ((double)nsamp1 * (double)nsamp2);
+ n12 = min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]);
+
+ // Compute bin.
+ if (crt == 1) {
+ if (window_cm != 0 && d_cm1 != 0 && d_cm2 != 0) {
+ bin =
+ min((int)floor(d_cm / window_cm * bin_size), (int)bin_size);
+ } else if (window_bp != 0 && d_pos1 != 0 && d_pos2 != 0) {
+ bin = min((int)floor(d_pos / window_bp * bin_size),
+ (int)bin_size);
+ } else if (window_ns != 0) {
+ bin = min((int)floor(((double)n_nb + 1) / window_ns * bin_size),
+ (int)bin_size);
+ }
+ }
+
+ if (mapRS2cat.size() != 0) {
+ if (mapRS2cat.at(rs1) == mapRS2cat.at(rs2)) {
+ vec_qvar[mapRS2cat.at(rs1)] += 2 * d3 * d2;
+ mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += 2 * d1 * d2;
+ mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
+ 2 * d2 * d2 / ((double)n12 * (double)n12);
+ if (crt == 1) {
+ mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
+ 2 * d1 * d2;
+ }
+ } else {
+ mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += d1 * d2;
+ mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] +=
+ d2 * d2 / ((double)n12 * (double)n12);
+ if (crt == 1) {
+ mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] +=
+ d1 * d2;
+ }
+ }
+ } else {
+ vec_qvar[0] += 2 * d3 * d2;
+ mat_S[0][0] += 2 * d1 * d2;
+ mat_Svar[0][0] += 2 * d2 * d2 / ((double)n12 * (double)n12);
+
+ if (crt == 1) {
+ mat3d_Sbin[0][0][bin] += 2 * d1 * d2;
+ }
+ }
+ ns_pair++;
+ }
+ }
+
+ ch_ptr = strtok(NULL, " , \t");
+ n_nb++;
+ }
+ ni_total = max(ni_total, n_total);
ns_test++;
}
@@ -1081,70 +1211,83 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
// x=seq(0.5,bin_size-0.5,by=1) and then compute a correlation
// factor as a percentage.
double a, b, x, y, n, var_y, var_x, mean_y, mean_x, cov_xy, crt_factor;
- if (crt==1) {
- for (size_t i=0; i<S_mat->size1; i++) {
- for (size_t j=i; j<S_mat->size2; j++) {
-
- // Correct mat_S.
- n=0; var_y=0; var_x=0; mean_y=0; mean_x=0; cov_xy=0;
- for (size_t k=0; k<bin_size; k++) {
- if (j==i) {
- y=mat3d_Sbin[i][j][k];
- } else {
- y=mat3d_Sbin[i][j][k]+mat3d_Sbin[j][i][k];
- }
- x=k+0.5;
- cout<<y<<", ";
- if (y>0) {
- y=1/sqrt(y);
- mean_x+=x; mean_y+=y; var_x+=x*x; var_y+=y*y; cov_xy+=x*y;
- n++;
- }
- }
- cout<<endl;
-
- if (n>=5) {
- mean_x/=n; mean_y/=n; var_x/=n; var_y/=n; cov_xy/=n;
- var_x-=mean_x*mean_x; var_y-=mean_y*mean_y; cov_xy-=mean_x*mean_y;
- b=cov_xy/var_x;
- a=mean_y-b*mean_x;
- crt_factor=a/(b*(bin_size+0.5))+1;
- if (i==j) {
- mat_S[i][j]*=crt_factor;
- } else {
- mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor;
- }
- cout<<crt_factor<<endl;
-
- // Correct qvar.
- if (i==j) {
- vec_qvar[i]*=crt_factor;
- }
- }
+ if (crt == 1) {
+ for (size_t i = 0; i < S_mat->size1; i++) {
+ for (size_t j = i; j < S_mat->size2; j++) {
+
+ // Correct mat_S.
+ n = 0;
+ var_y = 0;
+ var_x = 0;
+ mean_y = 0;
+ mean_x = 0;
+ cov_xy = 0;
+ for (size_t k = 0; k < bin_size; k++) {
+ if (j == i) {
+ y = mat3d_Sbin[i][j][k];
+ } else {
+ y = mat3d_Sbin[i][j][k] + mat3d_Sbin[j][i][k];
+ }
+ x = k + 0.5;
+ cout << y << ", ";
+ if (y > 0) {
+ y = 1 / sqrt(y);
+ mean_x += x;
+ mean_y += y;
+ var_x += x * x;
+ var_y += y * y;
+ cov_xy += x * y;
+ n++;
+ }
+ }
+ cout << endl;
+
+ if (n >= 5) {
+ mean_x /= n;
+ mean_y /= n;
+ var_x /= n;
+ var_y /= n;
+ cov_xy /= n;
+ var_x -= mean_x * mean_x;
+ var_y -= mean_y * mean_y;
+ cov_xy -= mean_x * mean_y;
+ b = cov_xy / var_x;
+ a = mean_y - b * mean_x;
+ crt_factor = a / (b * (bin_size + 0.5)) + 1;
+ if (i == j) {
+ mat_S[i][j] *= crt_factor;
+ } else {
+ mat_S[i][j] *= crt_factor;
+ mat_S[j][i] *= crt_factor;
+ }
+ cout << crt_factor << endl;
+
+ // Correct qvar.
+ if (i == j) {
+ vec_qvar[i] *= crt_factor;
+ }
+ }
}
}
}
// Save to gsl_vector and gsl_matrix: qvar_vec, S_mat, Svar_mat.
- for (size_t i=0; i<S_mat->size1; i++) {
- d1=gsl_vector_get(qvar_vec, i)+2*vec_qvar[i];
+ for (size_t i = 0; i < S_mat->size1; i++) {
+ d1 = gsl_vector_get(qvar_vec, i) + 2 * vec_qvar[i];
gsl_vector_set(qvar_vec, i, d1);
- for (size_t j=0; j<S_mat->size2; j++) {
- if (i==j) {
- gsl_matrix_set(S_mat, i, j, mat_S[i][i]);
- gsl_matrix_set(Svar_mat, i, j,
- 2.0*mat_Svar[i][i]*ns_test*ns_test/(2.0*ns_pair) );
+ for (size_t j = 0; j < S_mat->size2; j++) {
+ if (i == j) {
+ gsl_matrix_set(S_mat, i, j, mat_S[i][i]);
+ gsl_matrix_set(Svar_mat, i, j, 2.0 * mat_Svar[i][i] * ns_test *
+ ns_test / (2.0 * ns_pair));
} else {
- gsl_matrix_set(S_mat, i, j, mat_S[i][j]+mat_S[j][i]);
- gsl_matrix_set(Svar_mat, i, j,
- 2.0*(mat_Svar[i][j]+mat_Svar[j][i])*
- ns_test*ns_test/(2.0*ns_pair) );
+ gsl_matrix_set(S_mat, i, j, mat_S[i][j] + mat_S[j][i]);
+ gsl_matrix_set(Svar_mat, i, j, 2.0 * (mat_Svar[i][j] + mat_Svar[j][i]) *
+ ns_test * ns_test / (2.0 * ns_pair));
}
}
}
-
-
infile.clear();
infile.close();
@@ -1157,170 +1300,175 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
// compute the variance for S, use a set of genotypes, phenotypes, and
// individual ids, and snp category label.
void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
- const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
- const gsl_vector *s_vec, const double df,
- vector<double> &v_pve, vector<double> &v_se_pve,
- double &pve_total, double &se_pve_total,
- vector<double> &v_sigma2, vector<double> &v_se_sigma2,
- vector<double> &v_enrich, vector<double> &v_se_enrich) {
- size_t n_vc=S_mat->size1;
-
- gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
-
- gsl_vector *pve=gsl_vector_alloc (n_vc);
- gsl_vector *pve_plus=gsl_vector_alloc (n_vc+1);
- gsl_vector *tmp=gsl_vector_alloc (n_vc+1);
- gsl_vector *sigma2persnp=gsl_vector_alloc (n_vc);
- gsl_vector *enrich=gsl_vector_alloc (n_vc);
- gsl_vector *se_pve=gsl_vector_alloc (n_vc);
- gsl_vector *se_sigma2persnp=gsl_vector_alloc (n_vc);
- gsl_vector *se_enrich=gsl_vector_alloc (n_vc);
+ const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
+ const gsl_vector *s_vec, const double df, vector<double> &v_pve,
+ vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+ vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+ vector<double> &v_enrich, vector<double> &v_se_enrich) {
+ size_t n_vc = S_mat->size1;
+
+ gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
+
+ gsl_vector *pve = gsl_vector_alloc(n_vc);
+ gsl_vector *pve_plus = gsl_vector_alloc(n_vc + 1);
+ gsl_vector *tmp = gsl_vector_alloc(n_vc + 1);
+ gsl_vector *sigma2persnp = gsl_vector_alloc(n_vc);
+ gsl_vector *enrich = gsl_vector_alloc(n_vc);
+ gsl_vector *se_pve = gsl_vector_alloc(n_vc);
+ gsl_vector *se_sigma2persnp = gsl_vector_alloc(n_vc);
+ gsl_vector *se_enrich = gsl_vector_alloc(n_vc);
double d;
// Calculate S^{-1}q.
- gsl_matrix_memcpy (tmp_mat, S_mat);
+ gsl_matrix_memcpy(tmp_mat, S_mat);
int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
- LUDecomp (tmp_mat, pmt, &sig);
- LUInvert (tmp_mat, pmt, Si_mat);
+ gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+ LUDecomp(tmp_mat, pmt, &sig);
+ LUInvert(tmp_mat, pmt, Si_mat);
// Calculate sigma2snp and pve.
- gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
gsl_vector_memcpy(sigma2persnp, pve);
gsl_vector_div(sigma2persnp, s_vec);
// Get qvar_mat.
- gsl_matrix_memcpy (qvar_mat, Vq);
- gsl_matrix_scale (qvar_mat, 1.0/(df*df));
+ gsl_matrix_memcpy(qvar_mat, Vq);
+ gsl_matrix_scale(qvar_mat, 1.0 / (df * df));
// Calculate variance for these estimates.
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
- d=gsl_matrix_get(Svar_mat, i, j);
- d*=gsl_vector_get(pve, i)*gsl_vector_get(pve, j);
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = i; j < n_vc; j++) {
+ d = gsl_matrix_get(Svar_mat, i, j);
+ d *= gsl_vector_get(pve, i) * gsl_vector_get(pve, j);
- d+=gsl_matrix_get(qvar_mat, i, j);
+ d += gsl_matrix_get(qvar_mat, i, j);
gsl_matrix_set(Var_mat, i, j, d);
- if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);}
+ if (i != j) {
+ gsl_matrix_set(Var_mat, j, i, d);
+ }
}
}
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat,
- 0.0, tmp_mat);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat,
- 0.0, Var_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
+ tmp_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
+ Var_mat);
- for (size_t i=0; i<n_vc; i++) {
- d=sqrt(gsl_matrix_get(Var_mat, i, i));
+ for (size_t i = 0; i < n_vc; i++) {
+ d = sqrt(gsl_matrix_get(Var_mat, i, i));
gsl_vector_set(se_pve, i, d);
- d/=gsl_vector_get(s_vec, i);
+ d /= gsl_vector_get(s_vec, i);
gsl_vector_set(se_sigma2persnp, i, d);
}
// Compute pve_total, se_pve_total.
- pve_total=0; se_pve_total=0;
- for (size_t i=0; i<n_vc; i++) {
- pve_total+=gsl_vector_get(pve, i);
+ pve_total = 0;
+ se_pve_total = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ pve_total += gsl_vector_get(pve, i);
- for (size_t j=0; j<n_vc; j++) {
- se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+ for (size_t j = 0; j < n_vc; j++) {
+ se_pve_total += gsl_matrix_get(Var_mat, i, j);
}
}
- se_pve_total=sqrt(se_pve_total);
+ se_pve_total = sqrt(se_pve_total);
// Compute enrichment and its variance.
- double s_pve=0, s_snp=0;
- for (size_t i=0; i<n_vc; i++) {
- s_pve+=gsl_vector_get(pve, i);
- s_snp+=gsl_vector_get(s_vec, i);
+ double s_pve = 0, s_snp = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ s_pve += gsl_vector_get(pve, i);
+ s_snp += gsl_vector_get(s_vec, i);
}
- gsl_vector_memcpy (enrich, sigma2persnp);
- gsl_vector_scale (enrich, s_snp/s_pve);
+ gsl_vector_memcpy(enrich, sigma2persnp);
+ gsl_vector_scale(enrich, s_snp / s_pve);
gsl_matrix_set_identity(tmp_mat);
double d1;
- for (size_t i=0; i<n_vc; i++) {
- d=gsl_vector_get(pve, i)/s_pve;
- d1=gsl_vector_get(s_vec, i);
- for (size_t j=0; j<n_vc; j++) {
- if (i==j) {
- gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve);
+ for (size_t i = 0; i < n_vc; i++) {
+ d = gsl_vector_get(pve, i) / s_pve;
+ d1 = gsl_vector_get(s_vec, i);
+ for (size_t j = 0; j < n_vc; j++) {
+ if (i == j) {
+ gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
} else {
- gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve);
+ gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
}
}
}
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
- tmp_mat1);
+ tmp_mat1);
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
- VarEnrich_mat);
+ VarEnrich_mat);
- for (size_t i=0; i<n_vc; i++) {
- d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
+ for (size_t i = 0; i < n_vc; i++) {
+ d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
gsl_vector_set(se_enrich, i, d);
}
- cout<<"pve = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<gsl_vector_get(pve, i)<<" ";
+ cout << "pve = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << gsl_vector_get(pve, i) << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(pve) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<gsl_vector_get(se_pve, i)<<" ";
+ cout << "se(pve) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << gsl_vector_get(se_pve, i) << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"sigma2 per snp = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<gsl_vector_get(sigma2persnp, i)<<" ";
+ cout << "sigma2 per snp = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << gsl_vector_get(sigma2persnp, i) << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(sigma2 per snp) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<gsl_vector_get(se_sigma2persnp, i)<<" ";
+ cout << "se(sigma2 per snp) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << gsl_vector_get(se_sigma2persnp, i) << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"enrichment = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<gsl_vector_get(enrich, i)<<" ";
+ cout << "enrichment = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << gsl_vector_get(enrich, i) << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(enrichment) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<gsl_vector_get(se_enrich, i)<<" ";
+ cout << "se(enrichment) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << gsl_vector_get(se_enrich, i) << " ";
}
- cout<<endl;
+ cout << endl;
// Save data.
- v_pve.clear(); v_se_pve.clear();
- v_sigma2.clear(); v_se_sigma2.clear();
- v_enrich.clear(); v_se_enrich.clear();
- for (size_t i=0; i<n_vc; i++) {
- d=gsl_vector_get(pve, i);
+ v_pve.clear();
+ v_se_pve.clear();
+ v_sigma2.clear();
+ v_se_sigma2.clear();
+ v_enrich.clear();
+ v_se_enrich.clear();
+ for (size_t i = 0; i < n_vc; i++) {
+ d = gsl_vector_get(pve, i);
v_pve.push_back(d);
- d=gsl_vector_get(se_pve, i);
+ d = gsl_vector_get(se_pve, i);
v_se_pve.push_back(d);
- d=gsl_vector_get(sigma2persnp, i);
+ d = gsl_vector_get(sigma2persnp, i);
v_sigma2.push_back(d);
- d=gsl_vector_get(se_sigma2persnp, i);
+ d = gsl_vector_get(se_sigma2persnp, i);
v_se_sigma2.push_back(d);
- d=gsl_vector_get(enrich, i);
+ d = gsl_vector_get(enrich, i);
v_enrich.push_back(d);
- d=gsl_vector_get(se_enrich, i);
+ d = gsl_vector_get(se_enrich, i);
v_se_enrich.push_back(d);
}
@@ -1345,196 +1493,206 @@ void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
}
// Ks are not scaled.
-void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W,
- const gsl_vector *y) {
- size_t n1=K->size1, n2=K->size2;
- size_t n_vc=n2/n1;
+void VC::CalcVChe(const gsl_matrix *K, const gsl_matrix *W,
+ const gsl_vector *y) {
+ size_t n1 = K->size1, n2 = K->size2;
+ size_t n_vc = n2 / n1;
- double r=(double)n1/(double)(n1 - W->size2);
+ double r = (double)n1 / (double)(n1 - W->size2);
double var_y, var_y_new;
double d, tr, s, v;
vector<double> traceG_new;
// New matrices/vectors.
- gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2);
- gsl_vector *y_scale=gsl_vector_alloc (n1);
- gsl_matrix *Kry=gsl_matrix_alloc (n1, n_vc);
- gsl_matrix *yKrKKry=gsl_matrix_alloc (n_vc, n_vc*(n_vc+1) );
- gsl_vector *KKry=gsl_vector_alloc (n1);
+ gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
+ gsl_vector *y_scale = gsl_vector_alloc(n1);
+ gsl_matrix *Kry = gsl_matrix_alloc(n1, n_vc);
+ gsl_matrix *yKrKKry = gsl_matrix_alloc(n_vc, n_vc * (n_vc + 1));
+ gsl_vector *KKry = gsl_vector_alloc(n1);
// Old matrices/vectors.
- gsl_vector *pve=gsl_vector_alloc (n_vc);
- gsl_vector *se_pve=gsl_vector_alloc (n_vc);
- gsl_vector *q_vec=gsl_vector_alloc (n_vc);
- gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_vector *pve = gsl_vector_alloc(n_vc);
+ gsl_vector *se_pve = gsl_vector_alloc(n_vc);
+ gsl_vector *q_vec = gsl_vector_alloc(n_vc);
+ gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
// Center and scale K by W.
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
gsl_matrix_view Kscale_sub =
- gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+ gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
gsl_matrix_const_view K_sub =
- gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1);
- gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix);
+ gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
+ gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
- CenterMatrix (&Kscale_sub.matrix, W);
- d=ScaleMatrix (&Kscale_sub.matrix);
+ CenterMatrix(&Kscale_sub.matrix, W);
+ d = ScaleMatrix(&Kscale_sub.matrix);
traceG_new.push_back(d);
}
// Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1).
- gsl_vector_memcpy (y_scale, y);
- CenterVector (y_scale, W);
+ gsl_vector_memcpy(y_scale, y);
+ CenterVector(y_scale, W);
- var_y=VectorVar (y);
- var_y_new=VectorVar (y_scale);
+ var_y = VectorVar(y);
+ var_y_new = VectorVar(y_scale);
- StandardizeVector (y_scale);
+ StandardizeVector(y_scale);
// Compute Kry, which is used for confidence interval; also compute
// q_vec (*n^2).
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
gsl_matrix_const_view Kscale_sub =
- gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
- gsl_vector_view Kry_col=gsl_matrix_column (Kry, i);
+ gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
+ gsl_vector_view Kry_col = gsl_matrix_column(Kry, i);
- gsl_vector_memcpy (&Kry_col.vector, y_scale);
- gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0*r,
- &Kry_col.vector);
+ gsl_vector_memcpy(&Kry_col.vector, y_scale);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0 * r,
+ &Kry_col.vector);
- gsl_blas_ddot (&Kry_col.vector, y_scale, &d);
+ gsl_blas_ddot(&Kry_col.vector, y_scale, &d);
gsl_vector_set(q_vec, i, d);
}
// Compute yKrKKry, which is used later for confidence interval.
- for (size_t i=0; i<n_vc; i++) {
- gsl_vector_const_view Kry_coli=gsl_matrix_const_column (Kry, i);
- for (size_t j=i; j<n_vc; j++) {
- gsl_vector_const_view Kry_colj=gsl_matrix_const_column (Kry, j);
- for (size_t l=0; l<n_vc; l++) {
- gsl_matrix_const_view Kscale_sub =
- gsl_matrix_const_submatrix (K_scale, 0, n1*l, n1, n1);
- gsl_blas_dgemv (CblasNoTrans, 1.0, &Kscale_sub.matrix,
- &Kry_coli.vector, 0.0, KKry);
- gsl_blas_ddot (&Kry_colj.vector, KKry, &d);
- gsl_matrix_set(yKrKKry, i, l*n_vc+j, d);
- if (i!=j) {gsl_matrix_set(yKrKKry, j, l*n_vc+i, d);}
+ for (size_t i = 0; i < n_vc; i++) {
+ gsl_vector_const_view Kry_coli = gsl_matrix_const_column(Kry, i);
+ for (size_t j = i; j < n_vc; j++) {
+ gsl_vector_const_view Kry_colj = gsl_matrix_const_column(Kry, j);
+ for (size_t l = 0; l < n_vc; l++) {
+ gsl_matrix_const_view Kscale_sub =
+ gsl_matrix_const_submatrix(K_scale, 0, n1 * l, n1, n1);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, &Kry_coli.vector,
+ 0.0, KKry);
+ gsl_blas_ddot(&Kry_colj.vector, KKry, &d);
+ gsl_matrix_set(yKrKKry, i, l * n_vc + j, d);
+ if (i != j) {
+ gsl_matrix_set(yKrKKry, j, l * n_vc + i, d);
+ }
+ }
+ gsl_blas_ddot(&Kry_coli.vector, &Kry_colj.vector, &d);
+ gsl_matrix_set(yKrKKry, i, n_vc * n_vc + j, d);
+ if (i != j) {
+ gsl_matrix_set(yKrKKry, j, n_vc * n_vc + i, d);
}
- gsl_blas_ddot (&Kry_coli.vector, &Kry_colj.vector, &d);
- gsl_matrix_set(yKrKKry, i, n_vc*n_vc+j, d);
- if (i!=j) {gsl_matrix_set(yKrKKry, j, n_vc*n_vc+i, d);}
}
}
// Compute Sij (*n^2).
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
- tr=0;
- for (size_t l=0; l<n1; l++) {
- gsl_vector_const_view Ki_col=gsl_matrix_const_column (K_scale, i*n1+l);
- gsl_vector_const_view Kj_col=gsl_matrix_const_column (K_scale, j*n1+l);
- gsl_blas_ddot (&Ki_col.vector, &Kj_col.vector, &d);
- tr+=d;
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = i; j < n_vc; j++) {
+ tr = 0;
+ for (size_t l = 0; l < n1; l++) {
+ gsl_vector_const_view Ki_col =
+ gsl_matrix_const_column(K_scale, i * n1 + l);
+ gsl_vector_const_view Kj_col =
+ gsl_matrix_const_column(K_scale, j * n1 + l);
+ gsl_blas_ddot(&Ki_col.vector, &Kj_col.vector, &d);
+ tr += d;
}
- tr=tr-r*(double)n1;
- gsl_matrix_set (S_mat, i, j, tr);
- if (i!=j) {gsl_matrix_set (S_mat, j, i, tr);}
+ tr = tr - r * (double)n1;
+ gsl_matrix_set(S_mat, i, j, tr);
+ if (i != j) {
+ gsl_matrix_set(S_mat, j, i, tr);
+ }
}
}
// Compute S^{-1}q.
int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
- LUDecomp (S_mat, pmt, &sig);
- LUInvert (S_mat, pmt, Si_mat);
+ gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+ LUDecomp(S_mat, pmt, &sig);
+ LUInvert(S_mat, pmt, Si_mat);
// Compute pve (on the transformed scale).
- gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
// Compute q_var (*n^4).
- gsl_matrix_set_zero (qvar_mat);
- s=1;
- for (size_t i=0; i<n_vc; i++) {
- d=gsl_vector_get(pve, i);
- gsl_matrix_view yKrKKry_sub=
- gsl_matrix_submatrix(yKrKKry, 0, i*n_vc, n_vc, n_vc);
- gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix);
+ gsl_matrix_set_zero(qvar_mat);
+ s = 1;
+ for (size_t i = 0; i < n_vc; i++) {
+ d = gsl_vector_get(pve, i);
+ gsl_matrix_view yKrKKry_sub =
+ gsl_matrix_submatrix(yKrKKry, 0, i * n_vc, n_vc, n_vc);
+ gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
gsl_matrix_scale(tmp_mat, d);
- gsl_matrix_add (qvar_mat, tmp_mat);
- s-=d;
+ gsl_matrix_add(qvar_mat, tmp_mat);
+ s -= d;
}
- gsl_matrix_view yKrKKry_sub=gsl_matrix_submatrix(yKrKKry, 0, n_vc*n_vc,
- n_vc, n_vc);
- gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix);
+ gsl_matrix_view yKrKKry_sub =
+ gsl_matrix_submatrix(yKrKKry, 0, n_vc * n_vc, n_vc, n_vc);
+ gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix);
gsl_matrix_scale(tmp_mat, s);
- gsl_matrix_add (qvar_mat, tmp_mat);
+ gsl_matrix_add(qvar_mat, tmp_mat);
gsl_matrix_scale(qvar_mat, 2.0);
// Compute S^{-1}var_qS^{-1}.
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat,
- 0.0, tmp_mat);
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat,
- 0.0, Var_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat, 0.0,
+ tmp_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
+ Var_mat);
// Transform pve back to the original scale and save data.
- v_pve.clear(); v_se_pve.clear();
- v_sigma2.clear(); v_se_sigma2.clear();
-
- s=1.0, v=0, pve_total=0, se_pve_total=0;
- for (size_t i=0; i<n_vc; i++) {
- d=gsl_vector_get (pve, i);
- v_sigma2.push_back(d*var_y_new/traceG_new[i]);
- v_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y));
- s-=d;
- pve_total+=d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y);
-
- d=sqrt(gsl_matrix_get (Var_mat, i, i));
- v_se_sigma2.push_back(d*var_y_new/traceG_new[i]);
- v_se_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y));
-
- for (size_t j=0; j<n_vc; j++) {
- v+=gsl_matrix_get(Var_mat, i, j);
- se_pve_total+=gsl_matrix_get(Var_mat, i, j)*
- (var_y_new/traceG_new[i])*(v_traceG[i]/var_y)*
- (var_y_new/traceG_new[j])*(v_traceG[j]/var_y);
+ v_pve.clear();
+ v_se_pve.clear();
+ v_sigma2.clear();
+ v_se_sigma2.clear();
+
+ s = 1.0, v = 0, pve_total = 0, se_pve_total = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ d = gsl_vector_get(pve, i);
+ v_sigma2.push_back(d * var_y_new / traceG_new[i]);
+ v_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
+ s -= d;
+ pve_total += d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y);
+
+ d = sqrt(gsl_matrix_get(Var_mat, i, i));
+ v_se_sigma2.push_back(d * var_y_new / traceG_new[i]);
+ v_se_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y));
+
+ for (size_t j = 0; j < n_vc; j++) {
+ v += gsl_matrix_get(Var_mat, i, j);
+ se_pve_total += gsl_matrix_get(Var_mat, i, j) *
+ (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y) *
+ (var_y_new / traceG_new[j]) * (v_traceG[j] / var_y);
}
}
- v_sigma2.push_back(s*r*var_y_new);
- v_se_sigma2.push_back(sqrt(v)*r*var_y_new);
- se_pve_total=sqrt(se_pve_total);
+ v_sigma2.push_back(s * r * var_y_new);
+ v_se_sigma2.push_back(sqrt(v) * r * var_y_new);
+ se_pve_total = sqrt(se_pve_total);
- cout<<"sigma2 = ";
- for (size_t i=0; i<n_vc+1; i++) {
- cout<<v_sigma2[i]<<" ";
+ cout << "sigma2 = ";
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ cout << v_sigma2[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(sigma2) = ";
- for (size_t i=0; i<n_vc+1; i++) {
- cout<<v_se_sigma2[i]<<" ";
+ cout << "se(sigma2) = ";
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ cout << v_se_sigma2[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"pve = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_pve[i]<<" ";
+ cout << "pve = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_pve[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(pve) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_se_pve[i]<<" ";
+ cout << "se(pve) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_se_pve[i] << " ";
}
- cout<<endl;
+ cout << endl;
- if (n_vc>1) {
- cout<<"total pve = "<<pve_total<<endl;
- cout<<"se(total pve) = "<<se_pve_total<<endl;
+ if (n_vc > 1) {
+ cout << "total pve = " << pve_total << endl;
+ cout << "se(total pve) = " << se_pve_total << endl;
}
gsl_permutation_free(pmt);
@@ -1558,188 +1716,195 @@ void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W,
}
// REML for log(sigma2) based on the AI algorithm.
-void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K,
- const gsl_matrix *W, const gsl_vector *y) {
- size_t n1=K->size1, n2=K->size2;
- size_t n_vc=n2/n1;
- gsl_vector *log_sigma2=gsl_vector_alloc (n_vc+1);
+void VC::CalcVCreml(bool noconstrain, const gsl_matrix *K, const gsl_matrix *W,
+ const gsl_vector *y) {
+ size_t n1 = K->size1, n2 = K->size2;
+ size_t n_vc = n2 / n1;
+ gsl_vector *log_sigma2 = gsl_vector_alloc(n_vc + 1);
double d, s;
// Set up params.
- gsl_matrix *P=gsl_matrix_alloc (n1, n1);
- gsl_vector *Py=gsl_vector_alloc (n1);
- gsl_matrix *KPy_mat=gsl_matrix_alloc (n1, n_vc+1);
- gsl_matrix *PKPy_mat=gsl_matrix_alloc (n1, n_vc+1);
- gsl_vector *dev1=gsl_vector_alloc (n_vc+1);
- gsl_matrix *dev2=gsl_matrix_alloc (n_vc+1, n_vc+1);
- gsl_matrix *Hessian=gsl_matrix_alloc (n_vc+1, n_vc+1);
- VC_PARAM params={K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain};
+ gsl_matrix *P = gsl_matrix_alloc(n1, n1);
+ gsl_vector *Py = gsl_vector_alloc(n1);
+ gsl_matrix *KPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
+ gsl_matrix *PKPy_mat = gsl_matrix_alloc(n1, n_vc + 1);
+ gsl_vector *dev1 = gsl_vector_alloc(n_vc + 1);
+ gsl_matrix *dev2 = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
+ gsl_matrix *Hessian = gsl_matrix_alloc(n_vc + 1, n_vc + 1);
+ VC_PARAM params = {K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain};
// Initialize sigma2/log_sigma2.
- CalcVChe (K, W, y);
+ CalcVChe(K, W, y);
- gsl_blas_ddot (y, y, &s);
- s/=(double)n1;
- for (size_t i=0; i<n_vc+1; i++) {
+ gsl_blas_ddot(y, y, &s);
+ s /= (double)n1;
+ for (size_t i = 0; i < n_vc + 1; i++) {
if (noconstrain) {
- d=v_sigma2[i];
+ d = v_sigma2[i];
} else {
- if (v_sigma2[i]<=0) {d=log(0.1);} else {d=log(v_sigma2[i]);}
+ if (v_sigma2[i] <= 0) {
+ d = log(0.1);
+ } else {
+ d = log(v_sigma2[i]);
+ }
}
- gsl_vector_set (log_sigma2, i, d);
+ gsl_vector_set(log_sigma2, i, d);
}
- cout<<"iteration "<<0<<endl;
- cout<<"sigma2 = ";
- for (size_t i=0; i<n_vc+1; i++) {
+ cout << "iteration " << 0 << endl;
+ cout << "sigma2 = ";
+ for (size_t i = 0; i < n_vc + 1; i++) {
if (noconstrain) {
- cout<<gsl_vector_get(log_sigma2, i)<<" ";
+ cout << gsl_vector_get(log_sigma2, i) << " ";
} else {
- cout<<exp(gsl_vector_get(log_sigma2, i))<<" ";
+ cout << exp(gsl_vector_get(log_sigma2, i)) << " ";
}
}
- cout<<endl;
+ cout << endl;
// Set up fdf.
gsl_multiroot_function_fdf FDF;
- FDF.n=n_vc+1;
- FDF.params=&params;
- FDF.f=&LogRL_dev1;
- FDF.df=&LogRL_dev2;
- FDF.fdf=&LogRL_dev12;
+ FDF.n = n_vc + 1;
+ FDF.params = &params;
+ FDF.f = &LogRL_dev1;
+ FDF.df = &LogRL_dev2;
+ FDF.fdf = &LogRL_dev12;
// Set up solver.
int status;
- int iter=0, max_iter=100;
+ int iter = 0, max_iter = 100;
const gsl_multiroot_fdfsolver_type *T_fdf;
gsl_multiroot_fdfsolver *s_fdf;
- T_fdf=gsl_multiroot_fdfsolver_hybridsj;
- s_fdf=gsl_multiroot_fdfsolver_alloc (T_fdf, n_vc+1);
+ T_fdf = gsl_multiroot_fdfsolver_hybridsj;
+ s_fdf = gsl_multiroot_fdfsolver_alloc(T_fdf, n_vc + 1);
- gsl_multiroot_fdfsolver_set (s_fdf, &FDF, log_sigma2);
+ gsl_multiroot_fdfsolver_set(s_fdf, &FDF, log_sigma2);
do {
iter++;
- status=gsl_multiroot_fdfsolver_iterate (s_fdf);
+ status = gsl_multiroot_fdfsolver_iterate(s_fdf);
- if (status) break;
+ if (status)
+ break;
- cout<<"iteration "<<iter<<endl;
- cout<<"sigma2 = ";
- for (size_t i=0; i<n_vc+1; i++) {
+ cout << "iteration " << iter << endl;
+ cout << "sigma2 = ";
+ for (size_t i = 0; i < n_vc + 1; i++) {
if (noconstrain) {
- cout<<gsl_vector_get(s_fdf->x, i)<<" ";
+ cout << gsl_vector_get(s_fdf->x, i) << " ";
} else {
- cout<<exp(gsl_vector_get(s_fdf->x, i))<<" ";
+ cout << exp(gsl_vector_get(s_fdf->x, i)) << " ";
}
}
- cout<<endl;
- status=gsl_multiroot_test_residual (s_fdf->f, 1e-3);
- }
- while (status==GSL_CONTINUE && iter<max_iter);
+ cout << endl;
+ status = gsl_multiroot_test_residual(s_fdf->f, 1e-3);
+ } while (status == GSL_CONTINUE && iter < max_iter);
// Obtain Hessian and Hessian inverse.
- int sig=LogRL_dev12 (s_fdf->x, &params, dev1, dev2);
+ int sig = LogRL_dev12(s_fdf->x, &params, dev1, dev2);
- gsl_permutation * pmt=gsl_permutation_alloc (n_vc+1);
- LUDecomp (dev2, pmt, &sig);
- LUInvert (dev2, pmt, Hessian);
+ gsl_permutation *pmt = gsl_permutation_alloc(n_vc + 1);
+ LUDecomp(dev2, pmt, &sig);
+ LUInvert(dev2, pmt, Hessian);
gsl_permutation_free(pmt);
// Save sigma2 and se_sigma2.
- v_sigma2.clear(); v_se_sigma2.clear();
- for (size_t i=0; i<n_vc+1; i++) {
+ v_sigma2.clear();
+ v_se_sigma2.clear();
+ for (size_t i = 0; i < n_vc + 1; i++) {
if (noconstrain) {
- d=gsl_vector_get(s_fdf->x, i);
+ d = gsl_vector_get(s_fdf->x, i);
} else {
- d=exp(gsl_vector_get(s_fdf->x, i));
+ d = exp(gsl_vector_get(s_fdf->x, i));
}
v_sigma2.push_back(d);
if (noconstrain) {
- d=-1.0*gsl_matrix_get(Hessian, i, i);
+ d = -1.0 * gsl_matrix_get(Hessian, i, i);
} else {
- d=-1.0*d*d*gsl_matrix_get(Hessian, i, i);
+ d = -1.0 * d * d * gsl_matrix_get(Hessian, i, i);
}
v_se_sigma2.push_back(sqrt(d));
}
- s=0;
- for (size_t i=0; i<n_vc; i++) {
- s+=v_traceG[i]*v_sigma2[i];
+ s = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ s += v_traceG[i] * v_sigma2[i];
}
- s+=v_sigma2[n_vc];
+ s += v_sigma2[n_vc];
// Compute pve.
- v_pve.clear(); pve_total=0;
- for (size_t i=0; i<n_vc; i++) {
- d=v_traceG[i]*v_sigma2[i]/s;
+ v_pve.clear();
+ pve_total = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ d = v_traceG[i] * v_sigma2[i] / s;
v_pve.push_back(d);
- pve_total+=d;
+ pve_total += d;
}
// Compute se_pve; k=n_vc+1: total.
double d1, d2;
- v_se_pve.clear(); se_pve_total=0;
- for (size_t k=0; k<n_vc+1; k++) {
- d=0;
- for (size_t i=0; i<n_vc+1; i++) {
+ v_se_pve.clear();
+ se_pve_total = 0;
+ for (size_t k = 0; k < n_vc + 1; k++) {
+ d = 0;
+ for (size_t i = 0; i < n_vc + 1; i++) {
if (noconstrain) {
- d1=gsl_vector_get(s_fdf->x, i);
- d1=1;
+ d1 = gsl_vector_get(s_fdf->x, i);
+ d1 = 1;
} else {
- d1=exp(gsl_vector_get(s_fdf->x, i));
+ d1 = exp(gsl_vector_get(s_fdf->x, i));
}
- if (k<n_vc) {
- if (i==k) {
- d1*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s);
- } else if (i==n_vc) {
- d1*=-1*v_traceG[k]*v_sigma2[k]/(s*s);
- } else {
- d1*=-1*v_traceG[i]*v_traceG[k]*v_sigma2[k]/(s*s);
- }
+ if (k < n_vc) {
+ if (i == k) {
+ d1 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
+ } else if (i == n_vc) {
+ d1 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
+ } else {
+ d1 *= -1 * v_traceG[i] * v_traceG[k] * v_sigma2[k] / (s * s);
+ }
} else {
- if (i==k) {
- d1*=-1*(s-v_sigma2[n_vc])/(s*s);
- } else {
- d1*=v_traceG[i]*v_sigma2[n_vc]/(s*s);
- }
- }
-
- for (size_t j=0; j<n_vc+1; j++) {
- if (noconstrain) {
- d2=gsl_vector_get(s_fdf->x, j);
- d2=1;
- } else {
- d2=exp(gsl_vector_get(s_fdf->x, j));
- }
-
- if (k<n_vc) {
- if (j==k) {
- d2*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s);
- } else if (j==n_vc) {
- d2*=-1*v_traceG[k]*v_sigma2[k]/(s*s);
- } else {
- d2*=-1*v_traceG[j]*v_traceG[k]*v_sigma2[k]/(s*s);
- }
- } else {
- if (j==k) {
- d2*=-1*(s-v_sigma2[n_vc])/(s*s);
- } else {
- d2*=v_traceG[j]*v_sigma2[n_vc]/(s*s);
- }
- }
-
- d+=-1.0*d1*d2*gsl_matrix_get(Hessian, i, j);
- }
- }
-
- if (k<n_vc) {
- v_se_pve.push_back(sqrt(d) );
+ if (i == k) {
+ d1 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
+ } else {
+ d1 *= v_traceG[i] * v_sigma2[n_vc] / (s * s);
+ }
+ }
+
+ for (size_t j = 0; j < n_vc + 1; j++) {
+ if (noconstrain) {
+ d2 = gsl_vector_get(s_fdf->x, j);
+ d2 = 1;
+ } else {
+ d2 = exp(gsl_vector_get(s_fdf->x, j));
+ }
+
+ if (k < n_vc) {
+ if (j == k) {
+ d2 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s);
+ } else if (j == n_vc) {
+ d2 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s);
+ } else {
+ d2 *= -1 * v_traceG[j] * v_traceG[k] * v_sigma2[k] / (s * s);
+ }
+ } else {
+ if (j == k) {
+ d2 *= -1 * (s - v_sigma2[n_vc]) / (s * s);
+ } else {
+ d2 *= v_traceG[j] * v_sigma2[n_vc] / (s * s);
+ }
+ }
+
+ d += -1.0 * d1 * d2 * gsl_matrix_get(Hessian, i, j);
+ }
+ }
+
+ if (k < n_vc) {
+ v_se_pve.push_back(sqrt(d));
} else {
- se_pve_total=sqrt(d);
+ se_pve_total = sqrt(d);
}
}
@@ -1758,252 +1923,265 @@ void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K,
}
// Ks are not scaled.
-void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W,
- const gsl_vector *y) {
- size_t n1=K->size1, n2=K->size2;
- size_t n_vc=n2/n1;
+void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W,
+ const gsl_vector *y) {
+ size_t n1 = K->size1, n2 = K->size2;
+ size_t n_vc = n2 / n1;
double d, y2_sum, tau_inv, se_tau_inv;
// New matrices/vectors.
- gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2);
- gsl_vector *y_scale=gsl_vector_alloc (n1);
- gsl_vector *y2=gsl_vector_alloc (n1);
- gsl_vector *n1_vec=gsl_vector_alloc (n1);
- gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc);
- gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc);
- gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1);
- gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1);
+ gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
+ gsl_vector *y_scale = gsl_vector_alloc(n1);
+ gsl_vector *y2 = gsl_vector_alloc(n1);
+ gsl_vector *n1_vec = gsl_vector_alloc(n1);
+ gsl_matrix *Ay = gsl_matrix_alloc(n1, n_vc);
+ gsl_matrix *K2 = gsl_matrix_alloc(n1, n_vc * n_vc);
+ gsl_matrix *K_tmp = gsl_matrix_alloc(n1, n1);
+ gsl_matrix *V_mat = gsl_matrix_alloc(n1, n1);
// Old matrices/vectors.
- gsl_vector *pve=gsl_vector_alloc (n_vc);
- gsl_vector *se_pve=gsl_vector_alloc (n_vc);
- gsl_vector *q_vec=gsl_vector_alloc (n_vc);
- gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_vector *pve = gsl_vector_alloc(n_vc);
+ gsl_vector *se_pve = gsl_vector_alloc(n_vc);
+ gsl_vector *q_vec = gsl_vector_alloc(n_vc);
+ gsl_matrix *S1 = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *S2 = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *J_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+ gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
// Center and scale K by W, and standardize K further so that all
// diagonal elements are 1
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
gsl_matrix_view Kscale_sub =
- gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+ gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
gsl_matrix_const_view K_sub =
- gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1);
- gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix);
+ gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1);
+ gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix);
- CenterMatrix (&Kscale_sub.matrix, W);
- StandardizeMatrix (&Kscale_sub.matrix);
+ CenterMatrix(&Kscale_sub.matrix, W);
+ StandardizeMatrix(&Kscale_sub.matrix);
}
// Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1)
- gsl_vector_memcpy (y_scale, y);
- CenterVector (y_scale, W);
+ gsl_vector_memcpy(y_scale, y);
+ CenterVector(y_scale, W);
// Compute y^2 and sum(y^2), which is also the variance of y*n1.
- gsl_vector_memcpy (y2, y_scale);
- gsl_vector_mul (y2, y_scale);
+ gsl_vector_memcpy(y2, y_scale);
+ gsl_vector_mul(y2, y_scale);
- y2_sum=0;
- for (size_t i=0; i<y2->size; i++) {
- y2_sum+=gsl_vector_get(y2, i);
+ y2_sum = 0;
+ for (size_t i = 0; i < y2->size; i++) {
+ y2_sum += gsl_vector_get(y2, i);
}
// Compute the n_vc size q vector.
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
gsl_matrix_const_view Kscale_sub =
- gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+ gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
- gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale,
- 0.0, n1_vec);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, n1_vec);
- gsl_blas_ddot (n1_vec, y_scale, &d);
- gsl_vector_set(q_vec, i, d-y2_sum);
+ gsl_blas_ddot(n1_vec, y_scale, &d);
+ gsl_vector_set(q_vec, i, d - y2_sum);
}
// Compute the n_vc by n_vc S1 and S2 matrix (and eventually
// S=S1-\tau^{-1}S2).
- for (size_t i=0; i<n_vc; i++) {
+ for (size_t i = 0; i < n_vc; i++) {
gsl_matrix_const_view Kscale_sub1 =
- gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+ gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1);
- for (size_t j=i; j<n_vc; j++) {
+ for (size_t j = i; j < n_vc; j++) {
gsl_matrix_const_view Kscale_sub2 =
- gsl_matrix_const_submatrix (K_scale, 0, n1*j, n1, n1);
+ gsl_matrix_const_submatrix(K_scale, 0, n1 * j, n1, n1);
- gsl_matrix_memcpy (K_tmp, &Kscale_sub1.matrix);
- gsl_matrix_mul_elements (K_tmp, &Kscale_sub2.matrix);
+ gsl_matrix_memcpy(K_tmp, &Kscale_sub1.matrix);
+ gsl_matrix_mul_elements(K_tmp, &Kscale_sub2.matrix);
gsl_vector_set_zero(n1_vec);
- for (size_t t=0; t<K_tmp->size1; t++) {
- gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t);
- gsl_vector_add (n1_vec, &Ktmp_col.vector);
+ for (size_t t = 0; t < K_tmp->size1; t++) {
+ gsl_vector_view Ktmp_col = gsl_matrix_column(K_tmp, t);
+ gsl_vector_add(n1_vec, &Ktmp_col.vector);
}
- gsl_vector_add_constant (n1_vec, -1.0);
+ gsl_vector_add_constant(n1_vec, -1.0);
// Compute S1.
- gsl_blas_ddot (n1_vec, y2, &d);
- gsl_matrix_set (S1, i, j, 2*d);
- if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);}
+ gsl_blas_ddot(n1_vec, y2, &d);
+ gsl_matrix_set(S1, i, j, 2 * d);
+ if (i != j) {
+ gsl_matrix_set(S1, j, i, 2 * d);
+ }
// Compute S2.
- d=0;
- for (size_t t=0; t<n1_vec->size; t++) {
- d+=gsl_vector_get (n1_vec, t);
+ d = 0;
+ for (size_t t = 0; t < n1_vec->size; t++) {
+ d += gsl_vector_get(n1_vec, t);
+ }
+ gsl_matrix_set(S2, i, j, d);
+ if (i != j) {
+ gsl_matrix_set(S2, j, i, d);
}
- gsl_matrix_set (S2, i, j, d);
- if (i!=j) {gsl_matrix_set (S2, j, i, d);}
// Save information to compute J.
- gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j);
- gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i);
+ gsl_vector_view K2col1 = gsl_matrix_column(K2, n_vc * i + j);
+ gsl_vector_view K2col2 = gsl_matrix_column(K2, n_vc * j + i);
gsl_vector_memcpy(&K2col1.vector, n1_vec);
- if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);}
+ if (i != j) {
+ gsl_vector_memcpy(&K2col2.vector, n1_vec);
+ }
}
}
// Iterate to solve tau and h's.
- size_t it=0;
- double s=1;
- while (abs(s)>1e-3 && it<100) {
+ size_t it = 0;
+ double s = 1;
+ while (abs(s) > 1e-3 && it < 100) {
// Update tau_inv.
- gsl_blas_ddot (q_vec, pve, &d);
- if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;}
- tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1));
- if (it>0) {s/=tau_inv;}
+ gsl_blas_ddot(q_vec, pve, &d);
+ if (it > 0) {
+ s = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)) - tau_inv;
+ }
+ tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1));
+ if (it > 0) {
+ s /= tau_inv;
+ }
// Update S.
- gsl_matrix_memcpy (S_mat, S2);
- gsl_matrix_scale (S_mat, -1*tau_inv);
- gsl_matrix_add (S_mat, S1);
+ gsl_matrix_memcpy(S_mat, S2);
+ gsl_matrix_scale(S_mat, -1 * tau_inv);
+ gsl_matrix_add(S_mat, S1);
// Update h=S^{-1}q.
int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
- LUDecomp (S_mat, pmt, &sig);
- LUInvert (S_mat, pmt, Si_mat);
- gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+ gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+ LUDecomp(S_mat, pmt, &sig);
+ LUInvert(S_mat, pmt, Si_mat);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
it++;
}
// Compute V matrix and A matrix (K_scale is destroyed, so need to
// compute V first).
- gsl_matrix_set_zero (V_mat);
- for (size_t i=0; i<n_vc; i++) {
+ gsl_matrix_set_zero(V_mat);
+ for (size_t i = 0; i < n_vc; i++) {
gsl_matrix_view Kscale_sub =
- gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+ gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1);
// Compute V.
- gsl_matrix_memcpy (K_tmp, &Kscale_sub.matrix);
- gsl_matrix_scale (K_tmp, gsl_vector_get(pve, i));
- gsl_matrix_add (V_mat, K_tmp);
+ gsl_matrix_memcpy(K_tmp, &Kscale_sub.matrix);
+ gsl_matrix_scale(K_tmp, gsl_vector_get(pve, i));
+ gsl_matrix_add(V_mat, K_tmp);
// Compute A; the corresponding Kscale is destroyed.
gsl_matrix_const_view K2_sub =
- gsl_matrix_const_submatrix (K2, 0, n_vc*i, n1, n_vc);
- gsl_blas_dgemv (CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
+ gsl_matrix_const_submatrix(K2, 0, n_vc * i, n1, n_vc);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
- for (size_t t=0; t<n1; t++) {
- gsl_matrix_set (K_scale, t, n1*i+t, gsl_vector_get(n1_vec, t) );
+ for (size_t t = 0; t < n1; t++) {
+ gsl_matrix_set(K_scale, t, n1 * i + t, gsl_vector_get(n1_vec, t));
}
// Compute Ay.
- gsl_vector_view Ay_col=gsl_matrix_column (Ay, i);
- gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale,
- 0.0, &Ay_col.vector);
+ gsl_vector_view Ay_col = gsl_matrix_column(Ay, i);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0,
+ &Ay_col.vector);
}
- gsl_matrix_scale (V_mat, tau_inv);
+ gsl_matrix_scale(V_mat, tau_inv);
// Compute J matrix.
- for (size_t i=0; i<n_vc; i++) {
- gsl_vector_view Ay_col1=gsl_matrix_column (Ay, i);
+ for (size_t i = 0; i < n_vc; i++) {
+ gsl_vector_view Ay_col1 = gsl_matrix_column(Ay, i);
gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, &Ay_col1.vector, 0.0, n1_vec);
- for (size_t j=i; j<n_vc; j++) {
- gsl_vector_view Ay_col2=gsl_matrix_column (Ay, j);
+ for (size_t j = i; j < n_vc; j++) {
+ gsl_vector_view Ay_col2 = gsl_matrix_column(Ay, j);
- gsl_blas_ddot (&Ay_col2.vector, n1_vec, &d);
- gsl_matrix_set (J_mat, i, j, 2.0*d);
- if (i!=j) {gsl_matrix_set (J_mat, j, i, 2.0*d);}
+ gsl_blas_ddot(&Ay_col2.vector, n1_vec, &d);
+ gsl_matrix_set(J_mat, i, j, 2.0 * d);
+ if (i != j) {
+ gsl_matrix_set(J_mat, j, i, 2.0 * d);
+ }
}
}
// Compute H^{-1}JH^{-1} as V(\hat h), where H=S2*tau_inv; this is
// stored in Var_mat.
- gsl_matrix_memcpy (S_mat, S2);
- gsl_matrix_scale (S_mat, tau_inv);
+ gsl_matrix_memcpy(S_mat, S2);
+ gsl_matrix_scale(S_mat, tau_inv);
- LUDecomp (S_mat, pmt, &sig);
- LUInvert (S_mat, pmt, Si_mat);
+ LUDecomp(S_mat, pmt, &sig);
+ LUInvert(S_mat, pmt, Si_mat);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, J_mat, 0.0, S_mat);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, S_mat, Si_mat, 0.0, Var_mat);
// Compute variance for tau_inv.
gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec);
- gsl_blas_ddot (y_scale, n1_vec, &d);
- se_tau_inv=sqrt(2*d)/(double)n1;
+ gsl_blas_ddot(y_scale, n1_vec, &d);
+ se_tau_inv = sqrt(2 * d) / (double)n1;
// Transform pve back to the original scale and save data.
- v_pve.clear(); v_se_pve.clear();
- v_sigma2.clear(); v_se_sigma2.clear();
+ v_pve.clear();
+ v_se_pve.clear();
+ v_sigma2.clear();
+ v_se_sigma2.clear();
- pve_total=0, se_pve_total=0;
- for (size_t i=0; i<n_vc; i++) {
- d=gsl_vector_get (pve, i);
- pve_total+=d;
+ pve_total = 0, se_pve_total = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ d = gsl_vector_get(pve, i);
+ pve_total += d;
v_pve.push_back(d);
- v_sigma2.push_back(d*tau_inv/v_traceG[i] );
+ v_sigma2.push_back(d * tau_inv / v_traceG[i]);
- d=sqrt(gsl_matrix_get (Var_mat, i, i));
+ d = sqrt(gsl_matrix_get(Var_mat, i, i));
v_se_pve.push_back(d);
- v_se_sigma2.push_back(d*tau_inv/v_traceG[i]);
+ v_se_sigma2.push_back(d * tau_inv / v_traceG[i]);
- for (size_t j=0; j<n_vc; j++) {
- se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+ for (size_t j = 0; j < n_vc; j++) {
+ se_pve_total += gsl_matrix_get(Var_mat, i, j);
}
}
- v_sigma2.push_back( (1-pve_total)*tau_inv );
- v_se_sigma2.push_back(sqrt(se_pve_total)*tau_inv );
- se_pve_total=sqrt(se_pve_total);
+ v_sigma2.push_back((1 - pve_total) * tau_inv);
+ v_se_sigma2.push_back(sqrt(se_pve_total) * tau_inv);
+ se_pve_total = sqrt(se_pve_total);
- cout<<"sigma2 = ";
- for (size_t i=0; i<n_vc+1; i++) {
- cout<<v_sigma2[i]<<" ";
+ cout << "sigma2 = ";
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ cout << v_sigma2[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(sigma2) = ";
- for (size_t i=0; i<n_vc+1; i++) {
- cout<<v_se_sigma2[i]<<" ";
+ cout << "se(sigma2) = ";
+ for (size_t i = 0; i < n_vc + 1; i++) {
+ cout << v_se_sigma2[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"pve = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_pve[i]<<" ";
+ cout << "pve = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_pve[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(pve) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_se_pve[i]<<" ";
+ cout << "se(pve) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_se_pve[i] << " ";
}
- cout<<endl;
+ cout << endl;
- if (n_vc>1) {
- cout<<"total pve = "<<pve_total<<endl;
- cout<<"se(total pve) = "<<se_pve_total<<endl;
+ if (n_vc > 1) {
+ cout << "total pve = " << pve_total << endl;
+ cout << "se(total pve) = " << se_pve_total << endl;
}
gsl_permutation_free(pmt);
@@ -2031,234 +2209,248 @@ void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W,
}
// Read bimbam mean genotype file and compute XWz.
-bool BimbamXwz (const string &file_geno, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const vector<size_t> &vec_cat, const gsl_vector *w,
- const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- size_t n_miss;
- double d, geno_mean, geno_var;
-
- size_t ni_test=XWz->size1;
- gsl_vector *geno=gsl_vector_alloc (ni_test);
- gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
- gsl_vector *wz=gsl_vector_alloc (w->size);
- gsl_vector_memcpy (wz, z);
- gsl_vector_mul(wz, w);
-
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- geno_mean=0.0; n_miss=0; geno_var=0.0;
- gsl_vector_set_all(geno_miss, 0);
-
- size_t j=0;
- for (size_t i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
- ch_ptr=strtok (NULL, " , \t");
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(geno_miss, i, 0);
- n_miss++;
- } else {
- d=atof(ch_ptr);
- gsl_vector_set (geno, j, d);
- gsl_vector_set (geno_miss, j, 1);
- geno_mean+=d;
- geno_var+=d*d;
- }
- j++;
- }
-
- geno_mean/=(double)(ni_test-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_test;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {
- gsl_vector_set(geno, i, geno_mean);
- }
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- gsl_vector_view XWz_col=
- gsl_matrix_column(XWz, vec_cat[ns_test]);
- d=gsl_vector_get (wz, ns_test);
- gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector);
-
- ns_test++;
- }
-
- cout<<endl;
-
- gsl_vector_free (geno);
- gsl_vector_free (geno_miss);
- gsl_vector_free (wz);
-
- infile.close();
- infile.clear();
-
- return true;
+bool BimbamXwz(const string &file_geno, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const vector<size_t> &vec_cat, const gsl_vector *w,
+ const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ size_t n_miss;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test = XWz->size1;
+ gsl_vector *geno = gsl_vector_alloc(ni_test);
+ gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+ gsl_vector *wz = gsl_vector_alloc(w->size);
+ gsl_vector_memcpy(wz, z);
+ gsl_vector_mul(wz, w);
+
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof();
+ if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ geno_mean = 0.0;
+ n_miss = 0;
+ geno_var = 0.0;
+ gsl_vector_set_all(geno_miss, 0);
+
+ size_t j = 0;
+ for (size_t i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+ ch_ptr = strtok(NULL, " , \t");
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(geno_miss, i, 0);
+ n_miss++;
+ } else {
+ d = atof(ch_ptr);
+ gsl_vector_set(geno, j, d);
+ gsl_vector_set(geno_miss, j, 1);
+ geno_mean += d;
+ geno_var += d * d;
+ }
+ j++;
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+ geno_var += geno_mean * geno_mean * (double)n_miss;
+ geno_var /= (double)ni_test;
+ geno_var -= geno_mean * geno_mean;
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(geno_miss, i) == 0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
+ }
+
+ gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+ gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
+ d = gsl_vector_get(wz, ns_test);
+ gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
+
+ ns_test++;
+ }
+
+ cout << endl;
+
+ gsl_vector_free(geno);
+ gsl_vector_free(geno_miss);
+ gsl_vector_free(wz);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read PLINK bed file and compute XWz.
-bool PlinkXwz (const string &file_bed, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const vector<size_t> &vec_cat, const gsl_vector *w,
- const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return false;
- }
-
- char ch[1];
- bitset<8> b;
-
- size_t n_miss, ci_total, ci_test;
- double d, geno_mean, geno_var;
-
- size_t ni_test=XWz->size1;
- size_t ni_total=indicator_idv.size();
- gsl_vector *geno=gsl_vector_alloc (ni_test);
- gsl_vector *wz=gsl_vector_alloc (w->size);
- gsl_vector_memcpy (wz, z);
- gsl_vector_mul(wz, w);
-
- int n_bit;
-
- // Calculate n_bit and c, the number of bit for each snp.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==ni_total) {
- break;
- }
- if (indicator_idv[ci_total]==0) {
- ci_total++;
- continue;
- }
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(geno, ci_test, 2.0);
- geno_mean+=2.0; geno_var+=4.0;
- }
- else {
- gsl_vector_set(geno, ci_test, 1.0);
- geno_mean+=1.0; geno_var+=1.0;
- }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(geno, ci_test, 0.0);
- }
- else {
- gsl_vector_set(geno, ci_test, -9.0);
- n_miss++;
- }
- }
-
- ci_test++;
- ci_total++;
- }
- }
-
- geno_mean/=(double)(ni_test-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_test;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_test; ++i) {
- d=gsl_vector_get(geno,i);
- if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- gsl_vector_view XWz_col=
- gsl_matrix_column(XWz, vec_cat[ns_test]);
- d=gsl_vector_get (wz, ns_test);
- gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector);
-
- ns_test++;
- }
- cout<<endl;
-
- gsl_vector_free (geno);
- gsl_vector_free (wz);
-
- infile.close();
- infile.clear();
-
- return true;
+bool PlinkXwz(const string &file_bed, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const vector<size_t> &vec_cat, const gsl_vector *w,
+ const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) {
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return false;
+ }
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t n_miss, ci_total, ci_test;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test = XWz->size1;
+ size_t ni_total = indicator_idv.size();
+ gsl_vector *geno = gsl_vector_alloc(ni_test);
+ gsl_vector *wz = gsl_vector_alloc(w->size);
+ gsl_vector_memcpy(wz, z);
+ gsl_vector_mul(wz, w);
+
+ int n_bit;
+
+ // Calculate n_bit and c, the number of bit for each snp.
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Print the first three magic numbers.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ geno_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0;
+ geno_var = 0.0;
+ ci_test = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0.
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total] == 0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(geno, ci_test, 2.0);
+ geno_mean += 2.0;
+ geno_var += 4.0;
+ } else {
+ gsl_vector_set(geno, ci_test, 1.0);
+ geno_mean += 1.0;
+ geno_var += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(geno, ci_test, 0.0);
+ } else {
+ gsl_vector_set(geno, ci_test, -9.0);
+ n_miss++;
+ }
+ }
+
+ ci_test++;
+ ci_total++;
+ }
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+ geno_var += geno_mean * geno_mean * (double)n_miss;
+ geno_var /= (double)ni_test;
+ geno_var -= geno_mean * geno_mean;
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ d = gsl_vector_get(geno, i);
+ if (d == -9.0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
+ }
+
+ gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+ gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]);
+ d = gsl_vector_get(wz, ns_test);
+ gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector);
+
+ ns_test++;
+ }
+ cout << endl;
+
+ gsl_vector_free(geno);
+ gsl_vector_free(wz);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read multiple genotype files and compute XWz.
-bool MFILEXwz (const size_t mfile_mode, const string &file_mfile,
- const int display_pace, vector<int> &indicator_idv,
- vector<vector<int> > &mindicator_snp,
- const vector<size_t> &vec_cat, const gsl_vector *w,
- const gsl_vector *z, gsl_matrix *XWz) {
+bool MFILEXwz(const size_t mfile_mode, const string &file_mfile,
+ const int display_pace, vector<int> &indicator_idv,
+ vector<vector<int>> &mindicator_snp,
+ const vector<size_t> &vec_cat, const gsl_vector *w,
+ const gsl_vector *z, gsl_matrix *XWz) {
gsl_matrix_set_zero(XWz);
- igzstream infile (file_mfile.c_str(), igzstream::in);
+ igzstream infile(file_mfile.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open mfile file: "<<file_mfile<<endl;
+ cout << "error! fail to open mfile file: " << file_mfile << endl;
return false;
}
string file_name;
- size_t l=0, ns_test=0;
+ size_t l = 0, ns_test = 0;
while (!safeGetline(infile, file_name).eof()) {
- if (mfile_mode==1) {
- file_name+=".bed";
- PlinkXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
- vec_cat, w, z, ns_test, XWz);
+ if (mfile_mode == 1) {
+ file_name += ".bed";
+ PlinkXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
+ vec_cat, w, z, ns_test, XWz);
} else {
- BimbamXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
- vec_cat, w, z, ns_test, XWz);
+ BimbamXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
+ vec_cat, w, z, ns_test, XWz);
}
l++;
@@ -2271,228 +2463,241 @@ bool MFILEXwz (const size_t mfile_mode, const string &file_mfile,
}
// Read bimbam mean genotype file and compute X_i^TX_jWz.
-bool BimbamXtXwz (const string &file_geno, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
- igzstream infile (file_geno.c_str(), igzstream::in);
- if (!infile) {
- cout<<"error reading genotype file:"<<file_geno<<endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- size_t n_miss;
- double d, geno_mean, geno_var;
-
- size_t ni_test=XWz->size1;
- gsl_vector *geno=gsl_vector_alloc (ni_test);
- gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
-
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {
- ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);
- }
- if (indicator_snp[t]==0) {continue;}
-
- ch_ptr=strtok ((char *)line.c_str(), " , \t");
- ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
- geno_mean=0.0; n_miss=0; geno_var=0.0;
- gsl_vector_set_all(geno_miss, 0);
-
- size_t j=0;
- for (size_t i=0; i<indicator_idv.size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
- ch_ptr=strtok (NULL, " , \t");
- if (strcmp(ch_ptr, "NA")==0) {
- gsl_vector_set(geno_miss, i, 0);
- n_miss++;
- }
- else {
- d=atof(ch_ptr);
- gsl_vector_set (geno, j, d);
- gsl_vector_set (geno_miss, j, 1);
- geno_mean+=d;
- geno_var+=d*d;
- }
- j++;
- }
-
- geno_mean/=(double)(ni_test-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_test;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_test; ++i) {
- if (gsl_vector_get (geno_miss, i)==0) {
- gsl_vector_set(geno, i, geno_mean);
- }
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- for (size_t i=0; i<XWz->size2; i++) {
- gsl_vector_const_view XWz_col=
- gsl_matrix_const_column(XWz, i);
- gsl_blas_ddot (geno, &XWz_col.vector, &d);
- gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var));
- }
-
- ns_test++;
- }
-
- cout<<endl;
-
- gsl_vector_free (geno);
- gsl_vector_free (geno_miss);
-
- infile.close();
- infile.clear();
-
- return true;
+bool BimbamXtXwz(const string &file_geno, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ if (!infile) {
+ cout << "error reading genotype file:" << file_geno << endl;
+ return false;
+ }
+
+ string line;
+ char *ch_ptr;
+
+ size_t n_miss;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test = XWz->size1;
+ gsl_vector *geno = gsl_vector_alloc(ni_test);
+ gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof();
+ if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok(NULL, " , \t");
+
+ geno_mean = 0.0;
+ n_miss = 0;
+ geno_var = 0.0;
+ gsl_vector_set_all(geno_miss, 0);
+
+ size_t j = 0;
+ for (size_t i = 0; i < indicator_idv.size(); ++i) {
+ if (indicator_idv[i] == 0) {
+ continue;
+ }
+ ch_ptr = strtok(NULL, " , \t");
+ if (strcmp(ch_ptr, "NA") == 0) {
+ gsl_vector_set(geno_miss, i, 0);
+ n_miss++;
+ } else {
+ d = atof(ch_ptr);
+ gsl_vector_set(geno, j, d);
+ gsl_vector_set(geno_miss, j, 1);
+ geno_mean += d;
+ geno_var += d * d;
+ }
+ j++;
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+ geno_var += geno_mean * geno_mean * (double)n_miss;
+ geno_var /= (double)ni_test;
+ geno_var -= geno_mean * geno_mean;
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ if (gsl_vector_get(geno_miss, i) == 0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
+ }
+
+ gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+ for (size_t i = 0; i < XWz->size2; i++) {
+ gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
+ gsl_blas_ddot(geno, &XWz_col.vector, &d);
+ gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
+ }
+
+ ns_test++;
+ }
+
+ cout << endl;
+
+ gsl_vector_free(geno);
+ gsl_vector_free(geno_miss);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read PLINK bed file and compute XWz.
-bool PlinkXtXwz (const string &file_bed, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
- ifstream infile (file_bed.c_str(), ios::binary);
- if (!infile) {
- cout<<"error reading bed file:"<<file_bed<<endl;
- return false;
- }
-
- char ch[1];
- bitset<8> b;
-
- size_t n_miss, ci_total, ci_test;
- double d, geno_mean, geno_var;
-
- size_t ni_test=XWz->size1;
- size_t ni_total=indicator_idv.size();
- gsl_vector *geno=gsl_vector_alloc (ni_test);
-
- int n_bit;
-
- // Calculate n_bit and c, the number of bit for each snp.
- if (ni_total%4==0) {n_bit=ni_total/4;}
- else {n_bit=ni_total/4+1; }
-
- // Print the first three magic numbers.
- for (int i=0; i<3; ++i) {
- infile.read(ch,1);
- b=ch[0];
- }
-
- for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
- if (indicator_snp[t]==0) {continue;}
-
- // n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
-
- // Read genotypes.
- geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
- for (int i=0; i<n_bit; ++i) {
- infile.read(ch,1);
- b=ch[0];
-
- // Minor allele homozygous: 2.0; major: 0.0;
- for (size_t j=0; j<4; ++j) {
- if ((i==(n_bit-1)) && ci_total==ni_total) {
- break;
- }
- if (indicator_idv[ci_total]==0) {
- ci_total++;
- continue;
- }
-
- if (b[2*j]==0) {
- if (b[2*j+1]==0) {
- gsl_vector_set(geno, ci_test, 2.0);
- geno_mean+=2.0;
- geno_var+=4.0;
- }
- else {
- gsl_vector_set(geno, ci_test, 1.0);
- geno_mean+=1.0;
- geno_var+=1.0;
- }
- }
- else {
- if (b[2*j+1]==1) {
- gsl_vector_set(geno, ci_test, 0.0);
- }
- else {
- gsl_vector_set(geno, ci_test, -9.0);
- n_miss++;
- }
- }
-
- ci_test++;
- ci_total++;
- }
- }
-
- geno_mean/=(double)(ni_test-n_miss);
- geno_var+=geno_mean*geno_mean*(double)n_miss;
- geno_var/=(double)ni_test;
- geno_var-=geno_mean*geno_mean;
-
- for (size_t i=0; i<ni_test; ++i) {
- d=gsl_vector_get(geno,i);
- if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
- }
-
- gsl_vector_add_constant (geno, -1.0*geno_mean);
-
- for (size_t i=0; i<XWz->size2; i++) {
- gsl_vector_const_view XWz_col=
- gsl_matrix_const_column(XWz, i);
- gsl_blas_ddot (geno, &XWz_col.vector, &d);
- gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var));
- }
-
- ns_test++;
- }
- cout<<endl;
-
- gsl_vector_free (geno);
-
- infile.close();
- infile.clear();
-
- return true;
+bool PlinkXtXwz(const string &file_bed, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) {
+ ifstream infile(file_bed.c_str(), ios::binary);
+ if (!infile) {
+ cout << "error reading bed file:" << file_bed << endl;
+ return false;
+ }
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t n_miss, ci_total, ci_test;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test = XWz->size1;
+ size_t ni_total = indicator_idv.size();
+ gsl_vector *geno = gsl_vector_alloc(ni_test);
+
+ int n_bit;
+
+ // Calculate n_bit and c, the number of bit for each snp.
+ if (ni_total % 4 == 0) {
+ n_bit = ni_total / 4;
+ } else {
+ n_bit = ni_total / 4 + 1;
+ }
+
+ // Print the first three magic numbers.
+ for (int i = 0; i < 3; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+ }
+
+ for (size_t t = 0; t < indicator_snp.size(); ++t) {
+ if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
+ ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ }
+ if (indicator_snp[t] == 0) {
+ continue;
+ }
+
+ // n_bit, and 3 is the number of magic numbers.
+ infile.seekg(t * n_bit + 3);
+
+ // Read genotypes.
+ geno_mean = 0.0;
+ n_miss = 0;
+ ci_total = 0;
+ geno_var = 0.0;
+ ci_test = 0;
+ for (int i = 0; i < n_bit; ++i) {
+ infile.read(ch, 1);
+ b = ch[0];
+
+ // Minor allele homozygous: 2.0; major: 0.0;
+ for (size_t j = 0; j < 4; ++j) {
+ if ((i == (n_bit - 1)) && ci_total == ni_total) {
+ break;
+ }
+ if (indicator_idv[ci_total] == 0) {
+ ci_total++;
+ continue;
+ }
+
+ if (b[2 * j] == 0) {
+ if (b[2 * j + 1] == 0) {
+ gsl_vector_set(geno, ci_test, 2.0);
+ geno_mean += 2.0;
+ geno_var += 4.0;
+ } else {
+ gsl_vector_set(geno, ci_test, 1.0);
+ geno_mean += 1.0;
+ geno_var += 1.0;
+ }
+ } else {
+ if (b[2 * j + 1] == 1) {
+ gsl_vector_set(geno, ci_test, 0.0);
+ } else {
+ gsl_vector_set(geno, ci_test, -9.0);
+ n_miss++;
+ }
+ }
+
+ ci_test++;
+ ci_total++;
+ }
+ }
+
+ geno_mean /= (double)(ni_test - n_miss);
+ geno_var += geno_mean * geno_mean * (double)n_miss;
+ geno_var /= (double)ni_test;
+ geno_var -= geno_mean * geno_mean;
+
+ for (size_t i = 0; i < ni_test; ++i) {
+ d = gsl_vector_get(geno, i);
+ if (d == -9.0) {
+ gsl_vector_set(geno, i, geno_mean);
+ }
+ }
+
+ gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+ for (size_t i = 0; i < XWz->size2; i++) {
+ gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i);
+ gsl_blas_ddot(geno, &XWz_col.vector, &d);
+ gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var));
+ }
+
+ ns_test++;
+ }
+ cout << endl;
+
+ gsl_vector_free(geno);
+
+ infile.close();
+ infile.clear();
+
+ return true;
}
// Read multiple genotype files and compute XWz.
-bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile,
- const int display_pace, vector<int> &indicator_idv,
- vector<vector<int> > &mindicator_snp, const gsl_matrix *XWz,
- gsl_matrix *XtXWz) {
+bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile,
+ const int display_pace, vector<int> &indicator_idv,
+ vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz,
+ gsl_matrix *XtXWz) {
gsl_matrix_set_zero(XtXWz);
- igzstream infile (file_mfile.c_str(), igzstream::in);
+ igzstream infile(file_mfile.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open mfile file: "<<file_mfile<<endl;
+ cout << "error! fail to open mfile file: " << file_mfile << endl;
return false;
}
string file_name;
- size_t l=0, ns_test=0;
+ size_t l = 0, ns_test = 0;
while (!safeGetline(infile, file_name).eof()) {
- if (mfile_mode==1) {
- file_name+=".bed";
- PlinkXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
- XWz, ns_test, XtXWz);
+ if (mfile_mode == 1) {
+ file_name += ".bed";
+ PlinkXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], XWz,
+ ns_test, XtXWz);
} else {
- BimbamXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l],
- XWz, ns_test, XtXWz);
+ BimbamXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l],
+ XWz, ns_test, XtXWz);
}
l++;
@@ -2506,217 +2711,225 @@ bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile,
// Compute confidence intervals from summary statistics.
void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz,
- const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
- const gsl_matrix *Svar_mat, const gsl_vector *w,
- const gsl_vector *z, const gsl_vector *s_vec,
- const vector<size_t> &vec_cat, const vector<double> &v_pve,
- vector<double> &v_se_pve, double &pve_total,
- double &se_pve_total, vector<double> &v_sigma2,
- vector<double> &v_se_sigma2, vector<double> &v_enrich,
- vector<double> &v_se_enrich) {
- size_t n_vc=XWz->size2, ns_test=w->size, ni_test=XWz->size1;
+ const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
+ const gsl_matrix *Svar_mat, const gsl_vector *w,
+ const gsl_vector *z, const gsl_vector *s_vec,
+ const vector<size_t> &vec_cat, const vector<double> &v_pve,
+ vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+ vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+ vector<double> &v_enrich, vector<double> &v_se_enrich) {
+ size_t n_vc = XWz->size2, ns_test = w->size, ni_test = XWz->size1;
// Set up matrices.
- gsl_vector *w_pve=gsl_vector_alloc (ns_test);
- gsl_vector *wz=gsl_vector_alloc (ns_test);
- gsl_vector *zwz=gsl_vector_alloc (n_vc);
- gsl_vector *zz=gsl_vector_alloc (n_vc);
- gsl_vector *Xz_pve=gsl_vector_alloc (ni_test);
- gsl_vector *WXtXWz=gsl_vector_alloc (ns_test);
-
- gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc);
- gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_vector *w_pve = gsl_vector_alloc(ns_test);
+ gsl_vector *wz = gsl_vector_alloc(ns_test);
+ gsl_vector *zwz = gsl_vector_alloc(n_vc);
+ gsl_vector *zz = gsl_vector_alloc(n_vc);
+ gsl_vector *Xz_pve = gsl_vector_alloc(ni_test);
+ gsl_vector *WXtXWz = gsl_vector_alloc(ns_test);
+
+ gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc);
+ gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc);
double d, s0, s1, s, s_pve, s_snp;
// Compute wz and zwz.
- gsl_vector_memcpy (wz, z);
- gsl_vector_mul (wz, w);
+ gsl_vector_memcpy(wz, z);
+ gsl_vector_mul(wz, w);
- gsl_vector_set_zero (zwz);
- gsl_vector_set_zero (zz);
- for (size_t i=0; i<w->size; i++) {
- d=gsl_vector_get (wz, i)*gsl_vector_get (z, i);
- d+=gsl_vector_get (zwz, vec_cat[i]);
- gsl_vector_set (zwz, vec_cat[i], d);
+ gsl_vector_set_zero(zwz);
+ gsl_vector_set_zero(zz);
+ for (size_t i = 0; i < w->size; i++) {
+ d = gsl_vector_get(wz, i) * gsl_vector_get(z, i);
+ d += gsl_vector_get(zwz, vec_cat[i]);
+ gsl_vector_set(zwz, vec_cat[i], d);
- d=gsl_vector_get (z, i)*gsl_vector_get (z, i);
- d+=gsl_vector_get (zz, vec_cat[i]);
- gsl_vector_set (zz, vec_cat[i], d);
+ d = gsl_vector_get(z, i) * gsl_vector_get(z, i);
+ d += gsl_vector_get(zz, vec_cat[i]);
+ gsl_vector_set(zz, vec_cat[i], d);
}
// Compute wz, ve and Xz_pve.
- gsl_vector_set_zero (Xz_pve); s_pve=0; s_snp=0;
- for (size_t i=0; i<n_vc; i++) {
- s_pve+=v_pve[i];
- s_snp+=gsl_vector_get(s_vec, i);
+ gsl_vector_set_zero(Xz_pve);
+ s_pve = 0;
+ s_snp = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ s_pve += v_pve[i];
+ s_snp += gsl_vector_get(s_vec, i);
- gsl_vector_const_view Xz_col=gsl_matrix_const_column (Xz, i);
- gsl_blas_daxpy (v_pve[i]/gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve);
+ gsl_vector_const_view Xz_col = gsl_matrix_const_column(Xz, i);
+ gsl_blas_daxpy(v_pve[i] / gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve);
}
// Set up wpve vector.
- for (size_t i=0; i<w->size; i++) {
- d=v_pve[vec_cat[i]]/gsl_vector_get(s_vec, vec_cat[i]);
- gsl_vector_set (w_pve, i, d);
+ for (size_t i = 0; i < w->size; i++) {
+ d = v_pve[vec_cat[i]] / gsl_vector_get(s_vec, vec_cat[i]);
+ gsl_vector_set(w_pve, i, d);
}
// Compute Vq (in qvar_mat).
- s0=1-s_pve;
- for (size_t i=0; i<n_vc; i++) {
- s0+=gsl_vector_get (zz, i)*v_pve[i]/gsl_vector_get(s_vec, i);
+ s0 = 1 - s_pve;
+ for (size_t i = 0; i < n_vc; i++) {
+ s0 += gsl_vector_get(zz, i) * v_pve[i] / gsl_vector_get(s_vec, i);
}
- for (size_t i=0; i<n_vc; i++) {
- s1=s0;
- s1-=gsl_vector_get (zwz, i)*(1-s_pve)/gsl_vector_get(s_vec, i);
+ for (size_t i = 0; i < n_vc; i++) {
+ s1 = s0;
+ s1 -= gsl_vector_get(zwz, i) * (1 - s_pve) / gsl_vector_get(s_vec, i);
- gsl_vector_const_view XWz_col1=gsl_matrix_const_column (XWz, i);
- gsl_vector_const_view XtXWz_col1=gsl_matrix_const_column (XtXWz, i);
+ gsl_vector_const_view XWz_col1 = gsl_matrix_const_column(XWz, i);
+ gsl_vector_const_view XtXWz_col1 = gsl_matrix_const_column(XtXWz, i);
- gsl_vector_memcpy (WXtXWz, &XtXWz_col1.vector);
- gsl_vector_mul (WXtXWz, w_pve);
+ gsl_vector_memcpy(WXtXWz, &XtXWz_col1.vector);
+ gsl_vector_mul(WXtXWz, w_pve);
- gsl_blas_ddot (Xz_pve, &XWz_col1.vector, &d);
- s1-=d/gsl_vector_get(s_vec, i);
+ gsl_blas_ddot(Xz_pve, &XWz_col1.vector, &d);
+ s1 -= d / gsl_vector_get(s_vec, i);
- for (size_t j=0; j<n_vc; j++) {
- s=s1;
+ for (size_t j = 0; j < n_vc; j++) {
+ s = s1;
- s-=gsl_vector_get (zwz, j)*(1-s_pve)/gsl_vector_get(s_vec, j);
+ s -= gsl_vector_get(zwz, j) * (1 - s_pve) / gsl_vector_get(s_vec, j);
- gsl_vector_const_view XWz_col2=gsl_matrix_const_column (XWz, j);
- gsl_vector_const_view XtXWz_col2=gsl_matrix_const_column (XtXWz, j);
+ gsl_vector_const_view XWz_col2 = gsl_matrix_const_column(XWz, j);
+ gsl_vector_const_view XtXWz_col2 = gsl_matrix_const_column(XtXWz, j);
- gsl_blas_ddot (WXtXWz, &XtXWz_col2.vector, &d);
- s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j));
+ gsl_blas_ddot(WXtXWz, &XtXWz_col2.vector, &d);
+ s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j));
- gsl_blas_ddot (&XWz_col1.vector, &XWz_col2.vector, &d);
- s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j))*(1-s_pve);
+ gsl_blas_ddot(&XWz_col1.vector, &XWz_col2.vector, &d);
+ s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j)) *
+ (1 - s_pve);
- gsl_blas_ddot (Xz_pve, &XWz_col2.vector, &d);
- s-=d/gsl_vector_get(s_vec, j);
+ gsl_blas_ddot(Xz_pve, &XWz_col2.vector, &d);
+ s -= d / gsl_vector_get(s_vec, j);
- gsl_matrix_set (qvar_mat, i, j, s);
+ gsl_matrix_set(qvar_mat, i, j, s);
}
}
- d=(double)(ni_test-1);
- gsl_matrix_scale (qvar_mat, 2.0/(d*d*d));
+ d = (double)(ni_test - 1);
+ gsl_matrix_scale(qvar_mat, 2.0 / (d * d * d));
// Calculate S^{-1}.
- gsl_matrix_memcpy (tmp_mat, S_mat);
+ gsl_matrix_memcpy(tmp_mat, S_mat);
int sig;
- gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
- LUDecomp (tmp_mat, pmt, &sig);
- LUInvert (tmp_mat, pmt, Si_mat);
+ gsl_permutation *pmt = gsl_permutation_alloc(n_vc);
+ LUDecomp(tmp_mat, pmt, &sig);
+ LUInvert(tmp_mat, pmt, Si_mat);
// Calculate variance for the estimates.
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=i; j<n_vc; j++) {
- d=gsl_matrix_get(Svar_mat, i, j);
- d*=v_pve[i]*v_pve[j];
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = i; j < n_vc; j++) {
+ d = gsl_matrix_get(Svar_mat, i, j);
+ d *= v_pve[i] * v_pve[j];
- d+=gsl_matrix_get(qvar_mat, i, j);
+ d += gsl_matrix_get(qvar_mat, i, j);
gsl_matrix_set(Var_mat, i, j, d);
- if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);}
+ if (i != j) {
+ gsl_matrix_set(Var_mat, j, i, d);
+ }
}
}
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Si_mat,Var_mat,0.0,tmp_mat);
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,tmp_mat,Si_mat,0.0,Var_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0,
+ tmp_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0,
+ Var_mat);
// Compute sigma2 per snp, enrich.
- v_sigma2.clear(); v_enrich.clear();
- for (size_t i=0; i<n_vc; i++) {
- v_sigma2.push_back(v_pve[i]/gsl_vector_get(s_vec, i) );
- v_enrich.push_back(v_pve[i]/gsl_vector_get(s_vec, i)*s_snp/s_pve);
+ v_sigma2.clear();
+ v_enrich.clear();
+ for (size_t i = 0; i < n_vc; i++) {
+ v_sigma2.push_back(v_pve[i] / gsl_vector_get(s_vec, i));
+ v_enrich.push_back(v_pve[i] / gsl_vector_get(s_vec, i) * s_snp / s_pve);
}
// Compute se_pve, se_sigma2.
- for (size_t i=0; i<n_vc; i++) {
- d=sqrt(gsl_matrix_get(Var_mat, i, i));
+ for (size_t i = 0; i < n_vc; i++) {
+ d = sqrt(gsl_matrix_get(Var_mat, i, i));
v_se_pve.push_back(d);
- v_se_sigma2.push_back(d/gsl_vector_get(s_vec, i));
+ v_se_sigma2.push_back(d / gsl_vector_get(s_vec, i));
}
// Compute pve_total, se_pve_total.
- pve_total=0;
- for (size_t i=0; i<n_vc; i++) {
- pve_total+=v_pve[i];
+ pve_total = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ pve_total += v_pve[i];
}
- se_pve_total=0;
- for (size_t i=0; i<n_vc; i++) {
- for (size_t j=0; j<n_vc; j++) {
- se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+ se_pve_total = 0;
+ for (size_t i = 0; i < n_vc; i++) {
+ for (size_t j = 0; j < n_vc; j++) {
+ se_pve_total += gsl_matrix_get(Var_mat, i, j);
}
}
- se_pve_total=sqrt(se_pve_total);
+ se_pve_total = sqrt(se_pve_total);
// Compute se_enrich.
gsl_matrix_set_identity(tmp_mat);
double d1;
- for (size_t i=0; i<n_vc; i++) {
- d=v_pve[i]/s_pve;
- d1=gsl_vector_get(s_vec, i);
- for (size_t j=0; j<n_vc; j++) {
- if (i==j) {
- gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve);
+ for (size_t i = 0; i < n_vc; i++) {
+ d = v_pve[i] / s_pve;
+ d1 = gsl_vector_get(s_vec, i);
+ for (size_t j = 0; j < n_vc; j++) {
+ if (i == j) {
+ gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve);
} else {
- gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve);
+ gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve);
}
}
}
- gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,tmp_mat,Var_mat,0.0,tmp_mat1);
- gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,tmp_mat1,tmp_mat,0.0,
- VarEnrich_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0,
+ tmp_mat1);
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0,
+ VarEnrich_mat);
- for (size_t i=0; i<n_vc; i++) {
- d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
+ for (size_t i = 0; i < n_vc; i++) {
+ d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
v_se_enrich.push_back(d);
}
- cout<<"pve = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_pve[i]<<" ";
+ cout << "pve = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_pve[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(pve) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_se_pve[i]<<" ";
+ cout << "se(pve) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_se_pve[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"sigma2 per snp = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_sigma2[i]<<" ";
+ cout << "sigma2 per snp = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_sigma2[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(sigma2 per snp) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_se_sigma2[i]<<" ";
+ cout << "se(sigma2 per snp) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_se_sigma2[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"enrichment = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_enrich[i]<<" ";
+ cout << "enrichment = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_enrich[i] << " ";
}
- cout<<endl;
+ cout << endl;
- cout<<"se(enrichment) = ";
- for (size_t i=0; i<n_vc; i++) {
- cout<<v_se_enrich[i]<<" ";
+ cout << "se(enrichment) = ";
+ for (size_t i = 0; i < n_vc; i++) {
+ cout << v_se_enrich[i] << " ";
}
- cout<<endl;
+ cout << endl;
// Delete matrices.
gsl_matrix_free(Si_mat);
diff --git a/src/vc.h b/src/vc.h
index 43c6979..c6f66b4 100644
--- a/src/vc.h
+++ b/src/vc.h
@@ -19,25 +19,25 @@
#ifndef __VC_H__
#define __VC_H__
-#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-#include "param.h"
+#include "gsl/gsl_vector.h"
#include "io.h"
+#include "param.h"
using namespace std;
class VC_PARAM {
public:
- const gsl_matrix *K;
- const gsl_matrix *W;
- const gsl_vector *y;
- gsl_matrix *P;
- gsl_vector *Py;
- gsl_matrix *KPy_mat;
- gsl_matrix *PKPy_mat;
- gsl_matrix *Hessian;
- bool noconstrain;
+ const gsl_matrix *K;
+ const gsl_matrix *W;
+ const gsl_vector *y;
+ gsl_matrix *P;
+ gsl_vector *Py;
+ gsl_matrix *KPy_mat;
+ gsl_matrix *PKPy_mat;
+ gsl_matrix *Hessian;
+ bool noconstrain;
};
class VC {
@@ -45,91 +45,87 @@ class VC {
public:
// IO-related parameters
size_t a_mode;
- string file_cat;
- string file_beta;
- string file_cor;
- string file_mq;
- string file_ms;
-
- string file_out;
- string path_out;
-
- set<string> setSnps;
-
- size_t ni_total_ref, ns_total_ref, ns_pair;
- size_t ni_total, ns_total, ns_test;
- size_t n_vc;
-
- double pve_total, se_pve_total;
- vector<double> v_sigma2;
- vector<double> v_se_sigma2;
- vector<double> v_pve;
- vector<double> v_se_pve;
- vector<double> v_traceG;
- vector<double> v_beta;
- vector<double> v_se_beta;
-
- size_t crt;
- double window_cm, window_bp, window_ns;
-
- double time_UtX;
- double time_opt;
-
- // Main functions.
- void CopyFromParam (PARAM &cPar);
- void CopyToParam (PARAM &cPar);
- void WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec,
- const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
- const gsl_matrix *Svar_mat);
- void CalcVChe (const gsl_matrix *K, const gsl_matrix *W,
- const gsl_vector *y);
- void CalcVCreml (const bool noconstrain, const gsl_matrix *K,
- const gsl_matrix *W, const gsl_vector *y);
- void CalcVCacl (const gsl_matrix *K, const gsl_matrix *W,
- const gsl_vector *y);
+ string file_cat;
+ string file_beta;
+ string file_cor;
+ string file_mq;
+ string file_ms;
+
+ string file_out;
+ string path_out;
+
+ set<string> setSnps;
+
+ size_t ni_total_ref, ns_total_ref, ns_pair;
+ size_t ni_total, ns_total, ns_test;
+ size_t n_vc;
+
+ double pve_total, se_pve_total;
+ vector<double> v_sigma2;
+ vector<double> v_se_sigma2;
+ vector<double> v_pve;
+ vector<double> v_se_pve;
+ vector<double> v_traceG;
+ vector<double> v_beta;
+ vector<double> v_se_beta;
+
+ size_t crt;
+ double window_cm, window_bp, window_ns;
+
+ double time_UtX;
+ double time_opt;
+
+ // Main functions.
+ void CopyFromParam(PARAM &cPar);
+ void CopyToParam(PARAM &cPar);
+ void WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec,
+ const gsl_vector *qvar_vec, const gsl_matrix *S_mat,
+ const gsl_matrix *Svar_mat);
+ void CalcVChe(const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y);
+ void CalcVCreml(const bool noconstrain, const gsl_matrix *K,
+ const gsl_matrix *W, const gsl_vector *y);
+ void CalcVCacl(const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y);
};
void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
- const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
- const gsl_vector *s_vec, const double df, vector<double> &v_pve,
- vector<double> &v_se_pve, double &pve_total,
- double &se_pve_total, vector<double> &v_sigma2,
- vector<double> &v_se_sigma2, vector<double> &v_enrich,
- vector<double> &v_se_enrich);
-
-bool BimbamXwz (const string &file_geno, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const vector<size_t> &vec_cat, const gsl_vector *w,
- const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
-bool PlinkXwz (const string &file_bed, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const vector<size_t> &vec_cat, const gsl_vector *w,
- const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
-bool MFILEXwz (const size_t mfile_mode, const string &file_mfile,
- const int display_pace, vector<int> &indicator_idv,
- vector<vector<int> > &mindicator_snp,
- const vector<size_t> &vec_cat, const gsl_vector *w,
- const gsl_vector *z, gsl_matrix *XWz);
-
-bool BimbamXtXwz (const string &file_geno, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
-bool PlinkXtXwz (const string &file_bed, const int display_pace,
- vector<int> &indicator_idv, vector<int> &indicator_snp,
- const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
-bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile,
- const int display_pace, vector<int> &indicator_idv,
- vector<vector<int> > &mindicator_snp, const gsl_matrix *XWz,
- gsl_matrix *XtXWz);
+ const gsl_matrix *Svar_mat, const gsl_vector *q_vec,
+ const gsl_vector *s_vec, const double df, vector<double> &v_pve,
+ vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+ vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+ vector<double> &v_enrich, vector<double> &v_se_enrich);
+
+bool BimbamXwz(const string &file_geno, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const vector<size_t> &vec_cat, const gsl_vector *w,
+ const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
+bool PlinkXwz(const string &file_bed, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const vector<size_t> &vec_cat, const gsl_vector *w,
+ const gsl_vector *z, size_t ns_test, gsl_matrix *XWz);
+bool MFILEXwz(const size_t mfile_mode, const string &file_mfile,
+ const int display_pace, vector<int> &indicator_idv,
+ vector<vector<int>> &mindicator_snp,
+ const vector<size_t> &vec_cat, const gsl_vector *w,
+ const gsl_vector *z, gsl_matrix *XWz);
+
+bool BimbamXtXwz(const string &file_geno, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
+bool PlinkXtXwz(const string &file_bed, const int display_pace,
+ vector<int> &indicator_idv, vector<int> &indicator_snp,
+ const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz);
+bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile,
+ const int display_pace, vector<int> &indicator_idv,
+ vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz,
+ gsl_matrix *XtXWz);
void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz,
- const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
- const gsl_matrix *Svar_mat, const gsl_vector *w,
- const gsl_vector *z, const gsl_vector *s_vec,
- const vector<size_t> &vec_cat, const vector<double> &v_pve,
- vector<double> &v_se_pve, double &pve_total,
- double &se_pve_total, vector<double> &v_sigma2,
- vector<double> &v_se_sigma2, vector<double> &v_enrich,
- vector<double> &v_se_enrich);
+ const gsl_matrix *XtXWz, const gsl_matrix *S_mat,
+ const gsl_matrix *Svar_mat, const gsl_vector *w,
+ const gsl_vector *z, const gsl_vector *s_vec,
+ const vector<size_t> &vec_cat, const vector<double> &v_pve,
+ vector<double> &v_se_pve, double &pve_total, double &se_pve_total,
+ vector<double> &v_sigma2, vector<double> &v_se_sigma2,
+ vector<double> &v_enrich, vector<double> &v_se_enrich);
#endif