diff options
author | Pjotr Prins | 2017-08-02 08:46:58 +0000 |
---|---|---|
committer | Pjotr Prins | 2017-08-02 08:46:58 +0000 |
commit | 3935ba39d30666dd7d4a831155631847c77b70c4 (patch) | |
tree | c45fc682b473618a219e324d5c85b5e1f9361d0c | |
parent | 84360c191f418bf8682b35e0c8235fcc3bd19a06 (diff) | |
download | pangemma-3935ba39d30666dd7d4a831155631847c77b70c4.tar.gz |
Massive patch using LLVM coding style. It was generated with:
clang-format -style=LLVM -i *.cpp *.h
Please set your editor to replace tabs with spaces and use indentation of 2 spaces.
-rw-r--r-- | src/bslmm.cpp | 3331 | ||||
-rw-r--r-- | src/bslmm.h | 282 | ||||
-rw-r--r-- | src/bslmmdap.cpp | 1258 | ||||
-rw-r--r-- | src/bslmmdap.h | 160 | ||||
-rw-r--r-- | src/eigenlib.cpp | 103 | ||||
-rw-r--r-- | src/eigenlib.h | 16 | ||||
-rw-r--r-- | src/gemma.cpp | 6549 | ||||
-rw-r--r-- | src/gemma.h | 31 | ||||
-rw-r--r-- | src/gzstream.cpp | 176 | ||||
-rw-r--r-- | src/gzstream.h | 90 | ||||
-rw-r--r-- | src/io.cpp | 7531 | ||||
-rw-r--r-- | src/io.h | 335 | ||||
-rw-r--r-- | src/lapack.cpp | 1030 | ||||
-rw-r--r-- | src/lapack.h | 66 | ||||
-rw-r--r-- | src/ldr.cpp | 83 | ||||
-rw-r--r-- | src/ldr.h | 62 | ||||
-rw-r--r-- | src/lm.cpp | 1500 | ||||
-rw-r--r-- | src/lm.h | 92 | ||||
-rw-r--r-- | src/lmm.cpp | 4813 | ||||
-rw-r--r-- | src/lmm.h | 197 | ||||
-rw-r--r-- | src/logistic.cpp | 747 | ||||
-rw-r--r-- | src/logistic.h | 96 | ||||
-rw-r--r-- | src/main.cpp | 104 | ||||
-rw-r--r-- | src/mathfunc.cpp | 585 | ||||
-rw-r--r-- | src/mathfunc.h | 34 | ||||
-rw-r--r-- | src/mvlmm.cpp | 10159 | ||||
-rw-r--r-- | src/mvlmm.h | 145 | ||||
-rw-r--r-- | src/param.cpp | 4138 | ||||
-rw-r--r-- | src/param.h | 600 | ||||
-rw-r--r-- | src/prdt.cpp | 988 | ||||
-rw-r--r-- | src/prdt.h | 80 | ||||
-rw-r--r-- | src/varcov.cpp | 386 | ||||
-rw-r--r-- | src/varcov.h | 60 | ||||
-rw-r--r-- | src/vc.cpp | 3655 | ||||
-rw-r--r-- | src/vc.h | 180 |
35 files changed, 25588 insertions, 24074 deletions
diff --git a/src/bslmm.cpp b/src/bslmm.cpp index d579802..3305639 100644 --- a/src/bslmm.cpp +++ b/src/bslmm.cpp @@ -16,1360 +16,1428 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> #include <fstream> +#include <iostream> #include <sstream> -#include <iomanip> +#include <algorithm> #include <cmath> +#include <cstring> +#include <ctime> +#include <iomanip> #include <iostream> #include <stdio.h> #include <stdlib.h> -#include <ctime> -#include <cstring> -#include <algorithm> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" +#include "gsl/gsl_cdf.h" #include "gsl/gsl_eigen.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" #include "gsl/gsl_randist.h" -#include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" +#include "gsl/gsl_vector.h" -#include "lapack.h" -#include "param.h" #include "bslmm.h" -#include "lmm.h" +#include "lapack.h" #include "lm.h" +#include "lmm.h" #include "mathfunc.h" +#include "param.h" using namespace std; -void BSLMM::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; - - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; - - l_min=cPar.h_min; - l_max=cPar.h_max; - n_region=cPar.n_region; - pve_null=cPar.pve_null; - pheno_mean=cPar.pheno_mean; - - time_UtZ=0.0; - time_Omega=0.0; - n_accept=0; - - h_min=cPar.h_min; - h_max=cPar.h_max; - h_scale=cPar.h_scale; - rho_min=cPar.rho_min; - rho_max=cPar.rho_max; - rho_scale=cPar.rho_scale; - logp_min=cPar.logp_min; - logp_max=cPar.logp_max; - logp_scale=cPar.logp_scale; - - s_min=cPar.s_min; - s_max=cPar.s_max; - w_step=cPar.w_step; - s_step=cPar.s_step; - r_pace=cPar.r_pace; - w_pace=cPar.w_pace; - n_mh=cPar.n_mh; - geo_mean=cPar.geo_mean; - randseed=cPar.randseed; - trace_G=cPar.trace_G; - - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; - n_cvt=cPar.n_cvt; 
- - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; - - return; +void BSLMM::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; + + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; + + l_min = cPar.h_min; + l_max = cPar.h_max; + n_region = cPar.n_region; + pve_null = cPar.pve_null; + pheno_mean = cPar.pheno_mean; + + time_UtZ = 0.0; + time_Omega = 0.0; + n_accept = 0; + + h_min = cPar.h_min; + h_max = cPar.h_max; + h_scale = cPar.h_scale; + rho_min = cPar.rho_min; + rho_max = cPar.rho_max; + rho_scale = cPar.rho_scale; + logp_min = cPar.logp_min; + logp_max = cPar.logp_max; + logp_scale = cPar.logp_scale; + + s_min = cPar.s_min; + s_max = cPar.s_max; + w_step = cPar.w_step; + s_step = cPar.s_step; + r_pace = cPar.r_pace; + w_pace = cPar.w_pace; + n_mh = cPar.n_mh; + geo_mean = cPar.geo_mean; + randseed = cPar.randseed; + trace_G = cPar.trace_G; + + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ni_test = cPar.ni_test; + ns_test = cPar.ns_test; + n_cvt = cPar.n_cvt; + + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; + + return; } -void BSLMM::CopyToParam (PARAM &cPar) { - cPar.time_UtZ=time_UtZ; - cPar.time_Omega=time_Omega; - cPar.time_Proposal=time_Proposal; - cPar.cHyp_initial=cHyp_initial; - cPar.n_accept=n_accept; - cPar.pheno_mean=pheno_mean; - cPar.randseed=randseed; +void BSLMM::CopyToParam(PARAM &cPar) { + cPar.time_UtZ = time_UtZ; + cPar.time_Omega = time_Omega; + cPar.time_Proposal = time_Proposal; + cPar.cHyp_initial = cHyp_initial; + cPar.n_accept = n_accept; + cPar.pheno_mean = pheno_mean; + cPar.randseed = randseed; - return; + return; } -void BSLMM::WriteBV (const gsl_vector *bv) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".bv.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing 
file: "<<file_str.c_str()<<endl; - return; - } - - size_t t=0; - for (size_t i=0; i<ni_total; ++i) { - if (indicator_idv[i]==0) { - outfile<<"NA"<<endl; - } - else { - outfile<<scientific<<setprecision(6)<< - gsl_vector_get(bv, t)<<endl; - t++; - } - } - - outfile.clear(); - outfile.close(); - return; +void BSLMM::WriteBV(const gsl_vector *bv) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".bv.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + size_t t = 0; + for (size_t i = 0; i < ni_total; ++i) { + if (indicator_idv[i] == 0) { + outfile << "NA" << endl; + } else { + outfile << scientific << setprecision(6) << gsl_vector_get(bv, t) << endl; + t++; + } + } + + outfile.clear(); + outfile.close(); + return; } -void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, - const gsl_vector *alpha, const size_t w) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".param.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return;} - - outfile<<"chr"<<"\t"<<"rs"<<"\t" - <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t" - <<"beta"<<"\t"<<"gamma"<<endl; - - size_t t=0; - for (size_t i=0; i<ns_total; ++i) { - if (indicator_snp[i]==0) {continue;} - - outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t" - <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; - - outfile<<scientific<<setprecision(6)<< - gsl_vector_get(alpha, t)<<"\t"; - if (beta_g[t].second!=0) { - outfile<<beta_g[t].first/beta_g[t].second<< - "\t"<<beta_g[t].second/(double)w<<endl; - } - else { - outfile<<0.0<<"\t"<<0.0<<endl; - } - t++; - } - - outfile.clear(); - outfile.close(); - return; +void BSLMM::WriteParam(vector<pair<double, double>> &beta_g, + const gsl_vector *alpha, const size_t w) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += 
".param.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + outfile << "chr" + << "\t" + << "rs" + << "\t" + << "ps" + << "\t" + << "n_miss" + << "\t" + << "alpha" + << "\t" + << "beta" + << "\t" + << "gamma" << endl; + + size_t t = 0; + for (size_t i = 0; i < ns_total; ++i) { + if (indicator_snp[i] == 0) { + continue; + } + + outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"; + + outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t) + << "\t"; + if (beta_g[t].second != 0) { + outfile << beta_g[t].first / beta_g[t].second << "\t" + << beta_g[t].second / (double)w << endl; + } else { + outfile << 0.0 << "\t" << 0.0 << endl; + } + t++; + } + + outfile.clear(); + outfile.close(); + return; } -void BSLMM::WriteParam (const gsl_vector *alpha) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".param.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - outfile<<"chr"<<"\t"<<"rs"<<"\t" - <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t" - <<"beta"<<"\t"<<"gamma"<<endl; - - size_t t=0; - for (size_t i=0; i<ns_total; ++i) { - if (indicator_snp[i]==0) {continue;} - - outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<< - snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; - outfile<<scientific<<setprecision(6)<< - gsl_vector_get(alpha, t)<<"\t"; - outfile<<0.0<<"\t"<<0.0<<endl; - t++; - } - - outfile.clear(); - outfile.close(); - return; +void BSLMM::WriteParam(const gsl_vector *alpha) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".param.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + outfile << "chr" + << "\t" + 
<< "rs" + << "\t" + << "ps" + << "\t" + << "n_miss" + << "\t" + << "alpha" + << "\t" + << "beta" + << "\t" + << "gamma" << endl; + + size_t t = 0; + for (size_t i = 0; i < ns_total; ++i) { + if (indicator_snp[i] == 0) { + continue; + } + + outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"; + outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t) + << "\t"; + outfile << 0.0 << "\t" << 0.0 << endl; + t++; + } + + outfile.clear(); + outfile.close(); + return; } -void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, - const gsl_matrix *Result_gamma, const size_t w_col) { - string file_gamma, file_hyp; - file_gamma=path_out+"/"+file_out; - file_gamma+=".gamma.txt"; - file_hyp=path_out+"/"+file_out; - file_hyp+=".hyp.txt"; - - ofstream outfile_gamma, outfile_hyp; - - if (flag==0) { - outfile_gamma.open (file_gamma.c_str(), ofstream::out); - outfile_hyp.open (file_hyp.c_str(), ofstream::out); - if (!outfile_gamma) { - cout<<"error writing file: "<<file_gamma<<endl; - return; - } - if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; - return; - } - - outfile_hyp<<"h \t pve \t rho \t pge \t pi \t n_gamma"<<endl; - - for (size_t i=0; i<s_max; ++i) { - outfile_gamma<<"s"<<i<<"\t"; - } - outfile_gamma<<endl; - } - else { - outfile_gamma.open (file_gamma.c_str(), ofstream::app); - outfile_hyp.open (file_hyp.c_str(), ofstream::app); - if (!outfile_gamma) { - cout<<"error writing file: "<<file_gamma<<endl; - return; - } - if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; - return; - } - - size_t w; - if (w_col==0) {w=w_pace;} - else {w=w_col;} - - for (size_t i=0; i<w; ++i) { - outfile_hyp<<scientific; - for (size_t j=0; j<4; ++j) { - outfile_hyp<<setprecision(6)<< - gsl_matrix_get (Result_hyp, i, j)<<"\t"; - } - outfile_hyp<<setprecision(6)<< - exp(gsl_matrix_get (Result_hyp, i, 4))<<"\t"; - 
outfile_hyp<<(int)gsl_matrix_get(Result_hyp,i,5)<<"\t"; - outfile_hyp<<endl; - } - - for (size_t i=0; i<w; ++i) { - for (size_t j=0; j<s_max; ++j) { - outfile_gamma<< - (int)gsl_matrix_get(Result_gamma,i,j)<<"\t"; - } - outfile_gamma<<endl; - } - - } - - outfile_hyp.close(); - outfile_hyp.clear(); - outfile_gamma.close(); - outfile_gamma.clear(); - return; +void BSLMM::WriteResult(const int flag, const gsl_matrix *Result_hyp, + const gsl_matrix *Result_gamma, const size_t w_col) { + string file_gamma, file_hyp; + file_gamma = path_out + "/" + file_out; + file_gamma += ".gamma.txt"; + file_hyp = path_out + "/" + file_out; + file_hyp += ".hyp.txt"; + + ofstream outfile_gamma, outfile_hyp; + + if (flag == 0) { + outfile_gamma.open(file_gamma.c_str(), ofstream::out); + outfile_hyp.open(file_hyp.c_str(), ofstream::out); + if (!outfile_gamma) { + cout << "error writing file: " << file_gamma << endl; + return; + } + if (!outfile_hyp) { + cout << "error writing file: " << file_hyp << endl; + return; + } + + outfile_hyp << "h \t pve \t rho \t pge \t pi \t n_gamma" << endl; + + for (size_t i = 0; i < s_max; ++i) { + outfile_gamma << "s" << i << "\t"; + } + outfile_gamma << endl; + } else { + outfile_gamma.open(file_gamma.c_str(), ofstream::app); + outfile_hyp.open(file_hyp.c_str(), ofstream::app); + if (!outfile_gamma) { + cout << "error writing file: " << file_gamma << endl; + return; + } + if (!outfile_hyp) { + cout << "error writing file: " << file_hyp << endl; + return; + } + + size_t w; + if (w_col == 0) { + w = w_pace; + } else { + w = w_col; + } + + for (size_t i = 0; i < w; ++i) { + outfile_hyp << scientific; + for (size_t j = 0; j < 4; ++j) { + outfile_hyp << setprecision(6) << gsl_matrix_get(Result_hyp, i, j) + << "\t"; + } + outfile_hyp << setprecision(6) << exp(gsl_matrix_get(Result_hyp, i, 4)) + << "\t"; + outfile_hyp << (int)gsl_matrix_get(Result_hyp, i, 5) << "\t"; + outfile_hyp << endl; + } + + for (size_t i = 0; i < w; ++i) { + for (size_t j = 0; j < s_max; 
++j) { + outfile_gamma << (int)gsl_matrix_get(Result_gamma, i, j) << "\t"; + } + outfile_gamma << endl; + } + } + + outfile_hyp.close(); + outfile_hyp.clear(); + outfile_gamma.close(); + outfile_gamma.clear(); + return; } -void BSLMM::CalcPgamma (double *p_gamma) { - double p, s=0.0; - for (size_t i=0; i<ns_test; ++i) { - p=0.7*gsl_ran_geometric_pdf (i+1, 1.0/geo_mean)+0.3/ - (double)ns_test; - p_gamma[i]=p; - s+=p; - } - for (size_t i=0; i<ns_test; ++i) { - p=p_gamma[i]; - p_gamma[i]=p/s; - } - return; +void BSLMM::CalcPgamma(double *p_gamma) { + double p, s = 0.0; + for (size_t i = 0; i < ns_test; ++i) { + p = 0.7 * gsl_ran_geometric_pdf(i + 1, 1.0 / geo_mean) + + 0.3 / (double)ns_test; + p_gamma[i] = p; + s += p; + } + for (size_t i = 0; i < ns_test; ++i) { + p = p_gamma[i]; + p_gamma[i] = p / s; + } + return; } -void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, - vector<size_t> &rank) { - size_t pos; - for (size_t i=0; i<rank.size(); ++i) { - pos=mapRank2pos[rank[i]]; - gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, i); - gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos); - gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector); - } - - return; +void BSLMM::SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X, + vector<size_t> &rank) { + size_t pos; + for (size_t i = 0; i < rank.size(); ++i) { + pos = mapRank2pos[rank[i]]; + gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, i); + gsl_vector_const_view X_col = gsl_matrix_const_column(X, pos); + gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector); + } + + return; } -double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, - const double sigma_a2) { - double pve, var_y; +double BSLMM::CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const double sigma_a2) { + double pve, var_y; - gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); - gsl_vector *Xty=gsl_vector_alloc (UtXgamma->size2); - gsl_vector *OiXty=gsl_vector_alloc 
(UtXgamma->size2); + gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2); + gsl_vector *Xty = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *OiXty = gsl_vector_alloc(UtXgamma->size2); - gsl_matrix_set_identity (Omega); - gsl_matrix_scale (Omega, 1.0/sigma_a2); + gsl_matrix_set_identity(Omega); + gsl_matrix_scale(Omega, 1.0 / sigma_a2); - lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, - 1.0, Omega); - gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty); + lapack_dgemm((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega); + gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty); - CholeskySolve(Omega, Xty, OiXty); + CholeskySolve(Omega, Xty, OiXty); - gsl_blas_ddot (Xty, OiXty, &pve); - gsl_blas_ddot (Uty, Uty, &var_y); + gsl_blas_ddot(Xty, OiXty, &pve); + gsl_blas_ddot(Uty, Uty, &var_y); - pve/=var_y; + pve /= var_y; - gsl_matrix_free (Omega); - gsl_vector_free (Xty); - gsl_vector_free (OiXty); + gsl_matrix_free(Omega); + gsl_vector_free(Xty); + gsl_vector_free(OiXty); - return pve; + return pve; } -void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, - vector<size_t> &rank, class HYPBSLMM &cHyp, - vector<pair<size_t, double> > &pos_loglr) { - double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1); - - cHyp.n_gamma=0; - for (size_t i=0; i<pos_loglr.size(); ++i) { - if (2.0*pos_loglr[i].second>q_genome) {cHyp.n_gamma++;} - } - if (cHyp.n_gamma<10) {cHyp.n_gamma=10;} - - if (cHyp.n_gamma>s_max) {cHyp.n_gamma=s_max;} - if (cHyp.n_gamma<s_min) {cHyp.n_gamma=s_min;} - - rank.clear(); - for (size_t i=0; i<cHyp.n_gamma; ++i) { - rank.push_back(i); - } - - cHyp.logp=log((double)cHyp.n_gamma/(double)ns_test); - cHyp.h=pve_null; - - if (cHyp.logp==0) {cHyp.logp=-0.000001;} - if (cHyp.h==0) {cHyp.h=0.1;} - - gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp.n_gamma); - SetXgamma (UtXgamma, UtX, rank); - double sigma_a2; - if (trace_G!=0) { - sigma_a2=cHyp.h*1.0/ - 
(trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); - } else { - sigma_a2=cHyp.h*1.0/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); - } - if (sigma_a2==0) {sigma_a2=0.025;} - cHyp.rho=CalcPveLM (UtXgamma, Uty, sigma_a2)/cHyp.h; - gsl_matrix_free (UtXgamma); - - if (cHyp.rho>1.0) {cHyp.rho=1.0;} - - if (cHyp.h<h_min) {cHyp.h=h_min;} - if (cHyp.h>h_max) {cHyp.h=h_max;} - if (cHyp.rho<rho_min) {cHyp.rho=rho_min;} - if (cHyp.rho>rho_max) {cHyp.rho=rho_max;} - if (cHyp.logp<logp_min) {cHyp.logp=logp_min;} - if (cHyp.logp>logp_max) {cHyp.logp=logp_max;} - - cout<<"initial value of h = "<<cHyp.h<<endl; - cout<<"initial value of rho = "<<cHyp.rho<<endl; - cout<<"initial value of pi = "<<exp(cHyp.logp)<<endl; - cout<<"initial value of |gamma| = "<<cHyp.n_gamma<<endl; - - return; +void BSLMM::InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty, + vector<size_t> &rank, class HYPBSLMM &cHyp, + vector<pair<size_t, double>> &pos_loglr) { + double q_genome = gsl_cdf_chisq_Qinv(0.05 / (double)ns_test, 1); + + cHyp.n_gamma = 0; + for (size_t i = 0; i < pos_loglr.size(); ++i) { + if (2.0 * pos_loglr[i].second > q_genome) { + cHyp.n_gamma++; + } + } + if (cHyp.n_gamma < 10) { + cHyp.n_gamma = 10; + } + + if (cHyp.n_gamma > s_max) { + cHyp.n_gamma = s_max; + } + if (cHyp.n_gamma < s_min) { + cHyp.n_gamma = s_min; + } + + rank.clear(); + for (size_t i = 0; i < cHyp.n_gamma; ++i) { + rank.push_back(i); + } + + cHyp.logp = log((double)cHyp.n_gamma / (double)ns_test); + cHyp.h = pve_null; + + if (cHyp.logp == 0) { + cHyp.logp = -0.000001; + } + if (cHyp.h == 0) { + cHyp.h = 0.1; + } + + gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp.n_gamma); + SetXgamma(UtXgamma, UtX, rank); + double sigma_a2; + if (trace_G != 0) { + sigma_a2 = cHyp.h * 1.0 / + (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); + } else { + sigma_a2 = cHyp.h * 1.0 / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); + } + if (sigma_a2 == 0) { + sigma_a2 = 0.025; + } + cHyp.rho = CalcPveLM(UtXgamma, 
Uty, sigma_a2) / cHyp.h; + gsl_matrix_free(UtXgamma); + + if (cHyp.rho > 1.0) { + cHyp.rho = 1.0; + } + + if (cHyp.h < h_min) { + cHyp.h = h_min; + } + if (cHyp.h > h_max) { + cHyp.h = h_max; + } + if (cHyp.rho < rho_min) { + cHyp.rho = rho_min; + } + if (cHyp.rho > rho_max) { + cHyp.rho = rho_max; + } + if (cHyp.logp < logp_min) { + cHyp.logp = logp_min; + } + if (cHyp.logp > logp_max) { + cHyp.logp = logp_max; + } + + cout << "initial value of h = " << cHyp.h << endl; + cout << "initial value of rho = " << cHyp.rho << endl; + cout << "initial value of pi = " << exp(cHyp.logp) << endl; + cout << "initial value of |gamma| = " << cHyp.n_gamma << endl; + + return; } -double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *Utu, gsl_vector *alpha_prime, - class HYPBSLMM &cHyp) { - double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h)); - - gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size); - gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size); - - double logpost=0.0; - double d, ds, uy, Hi_yy=0, logdet_H=0.0; - for (size_t i=0; i<ni_test; ++i) { - d=gsl_vector_get (K_eval, i)*sigma_b2; - ds=d/(d+1.0); - d=1.0/(d+1.0); - gsl_vector_set (weight_Hi, i, d); - - logdet_H-=log(d); - uy=gsl_vector_get (Uty, i); - Hi_yy+=d*uy*uy; - - gsl_vector_set (Utu_rand, i, - gsl_ran_gaussian(gsl_r, 1)*sqrt(ds)); - } - - // Sample tau. - double tau=1.0; - if (a_mode==11) { - tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy); - } - - // Sample alpha. - gsl_vector_memcpy (alpha_prime, Uty); - gsl_vector_mul (alpha_prime, weight_Hi); - gsl_vector_scale (alpha_prime, sigma_b2); - - // Sample u. - gsl_vector_memcpy (Utu, alpha_prime); - gsl_vector_mul (Utu, K_eval); - if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));} - gsl_vector_add (Utu, Utu_rand); - - // For quantitative traits, calculate pve and ppe. 
- if (a_mode==11) { - gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=d/(double)ni_test; - cHyp.pve/=cHyp.pve+1.0/tau; - cHyp.pge=0.0; - } - - // Calculate likelihood. - logpost=-0.5*logdet_H; - if (a_mode==11) {logpost-=0.5*(double)ni_test*log(Hi_yy);} - else {logpost-=0.5*Hi_yy;} - - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ - ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); - - gsl_vector_free (Utu_rand); - gsl_vector_free (weight_Hi); - - return logpost; +double BSLMM::CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *Utu, gsl_vector *alpha_prime, + class HYPBSLMM &cHyp) { + double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h)); + + gsl_vector *Utu_rand = gsl_vector_alloc(Uty->size); + gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size); + + double logpost = 0.0; + double d, ds, uy, Hi_yy = 0, logdet_H = 0.0; + for (size_t i = 0; i < ni_test; ++i) { + d = gsl_vector_get(K_eval, i) * sigma_b2; + ds = d / (d + 1.0); + d = 1.0 / (d + 1.0); + gsl_vector_set(weight_Hi, i, d); + + logdet_H -= log(d); + uy = gsl_vector_get(Uty, i); + Hi_yy += d * uy * uy; + + gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds)); + } + + // Sample tau. + double tau = 1.0; + if (a_mode == 11) { + tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / Hi_yy); + } + + // Sample alpha. + gsl_vector_memcpy(alpha_prime, Uty); + gsl_vector_mul(alpha_prime, weight_Hi); + gsl_vector_scale(alpha_prime, sigma_b2); + + // Sample u. + gsl_vector_memcpy(Utu, alpha_prime); + gsl_vector_mul(Utu, K_eval); + if (a_mode == 11) { + gsl_vector_scale(Utu_rand, sqrt(1.0 / tau)); + } + gsl_vector_add(Utu, Utu_rand); + + // For quantitative traits, calculate pve and ppe. + if (a_mode == 11) { + gsl_blas_ddot(Utu, Utu, &d); + cHyp.pve = d / (double)ni_test; + cHyp.pve /= cHyp.pve + 1.0 / tau; + cHyp.pge = 0.0; + } + + // Calculate likelihood. 
+ logpost = -0.5 * logdet_H; + if (a_mode == 11) { + logpost -= 0.5 * (double)ni_test * log(Hi_yy); + } else { + logpost -= 0.5 * Hi_yy; + } + + logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp + + ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp)); + + gsl_vector_free(Utu_rand); + gsl_vector_free(weight_Hi); + + return logpost; } -double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *UtXb, gsl_vector *Utu, - gsl_vector *alpha_prime, gsl_vector *beta, - class HYPBSLMM &cHyp) { - clock_t time_start; - - double sigma_a2=cHyp.h*cHyp.rho/ - (trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); - double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h)); - - double logpost=0.0; - double d, ds, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0; - - gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, - UtXgamma->size2); - gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); - gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2); - gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2); - gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1); - gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1); - - gsl_matrix_memcpy (UtXgamma_eval, UtXgamma); - - logdet_H=0.0; P_yy=0.0; - for (size_t i=0; i<ni_test; ++i) { - gsl_vector_view UtXgamma_row= - gsl_matrix_row (UtXgamma_eval, i); - d=gsl_vector_get (K_eval, i)*sigma_b2; - ds=d/(d+1.0); - d=1.0/(d+1.0); - gsl_vector_set (weight_Hi, i, d); - - logdet_H-=log(d); - uy=gsl_vector_get (Uty, i); - P_yy+=d*uy*uy; - gsl_vector_scale (&UtXgamma_row.vector, d); - - gsl_vector_set(Utu_rand,i,gsl_ran_gaussian(gsl_r,1)*sqrt(ds)); - } - - // Calculate Omega. - gsl_matrix_set_identity (Omega); - - time_start=clock(); - lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, - UtXgamma, 1.0, Omega); - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - - // Calculate beta_hat. 
- gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); - - logdet_O=CholeskySolve(Omega, XtHiy, beta_hat); - - gsl_vector_scale (beta_hat, sigma_a2); - - gsl_blas_ddot (XtHiy, beta_hat, &d); - P_yy-=d; - - // Sample tau. - double tau=1.0; - if (a_mode==11) { - tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); - } - - // Sample beta. - for (size_t i=0; i<beta->size; i++) - { - d=gsl_ran_gaussian(gsl_r, 1); - gsl_vector_set(beta, i, d); - } - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); - - // This computes inv(L^T(Omega)) %*% beta. - gsl_vector_scale(beta, sqrt(sigma_a2/tau)); - gsl_vector_add(beta, beta_hat); - gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb); - - // Sample alpha. - gsl_vector_memcpy (alpha_prime, Uty); - gsl_vector_sub (alpha_prime, UtXb); - gsl_vector_mul (alpha_prime, weight_Hi); - gsl_vector_scale (alpha_prime, sigma_b2); - - // Sample u. - gsl_vector_memcpy (Utu, alpha_prime); - gsl_vector_mul (Utu, K_eval); - - if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));} - gsl_vector_add (Utu, Utu_rand); - - // For quantitative traits, calculate pve and pge. 
- if (a_mode==11) { - gsl_blas_ddot (UtXb, UtXb, &d); - cHyp.pge=d/(double)ni_test; - - gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=cHyp.pge+d/(double)ni_test; - - if (cHyp.pve==0) {cHyp.pge=0.0;} - else {cHyp.pge/=cHyp.pve;} - cHyp.pve/=cHyp.pve+1.0/tau; - } - - gsl_matrix_free (UtXgamma_eval); - gsl_matrix_free (Omega); - gsl_vector_free (XtHiy); - gsl_vector_free (beta_hat); - gsl_vector_free (Utu_rand); - gsl_vector_free (weight_Hi); - - logpost=-0.5*logdet_H-0.5*logdet_O; - if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} - else {logpost-=0.5*P_yy;} - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ - ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); - - return logpost; +double BSLMM::CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const gsl_vector *K_eval, gsl_vector *UtXb, + gsl_vector *Utu, gsl_vector *alpha_prime, + gsl_vector *beta, class HYPBSLMM &cHyp) { + clock_t time_start; + + double sigma_a2 = cHyp.h * cHyp.rho / + (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); + double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h)); + + double logpost = 0.0; + double d, ds, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0; + + gsl_matrix *UtXgamma_eval = + gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2); + gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2); + gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *Utu_rand = gsl_vector_alloc(UtXgamma->size1); + gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1); + + gsl_matrix_memcpy(UtXgamma_eval, UtXgamma); + + logdet_H = 0.0; + P_yy = 0.0; + for (size_t i = 0; i < ni_test; ++i) { + gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i); + d = gsl_vector_get(K_eval, i) * sigma_b2; + ds = d / (d + 1.0); + d = 1.0 / (d + 1.0); + gsl_vector_set(weight_Hi, i, d); + + logdet_H -= log(d); + uy = gsl_vector_get(Uty, i); + P_yy += d * uy * uy; + 
gsl_vector_scale(&UtXgamma_row.vector, d); + + gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds)); + } + + // Calculate Omega. + gsl_matrix_set_identity(Omega); + + time_start = clock(); + lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, + Omega); + time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Calculate beta_hat. + gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); + + logdet_O = CholeskySolve(Omega, XtHiy, beta_hat); + + gsl_vector_scale(beta_hat, sigma_a2); + + gsl_blas_ddot(XtHiy, beta_hat, &d); + P_yy -= d; + + // Sample tau. + double tau = 1.0; + if (a_mode == 11) { + tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy); + } + + // Sample beta. + for (size_t i = 0; i < beta->size; i++) { + d = gsl_ran_gaussian(gsl_r, 1); + gsl_vector_set(beta, i, d); + } + gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); + + // This computes inv(L^T(Omega)) %*% beta. + gsl_vector_scale(beta, sqrt(sigma_a2 / tau)); + gsl_vector_add(beta, beta_hat); + gsl_blas_dgemv(CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb); + + // Sample alpha. + gsl_vector_memcpy(alpha_prime, Uty); + gsl_vector_sub(alpha_prime, UtXb); + gsl_vector_mul(alpha_prime, weight_Hi); + gsl_vector_scale(alpha_prime, sigma_b2); + + // Sample u. + gsl_vector_memcpy(Utu, alpha_prime); + gsl_vector_mul(Utu, K_eval); + + if (a_mode == 11) { + gsl_vector_scale(Utu_rand, sqrt(1.0 / tau)); + } + gsl_vector_add(Utu, Utu_rand); + + // For quantitative traits, calculate pve and pge. 
+ if (a_mode == 11) { + gsl_blas_ddot(UtXb, UtXb, &d); + cHyp.pge = d / (double)ni_test; + + gsl_blas_ddot(Utu, Utu, &d); + cHyp.pve = cHyp.pge + d / (double)ni_test; + + if (cHyp.pve == 0) { + cHyp.pge = 0.0; + } else { + cHyp.pge /= cHyp.pve; + } + cHyp.pve /= cHyp.pve + 1.0 / tau; + } + + gsl_matrix_free(UtXgamma_eval); + gsl_matrix_free(Omega); + gsl_vector_free(XtHiy); + gsl_vector_free(beta_hat); + gsl_vector_free(Utu_rand); + gsl_vector_free(weight_Hi); + + logpost = -0.5 * logdet_H - 0.5 * logdet_O; + if (a_mode == 11) { + logpost -= 0.5 * (double)ni_test * log(P_yy); + } else { + logpost -= 0.5 * P_yy; + } + logpost += + ((double)cHyp.n_gamma - 1.0) * cHyp.logp + + ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp)); + + return logpost; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, - gsl_vector *z_hat, class HYPBSLMM &cHyp) { - double d; +void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu, + gsl_vector *z_hat, class HYPBSLMM &cHyp) { + double d; - gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=d/(double)ni_test; + gsl_blas_ddot(Utu, Utu, &d); + cHyp.pve = d / (double)ni_test; - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, z_hat); + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, z_hat); - cHyp.pve/=cHyp.pve+1.0; - cHyp.pge=0.0; + cHyp.pve /= cHyp.pve + 1.0; + cHyp.pge = 0.0; - return; + return; } // Calculate pve and pge, and calculate z_hat for case-control data. 
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, - const gsl_vector *Utu, gsl_vector *z_hat, - class HYPBSLMM &cHyp) { - double d; - gsl_vector *UtXbU=gsl_vector_alloc (Utu->size); +void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb, + const gsl_vector *Utu, gsl_vector *z_hat, + class HYPBSLMM &cHyp) { + double d; + gsl_vector *UtXbU = gsl_vector_alloc(Utu->size); - gsl_blas_ddot (UtXb, UtXb, &d); - cHyp.pge=d/(double)ni_test; + gsl_blas_ddot(UtXb, UtXb, &d); + cHyp.pge = d / (double)ni_test; - gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=cHyp.pge+d/(double)ni_test; + gsl_blas_ddot(Utu, Utu, &d); + cHyp.pve = cHyp.pge + d / (double)ni_test; - gsl_vector_memcpy (UtXbU, Utu); - gsl_vector_add (UtXbU, UtXb); - gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); + gsl_vector_memcpy(UtXbU, Utu); + gsl_vector_add(UtXbU, UtXb); + gsl_blas_dgemv(CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); - if (cHyp.pve==0) {cHyp.pge=0.0;} - else {cHyp.pge/=cHyp.pve;} + if (cHyp.pve == 0) { + cHyp.pge = 0.0; + } else { + cHyp.pge /= cHyp.pve; + } - cHyp.pve/=cHyp.pve+1.0; + cHyp.pve /= cHyp.pve + 1.0; - gsl_vector_free(UtXbU); - return; + gsl_vector_free(UtXbU); + return; } -void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, - gsl_vector *z) { - double d1, d2, z_rand=0.0; - for (size_t i=0; i<z->size; ++i) { - d1=gsl_vector_get (y, i); - d2=gsl_vector_get (z_hat, i); - - // y is centered for case control studies. - if (d1<=0.0) { - - // Control, right truncated. - do { - z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0); - } while (z_rand>0.0); - } - else { - do { - z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0); - } while (z_rand<0.0); - } - - gsl_vector_set (z, i, z_rand); - } - - return; +void BSLMM::SampleZ(const gsl_vector *y, const gsl_vector *z_hat, + gsl_vector *z) { + double d1, d2, z_rand = 0.0; + for (size_t i = 0; i < z->size; ++i) { + d1 = gsl_vector_get(y, i); + d2 = gsl_vector_get(z_hat, i); + + // y is centered for case control studies. 
+ if (d1 <= 0.0) { + + // Control, right truncated. + do { + z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0); + } while (z_rand > 0.0); + } else { + do { + z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0); + } while (z_rand < 0.0); + } + + gsl_vector_set(z, i, z_rand); + } + + return; } -double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat) { +double BSLMM::ProposeHnRho(const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { - double h=cHyp_old.h, rho=cHyp_old.rho; + double h = cHyp_old.h, rho = cHyp_old.rho; - double d_h=(h_max-h_min)*h_scale, d_rho=(rho_max-rho_min)*rho_scale; + double d_h = (h_max - h_min) * h_scale, + d_rho = (rho_max - rho_min) * rho_scale; - for (size_t i=0; i<repeat; ++i) { - h=h+(gsl_rng_uniform(gsl_r)-0.5)*d_h; - if (h<h_min) {h=2*h_min-h;} - if (h>h_max) {h=2*h_max-h;} + for (size_t i = 0; i < repeat; ++i) { + h = h + (gsl_rng_uniform(gsl_r) - 0.5) * d_h; + if (h < h_min) { + h = 2 * h_min - h; + } + if (h > h_max) { + h = 2 * h_max - h; + } - rho=rho+(gsl_rng_uniform(gsl_r)-0.5)*d_rho; - if (rho<rho_min) {rho=2*rho_min-rho;} - if (rho>rho_max) {rho=2*rho_max-rho;} - } - cHyp_new.h=h; - cHyp_new.rho=rho; - return 0.0; + rho = rho + (gsl_rng_uniform(gsl_r) - 0.5) * d_rho; + if (rho < rho_min) { + rho = 2 * rho_min - rho; + } + if (rho > rho_max) { + rho = 2 * rho_max - rho; + } + } + cHyp_new.h = h; + cHyp_new.rho = rho; + return 0.0; } -double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat) { - double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp; - double log_ratio=0.0; +double BSLMM::ProposePi(const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { + double logp_old = cHyp_old.logp, logp_new = cHyp_old.logp; + double log_ratio = 0.0; - double d_logp=min(0.1, (logp_max-logp_min)*logp_scale); + double d_logp = min(0.1, (logp_max - logp_min) * logp_scale); - for (size_t i=0; i<repeat; 
++i) { - logp_new=logp_old+(gsl_rng_uniform(gsl_r)-0.5)*d_logp; - if (logp_new<logp_min) {logp_new=2*logp_min-logp_new;} - if (logp_new>logp_max) {logp_new=2*logp_max-logp_new;} - log_ratio+=logp_new-logp_old; - logp_old=logp_new; - } - cHyp_new.logp=logp_new; + for (size_t i = 0; i < repeat; ++i) { + logp_new = logp_old + (gsl_rng_uniform(gsl_r) - 0.5) * d_logp; + if (logp_new < logp_min) { + logp_new = 2 * logp_min - logp_new; + } + if (logp_new > logp_max) { + logp_new = 2 * logp_max - logp_new; + } + log_ratio += logp_new - logp_old; + logp_old = logp_new; + } + cHyp_new.logp = logp_new; - return log_ratio; + return log_ratio; } -bool comp_vec (size_t a, size_t b) { - return (a < b); -} +bool comp_vec(size_t a, size_t b) { return (a < b); } + +double BSLMM::ProposeGamma(const vector<size_t> &rank_old, + vector<size_t> &rank_new, const double *p_gamma, + const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { + map<size_t, int> mapRank2in; + size_t r; + double unif, logp = 0.0; + int flag_gamma; + size_t r_add, r_remove, col_id; + + rank_new.clear(); + if (cHyp_old.n_gamma != rank_old.size()) { + cout << "size wrong" << endl; + } + + if (cHyp_old.n_gamma != 0) { + for (size_t i = 0; i < rank_old.size(); ++i) { + r = rank_old[i]; + rank_new.push_back(r); + mapRank2in[r] = 1; + } + } + cHyp_new.n_gamma = cHyp_old.n_gamma; + + for (size_t i = 0; i < repeat; ++i) { + unif = gsl_rng_uniform(gsl_r); + + if (unif < 0.40 && cHyp_new.n_gamma < s_max) { + flag_gamma = 1; + } else if (unif >= 0.40 && unif < 0.80 && cHyp_new.n_gamma > s_min) { + flag_gamma = 2; + } else if (unif >= 0.80 && cHyp_new.n_gamma > 0 && + cHyp_new.n_gamma < ns_test) { + flag_gamma = 3; + } else { + flag_gamma = 4; + } + + if (flag_gamma == 1) { + + // Add a SNP. 
+ do { + r_add = gsl_ran_discrete(gsl_r, gsl_t); + } while (mapRank2in.count(r_add) != 0); + + double prob_total = 1.0; + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + r = rank_new[i]; + prob_total -= p_gamma[r]; + } + + mapRank2in[r_add] = 1; + rank_new.push_back(r_add); + cHyp_new.n_gamma++; + logp += -log(p_gamma[r_add] / prob_total) - log((double)cHyp_new.n_gamma); + } else if (flag_gamma == 2) { + + // Delete a SNP. + col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); + r_remove = rank_new[col_id]; + + double prob_total = 1.0; + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + r = rank_new[i]; + prob_total -= p_gamma[r]; + } + prob_total += p_gamma[r_remove]; + + mapRank2in.erase(r_remove); + rank_new.erase(rank_new.begin() + col_id); + logp += + log(p_gamma[r_remove] / prob_total) + log((double)cHyp_new.n_gamma); + cHyp_new.n_gamma--; + } else if (flag_gamma == 3) { + + // Switch a SNP. + col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); + r_remove = rank_new[col_id]; + + // Be careful with the proposal. + do { + r_add = gsl_ran_discrete(gsl_r, gsl_t); + } while (mapRank2in.count(r_add) != 0); + + double prob_total = 1.0; + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + r = rank_new[i]; + prob_total -= p_gamma[r]; + } + + logp += log(p_gamma[r_remove] / + (prob_total + p_gamma[r_remove] - p_gamma[r_add])); + logp -= log(p_gamma[r_add] / prob_total); + + mapRank2in.erase(r_remove); + mapRank2in[r_add] = 1; + rank_new.erase(rank_new.begin() + col_id); + rank_new.push_back(r_add); + } else { + logp += 0; + } // Do not change. 
+ } + + stable_sort(rank_new.begin(), rank_new.end(), comp_vec); -double BSLMM::ProposeGamma (const vector<size_t> &rank_old, - vector<size_t> &rank_new, - const double *p_gamma, - const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, - const size_t &repeat) { - map<size_t, int> mapRank2in; - size_t r; - double unif, logp=0.0; - int flag_gamma; - size_t r_add, r_remove, col_id; - - rank_new.clear(); - if (cHyp_old.n_gamma!=rank_old.size()) {cout<<"size wrong"<<endl;} - - if (cHyp_old.n_gamma!=0) { - for (size_t i=0; i<rank_old.size(); ++i) { - r=rank_old[i]; - rank_new.push_back(r); - mapRank2in[r]=1; - } - } - cHyp_new.n_gamma=cHyp_old.n_gamma; - - for (size_t i=0; i<repeat; ++i) { - unif=gsl_rng_uniform(gsl_r); - - if (unif < 0.40 && cHyp_new.n_gamma<s_max) {flag_gamma=1;} - else if (unif>=0.40 && unif < 0.80 && - cHyp_new.n_gamma>s_min) { - flag_gamma=2; - } - else if (unif>=0.80 && cHyp_new.n_gamma>0 && - cHyp_new.n_gamma<ns_test) { - flag_gamma=3; - } - else {flag_gamma=4;} - - if(flag_gamma==1) { - - // Add a SNP. - do { - r_add=gsl_ran_discrete (gsl_r, gsl_t); - } while (mapRank2in.count(r_add)!=0); - - double prob_total=1.0; - for (size_t i=0; i<cHyp_new.n_gamma; ++i) { - r=rank_new[i]; - prob_total-=p_gamma[r]; - } - - mapRank2in[r_add]=1; - rank_new.push_back(r_add); - cHyp_new.n_gamma++; - logp+=-log(p_gamma[r_add]/prob_total)- - log((double)cHyp_new.n_gamma); - } - else if (flag_gamma==2) { - - // Delete a SNP. - col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); - r_remove=rank_new[col_id]; - - double prob_total=1.0; - for (size_t i=0; i<cHyp_new.n_gamma; ++i) { - r=rank_new[i]; - prob_total-=p_gamma[r]; - } - prob_total+=p_gamma[r_remove]; - - mapRank2in.erase(r_remove); - rank_new.erase(rank_new.begin()+col_id); - logp+=log(p_gamma[r_remove]/prob_total)+ - log((double)cHyp_new.n_gamma); - cHyp_new.n_gamma--; - } - else if (flag_gamma==3) { - - // Switch a SNP. 
- col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); - r_remove=rank_new[col_id]; - - // Be careful with the proposal. - do { - r_add=gsl_ran_discrete (gsl_r, gsl_t); - } while (mapRank2in.count(r_add)!=0); - - double prob_total=1.0; - for (size_t i=0; i<cHyp_new.n_gamma; ++i) { - r=rank_new[i]; - prob_total-=p_gamma[r]; - } - - logp+=log(p_gamma[r_remove]/ - (prob_total+p_gamma[r_remove]-p_gamma[r_add])); - logp-=log(p_gamma[r_add]/prob_total); - - mapRank2in.erase(r_remove); - mapRank2in[r_add]=1; - rank_new.erase(rank_new.begin()+col_id); - rank_new.push_back(r_add); - } - else {logp+=0;} // Do not change. - } - - stable_sort (rank_new.begin(), rank_new.end(), comp_vec); - - mapRank2in.clear(); - return logp; + mapRank2in.clear(); + return logp; } -bool comp_lr (pair<size_t, double> a, pair<size_t, double> b) { - return (a.second > b.second); +bool comp_lr(pair<size_t, double> a, pair<size_t, double> b) { + return (a.second > b.second); } // If a_mode==13 then Uty==y. -void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const gsl_vector *y) { - clock_t time_start; - - class HYPBSLMM cHyp_old, cHyp_new; - - gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6); - gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); - - gsl_vector *alpha_prime=gsl_vector_alloc (ni_test); - gsl_vector *alpha_new=gsl_vector_alloc (ni_test); - gsl_vector *alpha_old=gsl_vector_alloc (ni_test); - gsl_vector *Utu=gsl_vector_alloc (ni_test); - gsl_vector *Utu_new=gsl_vector_alloc (ni_test); - gsl_vector *Utu_old=gsl_vector_alloc (ni_test); - - gsl_vector *UtXb_new=gsl_vector_alloc (ni_test); - gsl_vector *UtXb_old=gsl_vector_alloc (ni_test); - - gsl_vector *z_hat=gsl_vector_alloc (ni_test); - gsl_vector *z=gsl_vector_alloc (ni_test); - gsl_vector *Utz=gsl_vector_alloc (ni_test); - - gsl_vector_memcpy (Utz, Uty); - - double logPost_new, logPost_old; - double logMHratio; - double mean_z=0.0; - - gsl_matrix_set_zero 
(Result_gamma); - gsl_vector_set_zero (Utu); - gsl_vector_set_zero (alpha_prime); - if (a_mode==13) { - pheno_mean=0.0; - } - - vector<pair<double, double> > beta_g; - for (size_t i=0; i<ns_test; i++) { - beta_g.push_back(make_pair(0.0, 0.0)); - } - - vector<size_t> rank_new, rank_old; - vector<double> beta_new, beta_old; - - vector<pair<size_t, double> > pos_loglr; - - time_start=clock(); - MatrixCalcLR (U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr); - time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr); - for (size_t i=0; i<ns_test; ++i) { - mapRank2pos[i]=pos_loglr[i].first; - } - - // Calculate proposal distribution for gamma (unnormalized), - // and set up gsl_r and gsl_t. - gsl_rng_env_setup(); - const gsl_rng_type * gslType; - gslType = gsl_rng_default; - if (randseed<0) - { - time_t rawtime; - time (&rawtime); - tm * ptm = gmtime (&rawtime); - - randseed = (unsigned) (ptm->tm_hour%24*3600+ - ptm->tm_min*60+ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - - double *p_gamma = new double[ns_test]; - CalcPgamma (p_gamma); - - gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - - // Initial parameters. 
- InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr); - - cHyp_initial=cHyp_old; - - if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, - cHyp_old); - - beta_old.clear(); - for (size_t i=0; i<cHyp_old.n_gamma; ++i) { - beta_old.push_back(0); - } - } - else { - gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, - cHyp_old.n_gamma); - gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma); - SetXgamma (UtXgamma, UtX, rank_old); - logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, - Utu_old, alpha_old, beta, cHyp_old); - - beta_old.clear(); - for (size_t i=0; i<beta->size; ++i) { - beta_old.push_back(gsl_vector_get(beta, i)); - } - gsl_matrix_free (UtXgamma); - gsl_vector_free (beta); - } - - // Calculate centered z_hat, and pve. - if (a_mode==13) { - time_start=clock(); - if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old); - } - else { - CalcCC_PVEnZ (U, UtXb_old, Utu_old, z_hat, cHyp_old); - } - time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - } - - // Start MCMC. - int accept; - size_t total_step=w_step+s_step; - size_t w=0, w_col, pos; - size_t repeat=0; - - for (size_t t=0; t<total_step; ++t) { - if (t%d_pace==0 || t==total_step-1) { - ProgressBar ("Running MCMC ", t, total_step-1, - (double)n_accept/(double)(t*n_mh+1)); - } - - if (a_mode==13) { - SampleZ (y, z_hat, z); - mean_z=CenterVector (z); - - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz); - time_UtZ+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - - // First proposal. 
- if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - logPost_old= - CalcPosterior(Utz, K_eval, Utu_old, - alpha_old, cHyp_old); - beta_old.clear(); - for (size_t i=0; i<cHyp_old.n_gamma; ++i) { - beta_old.push_back(0); - } - } - else { - gsl_matrix *UtXgamma= - gsl_matrix_alloc (ni_test, cHyp_old.n_gamma); - gsl_vector *beta= - gsl_vector_alloc (cHyp_old.n_gamma); - SetXgamma (UtXgamma, UtX, rank_old); - logPost_old= - CalcPosterior(UtXgamma, Utz, K_eval, - UtXb_old, Utu_old, alpha_old, - beta, cHyp_old); - - beta_old.clear(); - for (size_t i=0; i<beta->size; ++i) { - beta_old.push_back(gsl_vector_get(beta, i)); - } - gsl_matrix_free (UtXgamma); - gsl_vector_free (beta); - } - } - - // M-H steps. - for (size_t i=0; i<n_mh; ++i) { - if (gsl_rng_uniform(gsl_r)<0.33) { - repeat = 1+gsl_rng_uniform_int(gsl_r, 20); - } - else { - repeat=1; - } - - logMHratio=0.0; - logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat); - logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma, - cHyp_old, cHyp_new, repeat); - logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat); - - if (cHyp_new.n_gamma==0 || cHyp_new.rho==0) { - logPost_new=CalcPosterior(Utz, K_eval, Utu_new, - alpha_new, cHyp_new); - beta_new.clear(); - for (size_t i=0; i<cHyp_new.n_gamma; ++i) { - beta_new.push_back(0); - } - } - else { - gsl_matrix *UtXgamma= - gsl_matrix_alloc (ni_test, cHyp_new.n_gamma); - gsl_vector *beta= - gsl_vector_alloc (cHyp_new.n_gamma); - SetXgamma (UtXgamma, UtX, rank_new); - logPost_new= - CalcPosterior(UtXgamma, Utz, K_eval, - UtXb_new, Utu_new, alpha_new, - beta, cHyp_new); - beta_new.clear(); - for (size_t i=0; i<beta->size; ++i) { - beta_new.push_back(gsl_vector_get(beta, i)); - } - gsl_matrix_free (UtXgamma); - gsl_vector_free (beta); - } - - logMHratio+=logPost_new-logPost_old; - - if (logMHratio>0 || - log(gsl_rng_uniform(gsl_r))<logMHratio) { - accept=1; n_accept++; - } - else {accept=0;} - - if (accept==1) { - logPost_old=logPost_new; - rank_old.clear(); beta_old.clear(); - if 
(rank_new.size()!=0) { - for (size_t i=0; i<rank_new.size(); ++i) { - rank_old.push_back(rank_new[i]); - beta_old.push_back(beta_new[i]); - } - } - cHyp_old=cHyp_new; - gsl_vector_memcpy (alpha_old, alpha_new); - gsl_vector_memcpy (UtXb_old, UtXb_new); - gsl_vector_memcpy (Utu_old, Utu_new); - } - else {cHyp_new=cHyp_old;} - } - - // Calculate z_hat, and pve. - if (a_mode==13) { - time_start=clock(); - if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old); - } - else { - CalcCC_PVEnZ (U, UtXb_old, Utu_old, - z_hat, cHyp_old); - } - - // Sample mu and update z_hat. - gsl_vector_sub (z, z_hat); - mean_z+=CenterVector(z); - mean_z+= - gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test)); - gsl_vector_add_constant (z_hat, mean_z); - - time_UtZ+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - } - - // Save data. - if (t<w_step) {continue;} - else { - if (t%r_pace==0) { - w_col=w%w_pace; - if (w_col==0) { - if (w==0) { - WriteResult (0, Result_hyp, - Result_gamma, w_col); - } - else { - WriteResult (1, Result_hyp, - Result_gamma, w_col); - gsl_matrix_set_zero (Result_hyp); - gsl_matrix_set_zero (Result_gamma); - } - } - - gsl_matrix_set(Result_hyp,w_col,0,cHyp_old.h); - gsl_matrix_set(Result_hyp,w_col,1,cHyp_old.pve); - gsl_matrix_set(Result_hyp,w_col,2,cHyp_old.rho); - gsl_matrix_set(Result_hyp,w_col,3,cHyp_old.pge); - gsl_matrix_set(Result_hyp,w_col,4,cHyp_old.logp); - gsl_matrix_set(Result_hyp,w_col,5,cHyp_old.n_gamma); - - for (size_t i=0; i<cHyp_old.n_gamma; ++i) { - pos=mapRank2pos[rank_old[i]]+1; - - gsl_matrix_set(Result_gamma,w_col,i, - pos); - - beta_g[pos-1].first+=beta_old[i]; - beta_g[pos-1].second+=1.0; - } - - gsl_vector_add (alpha_prime, alpha_old); - gsl_vector_add (Utu, Utu_old); - - if (a_mode==13) { - pheno_mean+=mean_z; - } - - w++; - - } - - } - } - cout<<endl; - - w_col=w%w_pace; - WriteResult (1, Result_hyp, Result_gamma, w_col); - - gsl_matrix_free(Result_hyp); - gsl_matrix_free(Result_gamma); - - 
gsl_vector_free(z_hat); - gsl_vector_free(z); - gsl_vector_free(Utz); - gsl_vector_free(UtXb_new); - gsl_vector_free(UtXb_old); - gsl_vector_free(alpha_new); - gsl_vector_free(alpha_old); - gsl_vector_free(Utu_new); - gsl_vector_free(Utu_old); - - gsl_vector_scale (alpha_prime, 1.0/(double)w); - gsl_vector_scale (Utu, 1.0/(double)w); - if (a_mode==13) { - pheno_mean/=(double)w; - } - - gsl_vector *alpha=gsl_vector_alloc (ns_test); - gsl_blas_dgemv (CblasTrans, 1.0/(double)ns_test, UtX, - alpha_prime, 0.0, alpha); - WriteParam (beta_g, alpha, w); - gsl_vector_free(alpha); - - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime); - WriteBV(alpha_prime); - - gsl_vector_free(alpha_prime); - gsl_vector_free(Utu); - - delete [] p_gamma; - beta_g.clear(); - - return; +void BSLMM::MCMC(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const gsl_vector *y) { + clock_t time_start; + + class HYPBSLMM cHyp_old, cHyp_new; + + gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6); + gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max); + + gsl_vector *alpha_prime = gsl_vector_alloc(ni_test); + gsl_vector *alpha_new = gsl_vector_alloc(ni_test); + gsl_vector *alpha_old = gsl_vector_alloc(ni_test); + gsl_vector *Utu = gsl_vector_alloc(ni_test); + gsl_vector *Utu_new = gsl_vector_alloc(ni_test); + gsl_vector *Utu_old = gsl_vector_alloc(ni_test); + + gsl_vector *UtXb_new = gsl_vector_alloc(ni_test); + gsl_vector *UtXb_old = gsl_vector_alloc(ni_test); + + gsl_vector *z_hat = gsl_vector_alloc(ni_test); + gsl_vector *z = gsl_vector_alloc(ni_test); + gsl_vector *Utz = gsl_vector_alloc(ni_test); + + gsl_vector_memcpy(Utz, Uty); + + double logPost_new, logPost_old; + double logMHratio; + double mean_z = 0.0; + + gsl_matrix_set_zero(Result_gamma); + gsl_vector_set_zero(Utu); + gsl_vector_set_zero(alpha_prime); + if (a_mode == 13) { + pheno_mean = 0.0; + } + + vector<pair<double, double>> beta_g; + for (size_t i = 0; i < ns_test; 
i++) { + beta_g.push_back(make_pair(0.0, 0.0)); + } + + vector<size_t> rank_new, rank_old; + vector<double> beta_new, beta_old; + + vector<pair<size_t, double>> pos_loglr; + + time_start = clock(); + MatrixCalcLR(U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr); + time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr); + for (size_t i = 0; i < ns_test; ++i) { + mapRank2pos[i] = pos_loglr[i].first; + } + + // Calculate proposal distribution for gamma (unnormalized), + // and set up gsl_r and gsl_t. + gsl_rng_env_setup(); + const gsl_rng_type *gslType; + gslType = gsl_rng_default; + if (randseed < 0) { + time_t rawtime; + time(&rawtime); + tm *ptm = gmtime(&rawtime); + + randseed = + (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); + } + gsl_r = gsl_rng_alloc(gslType); + gsl_rng_set(gsl_r, randseed); + + double *p_gamma = new double[ns_test]; + CalcPgamma(p_gamma); + + gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma); + + // Initial parameters. + InitialMCMC(UtX, Utz, rank_old, cHyp_old, pos_loglr); + + cHyp_initial = cHyp_old; + + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); + + beta_old.clear(); + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + beta_old.push_back(0); + } + } else { + gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma); + gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma); + SetXgamma(UtXgamma, UtX, rank_old); + logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, + alpha_old, beta, cHyp_old); + + beta_old.clear(); + for (size_t i = 0; i < beta->size; ++i) { + beta_old.push_back(gsl_vector_get(beta, i)); + } + gsl_matrix_free(UtXgamma); + gsl_vector_free(beta); + } + + // Calculate centered z_hat, and pve. 
+ if (a_mode == 13) { + time_start = clock(); + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old); + } + time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + + // Start MCMC. + int accept; + size_t total_step = w_step + s_step; + size_t w = 0, w_col, pos; + size_t repeat = 0; + + for (size_t t = 0; t < total_step; ++t) { + if (t % d_pace == 0 || t == total_step - 1) { + ProgressBar("Running MCMC ", t, total_step - 1, + (double)n_accept / (double)(t * n_mh + 1)); + } + + if (a_mode == 13) { + SampleZ(y, z_hat, z); + mean_z = CenterVector(z); + + time_start = clock(); + gsl_blas_dgemv(CblasTrans, 1.0, U, z, 0.0, Utz); + time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // First proposal. + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); + beta_old.clear(); + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + beta_old.push_back(0); + } + } else { + gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma); + gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma); + SetXgamma(UtXgamma, UtX, rank_old); + logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, + alpha_old, beta, cHyp_old); + + beta_old.clear(); + for (size_t i = 0; i < beta->size; ++i) { + beta_old.push_back(gsl_vector_get(beta, i)); + } + gsl_matrix_free(UtXgamma); + gsl_vector_free(beta); + } + } + + // M-H steps. 
+ for (size_t i = 0; i < n_mh; ++i) { + if (gsl_rng_uniform(gsl_r) < 0.33) { + repeat = 1 + gsl_rng_uniform_int(gsl_r, 20); + } else { + repeat = 1; + } + + logMHratio = 0.0; + logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat); + logMHratio += + ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); + logMHratio += ProposePi(cHyp_old, cHyp_new, repeat); + + if (cHyp_new.n_gamma == 0 || cHyp_new.rho == 0) { + logPost_new = CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new); + beta_new.clear(); + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + beta_new.push_back(0); + } + } else { + gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_new.n_gamma); + gsl_vector *beta = gsl_vector_alloc(cHyp_new.n_gamma); + SetXgamma(UtXgamma, UtX, rank_new); + logPost_new = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new, + alpha_new, beta, cHyp_new); + beta_new.clear(); + for (size_t i = 0; i < beta->size; ++i) { + beta_new.push_back(gsl_vector_get(beta, i)); + } + gsl_matrix_free(UtXgamma); + gsl_vector_free(beta); + } + + logMHratio += logPost_new - logPost_old; + + if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) { + accept = 1; + n_accept++; + } else { + accept = 0; + } + + if (accept == 1) { + logPost_old = logPost_new; + rank_old.clear(); + beta_old.clear(); + if (rank_new.size() != 0) { + for (size_t i = 0; i < rank_new.size(); ++i) { + rank_old.push_back(rank_new[i]); + beta_old.push_back(beta_new[i]); + } + } + cHyp_old = cHyp_new; + gsl_vector_memcpy(alpha_old, alpha_new); + gsl_vector_memcpy(UtXb_old, UtXb_new); + gsl_vector_memcpy(Utu_old, Utu_new); + } else { + cHyp_new = cHyp_old; + } + } + + // Calculate z_hat, and pve. + if (a_mode == 13) { + time_start = clock(); + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old); + } + + // Sample mu and update z_hat. 
+ gsl_vector_sub(z, z_hat); + mean_z += CenterVector(z); + mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test)); + gsl_vector_add_constant(z_hat, mean_z); + + time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + + // Save data. + if (t < w_step) { + continue; + } else { + if (t % r_pace == 0) { + w_col = w % w_pace; + if (w_col == 0) { + if (w == 0) { + WriteResult(0, Result_hyp, Result_gamma, w_col); + } else { + WriteResult(1, Result_hyp, Result_gamma, w_col); + gsl_matrix_set_zero(Result_hyp); + gsl_matrix_set_zero(Result_gamma); + } + } + + gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h); + gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve); + gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho); + gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge); + gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp); + gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma); + + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + pos = mapRank2pos[rank_old[i]] + 1; + + gsl_matrix_set(Result_gamma, w_col, i, pos); + + beta_g[pos - 1].first += beta_old[i]; + beta_g[pos - 1].second += 1.0; + } + + gsl_vector_add(alpha_prime, alpha_old); + gsl_vector_add(Utu, Utu_old); + + if (a_mode == 13) { + pheno_mean += mean_z; + } + + w++; + } + } + } + cout << endl; + + w_col = w % w_pace; + WriteResult(1, Result_hyp, Result_gamma, w_col); + + gsl_matrix_free(Result_hyp); + gsl_matrix_free(Result_gamma); + + gsl_vector_free(z_hat); + gsl_vector_free(z); + gsl_vector_free(Utz); + gsl_vector_free(UtXb_new); + gsl_vector_free(UtXb_old); + gsl_vector_free(alpha_new); + gsl_vector_free(alpha_old); + gsl_vector_free(Utu_new); + gsl_vector_free(Utu_old); + + gsl_vector_scale(alpha_prime, 1.0 / (double)w); + gsl_vector_scale(Utu, 1.0 / (double)w); + if (a_mode == 13) { + pheno_mean /= (double)w; + } + + gsl_vector *alpha = gsl_vector_alloc(ns_test); + gsl_blas_dgemv(CblasTrans, 1.0 / (double)ns_test, UtX, alpha_prime, 0.0, + alpha); + WriteParam(beta_g, alpha, w); + 
gsl_vector_free(alpha); + + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime); + WriteBV(alpha_prime); + + gsl_vector_free(alpha_prime); + gsl_vector_free(Utu); + + delete[] p_gamma; + beta_g.clear(); + + return; } void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *eval, - const double lambda) { - gsl_vector *beta=gsl_vector_alloc (UtX->size2); - gsl_vector *H_eval=gsl_vector_alloc (Uty->size); - gsl_vector *bv=gsl_vector_alloc (Uty->size); + const gsl_vector *Uty, const gsl_vector *eval, + const double lambda) { + gsl_vector *beta = gsl_vector_alloc(UtX->size2); + gsl_vector *H_eval = gsl_vector_alloc(Uty->size); + gsl_vector *bv = gsl_vector_alloc(Uty->size); - gsl_vector_memcpy (H_eval, eval); - gsl_vector_scale (H_eval, lambda); - gsl_vector_add_constant (H_eval, 1.0); + gsl_vector_memcpy(H_eval, eval); + gsl_vector_scale(H_eval, lambda); + gsl_vector_add_constant(H_eval, 1.0); - gsl_vector_memcpy (bv, Uty); - gsl_vector_div (bv, H_eval); + gsl_vector_memcpy(bv, Uty); + gsl_vector_div(bv, H_eval); - gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2, - UtX, bv, 0.0, beta); - gsl_vector_add_constant (H_eval, -1.0); - gsl_vector_mul (H_eval, bv); - gsl_blas_dgemv (CblasNoTrans, 1.0, U, H_eval, 0.0, bv); + gsl_blas_dgemv(CblasTrans, lambda / (double)UtX->size2, UtX, bv, 0.0, beta); + gsl_vector_add_constant(H_eval, -1.0); + gsl_vector_mul(H_eval, bv); + gsl_blas_dgemv(CblasNoTrans, 1.0, U, H_eval, 0.0, bv); - WriteParam (beta); - WriteBV(bv); + WriteParam(beta); + WriteBV(bv); - gsl_vector_free (H_eval); - gsl_vector_free (beta); - gsl_vector_free (bv); + gsl_vector_free(H_eval); + gsl_vector_free(beta); + gsl_vector_free(bv); - return; + return; } // Below fits MCMC for rho=1. 
-void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, - const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) { - time_t time_start=clock(); - gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, - s_size); - gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); - gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size); - - lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, - &X_sub.matrix, 0.0, &XtX_sub.matrix); +void BSLMM::CalcXtX(const gsl_matrix *X, const gsl_vector *y, + const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) { + time_t time_start = clock(); + gsl_matrix_const_view X_sub = + gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size); + gsl_matrix_view XtX_sub = gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); + gsl_vector_view Xty_sub = gsl_vector_subvector(Xty, 0, s_size); + + lapack_dgemm((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, + &XtX_sub.matrix); gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector); - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); return; } -void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, - const gsl_matrix *XtX_old, const gsl_vector *Xty_old, - const gsl_vector *y, const vector<size_t> &rank_old, - const vector<size_t> &rank_new, gsl_matrix *X_new, - gsl_matrix *XtX_new, gsl_vector *Xty_new) { +void BSLMM::SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old, + const gsl_matrix *XtX_old, const gsl_vector *Xty_old, + const gsl_vector *y, const vector<size_t> &rank_old, + const vector<size_t> &rank_new, gsl_matrix *X_new, + gsl_matrix *XtX_new, gsl_vector *Xty_new) { double d; // rank_old and rank_new are sorted already inside PorposeGamma // calculate vectors rank_remove and rank_add. // make sure that v_size is larger than repeat. 
- size_t v_size=20; + size_t v_size = 20; vector<size_t> rank_remove(v_size), rank_add(v_size), - rank_union(s_max+v_size); + rank_union(s_max + v_size); vector<size_t>::iterator it; - it=set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(), - rank_new.end(), rank_remove.begin()); - rank_remove.resize(it-rank_remove.begin()); + it = set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(), + rank_new.end(), rank_remove.begin()); + rank_remove.resize(it - rank_remove.begin()); - it=set_difference (rank_new.begin(), rank_new.end(), rank_old.begin(), - rank_old.end(), rank_add.begin()); - rank_add.resize(it-rank_add.begin()); + it = set_difference(rank_new.begin(), rank_new.end(), rank_old.begin(), + rank_old.end(), rank_add.begin()); + rank_add.resize(it - rank_add.begin()); - it=set_union (rank_new.begin(), rank_new.end(), rank_old.begin(), - rank_old.end(), rank_union.begin()); - rank_union.resize(it-rank_union.begin()); + it = set_union(rank_new.begin(), rank_new.end(), rank_old.begin(), + rank_old.end(), rank_union.begin()); + rank_union.resize(it - rank_union.begin()); // Map rank_remove and rank_add. map<size_t, int> mapRank2in_remove, mapRank2in_add; - for (size_t i=0; i<rank_remove.size(); i++) { - mapRank2in_remove[rank_remove[i]]=1; + for (size_t i = 0; i < rank_remove.size(); i++) { + mapRank2in_remove[rank_remove[i]] = 1; } - for (size_t i=0; i<rank_add.size(); i++) { - mapRank2in_add[rank_add[i]]=1; + for (size_t i = 0; i < rank_add.size(); i++) { + mapRank2in_add[rank_add[i]] = 1; } // Obtain the subset of matrix/vector. 
- gsl_matrix_const_view Xold_sub= - gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size()); - gsl_matrix_const_view XtXold_sub= - gsl_matrix_const_submatrix(XtX_old, 0, 0, rank_old.size(), - rank_old.size()); - gsl_vector_const_view Xtyold_sub= - gsl_vector_const_subvector(Xty_old, 0, rank_old.size()); - - gsl_matrix_view Xnew_sub= - gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size()); - gsl_matrix_view XtXnew_sub= - gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); - gsl_vector_view Xtynew_sub= - gsl_vector_subvector(Xty_new, 0, rank_new.size()); + gsl_matrix_const_view Xold_sub = + gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size()); + gsl_matrix_const_view XtXold_sub = gsl_matrix_const_submatrix( + XtX_old, 0, 0, rank_old.size(), rank_old.size()); + gsl_vector_const_view Xtyold_sub = + gsl_vector_const_subvector(Xty_old, 0, rank_old.size()); + + gsl_matrix_view Xnew_sub = + gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size()); + gsl_matrix_view XtXnew_sub = + gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); + gsl_vector_view Xtynew_sub = + gsl_vector_subvector(Xty_new, 0, rank_new.size()); // Get X_new and calculate XtX_new. 
- if (rank_remove.size()==0 && rank_add.size()==0) { + if (rank_remove.size() == 0 && rank_add.size() == 0) { gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix); gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix); gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector); } else { size_t i_old, j_old, i_new, j_new, i_add, j_add, i_flag, j_flag; - if (rank_add.size()==0) { - i_old=0; i_new=0; - for (size_t i=0; i<rank_union.size(); i++) { - if (mapRank2in_remove.count(rank_old[i_old])!=0) {i_old++; continue;} + if (rank_add.size() == 0) { + i_old = 0; + i_new = 0; + for (size_t i = 0; i < rank_union.size(); i++) { + if (mapRank2in_remove.count(rank_old[i_old]) != 0) { + i_old++; + continue; + } - gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new); - gsl_vector_const_view Xcopy_col=gsl_matrix_const_column(X_old, i_old); - gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); + gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new); + gsl_vector_const_view Xcopy_col = gsl_matrix_const_column(X_old, i_old); + gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector); - d=gsl_vector_get (Xty_old, i_old); - gsl_vector_set (Xty_new, i_new, d); + d = gsl_vector_get(Xty_old, i_old); + gsl_vector_set(Xty_new, i_new, d); - j_old=i_old; j_new=i_new; - for (size_t j=i; j<rank_union.size(); j++) { - if (mapRank2in_remove.count(rank_old[j_old])!=0) {j_old++; continue;} + j_old = i_old; + j_new = i_new; + for (size_t j = i; j < rank_union.size(); j++) { + if (mapRank2in_remove.count(rank_old[j_old]) != 0) { + j_old++; + continue; + } - d=gsl_matrix_get(XtX_old, i_old, j_old); + d = gsl_matrix_get(XtX_old, i_old, j_old); - gsl_matrix_set (XtX_new, i_new, j_new, d); - if (i_new!=j_new) {gsl_matrix_set (XtX_new, j_new, i_new, d);} + gsl_matrix_set(XtX_new, i_new, j_new, d); + if (i_new != j_new) { + gsl_matrix_set(XtX_new, j_new, i_new, d); + } - j_old++; j_new++; + j_old++; + j_new++; } - i_old++; i_new++; + i_old++; + i_new++; } } else { - gsl_matrix 
*X_add=gsl_matrix_alloc(X_old->size1, rank_add.size() ); - gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2); - gsl_matrix *XtX_ao=gsl_matrix_alloc(X_add->size2, X_old->size2); - gsl_vector *Xty_add=gsl_vector_alloc(X_add->size2); + gsl_matrix *X_add = gsl_matrix_alloc(X_old->size1, rank_add.size()); + gsl_matrix *XtX_aa = gsl_matrix_alloc(X_add->size2, X_add->size2); + gsl_matrix *XtX_ao = gsl_matrix_alloc(X_add->size2, X_old->size2); + gsl_vector *Xty_add = gsl_vector_alloc(X_add->size2); // Get X_add. - SetXgamma (X_add, X, rank_add); + SetXgamma(X_add, X, rank_add); // Get t(X_add)X_add and t(X_add)X_temp. - clock_t time_start=clock(); + clock_t time_start = clock(); // Somehow the lapack_dgemm does not work here. - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add, - 0.0, XtX_aa); - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_old, - 0.0, XtX_ao); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao); gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add); - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Save to X_new, XtX_new and Xty_new. 
- i_old=0; i_new=0; i_add=0; - for (size_t i=0; i<rank_union.size(); i++) { - if (mapRank2in_remove.count(rank_old[i_old])!=0) { - i_old++; - continue; - } - if (mapRank2in_add.count(rank_new[i_new])!=0) { - i_flag=1; - } else { - i_flag=0; - } - - gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new); - if (i_flag==1) { - gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i_add); - gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); - } else { - gsl_vector_const_view Xcopy_col= - gsl_matrix_const_column(X_old, i_old); - gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); - } - - if (i_flag==1) { - d=gsl_vector_get (Xty_add, i_add); + i_old = 0; + i_new = 0; + i_add = 0; + for (size_t i = 0; i < rank_union.size(); i++) { + if (mapRank2in_remove.count(rank_old[i_old]) != 0) { + i_old++; + continue; + } + if (mapRank2in_add.count(rank_new[i_new]) != 0) { + i_flag = 1; + } else { + i_flag = 0; + } + + gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new); + if (i_flag == 1) { + gsl_vector_view Xcopy_col = gsl_matrix_column(X_add, i_add); + gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector); + } else { + gsl_vector_const_view Xcopy_col = + gsl_matrix_const_column(X_old, i_old); + gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector); + } + + if (i_flag == 1) { + d = gsl_vector_get(Xty_add, i_add); } else { - d=gsl_vector_get (Xty_old, i_old); + d = gsl_vector_get(Xty_old, i_old); + } + gsl_vector_set(Xty_new, i_new, d); + + j_old = i_old; + j_new = i_new; + j_add = i_add; + for (size_t j = i; j < rank_union.size(); j++) { + if (mapRank2in_remove.count(rank_old[j_old]) != 0) { + j_old++; + continue; + } + if (mapRank2in_add.count(rank_new[j_new]) != 0) { + j_flag = 1; + } else { + j_flag = 0; + } + + if (i_flag == 1 && j_flag == 1) { + d = gsl_matrix_get(XtX_aa, i_add, j_add); + } else if (i_flag == 1) { + d = gsl_matrix_get(XtX_ao, i_add, j_old); + } else if (j_flag == 1) { + d = gsl_matrix_get(XtX_ao, j_add, i_old); + } else { + d = 
gsl_matrix_get(XtX_old, i_old, j_old); + } + + gsl_matrix_set(XtX_new, i_new, j_new, d); + if (i_new != j_new) { + gsl_matrix_set(XtX_new, j_new, i_new, d); + } + + j_new++; + if (j_flag == 1) { + j_add++; + } else { + j_old++; + } } - gsl_vector_set (Xty_new, i_new, d); - - j_old=i_old; j_new=i_new; j_add=i_add; - for (size_t j=i; j<rank_union.size(); j++) { - if (mapRank2in_remove.count(rank_old[j_old])!=0) { - j_old++; - continue; - } - if (mapRank2in_add.count(rank_new[j_new])!=0) { - j_flag=1; - } else { - j_flag=0; - } - - if (i_flag==1 && j_flag==1) { - d=gsl_matrix_get(XtX_aa, i_add, j_add); - } else if (i_flag==1) { - d=gsl_matrix_get(XtX_ao, i_add, j_old); - } else if (j_flag==1) { - d=gsl_matrix_get(XtX_ao, j_add, i_old); - } else { - d=gsl_matrix_get(XtX_old, i_old, j_old); - } - - gsl_matrix_set (XtX_new, i_new, j_new, d); - if (i_new!=j_new) {gsl_matrix_set (XtX_new, j_new, i_new, d);} - - j_new++; if (j_flag==1) {j_add++;} else {j_old++;} + i_new++; + if (i_flag == 1) { + i_add++; + } else { + i_old++; } - i_new++; if (i_flag==1) {i_add++;} else {i_old++;} } gsl_matrix_free(X_add); @@ -1377,7 +1445,6 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, gsl_matrix_free(XtX_ao); gsl_vector_free(Xty_add); } - } rank_remove.clear(); @@ -1389,462 +1456,442 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, return; } -double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) { - double logpost=0.0; +double BSLMM::CalcPosterior(const double yty, class HYPBSLMM &cHyp) { + double logpost = 0.0; - // For quantitative traits, calculate pve and pge. - // Pve and pge for case/control data are calculted in CalcCC_PVEnZ. - if (a_mode==11) { - cHyp.pve=0.0; - cHyp.pge=1.0; - } + // For quantitative traits, calculate pve and pge. + // Pve and pge for case/control data are calculted in CalcCC_PVEnZ. + if (a_mode == 11) { + cHyp.pve = 0.0; + cHyp.pge = 1.0; + } - // Calculate likelihood. 
- if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);} - else {logpost-=0.5*yty;} + // Calculate likelihood. + if (a_mode == 11) { + logpost -= 0.5 * (double)ni_test * log(yty); + } else { + logpost -= 0.5 * yty; + } - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ - ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); + logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp + + ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp)); - return logpost; + return logpost; } -double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, - const gsl_vector *Xty, const double yty, - const size_t s_size, gsl_vector *Xb, - gsl_vector *beta, class HYPBSLMM &cHyp) { - double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); - double logpost=0.0; - double d, P_yy=yty, logdet_O=0.0; - - gsl_matrix_const_view Xgamma_sub= - gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size); - gsl_matrix_const_view XtX_sub= - gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size); - gsl_vector_const_view Xty_sub= - gsl_vector_const_subvector (Xty, 0, s_size); - - gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size); - gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size); - gsl_vector *beta_hat=gsl_vector_alloc (s_size); - gsl_vector *Xty_temp=gsl_vector_alloc (s_size); - - gsl_vector_memcpy (Xty_temp, &Xty_sub.vector); - - // Calculate Omega. - gsl_matrix_memcpy (Omega, &XtX_sub.matrix); - gsl_matrix_scale (Omega, sigma_a2); - gsl_matrix_set_identity (M_temp); - gsl_matrix_add (Omega, M_temp); - - // Calculate beta_hat. - logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat); - gsl_vector_scale (beta_hat, sigma_a2); - - gsl_blas_ddot (Xty_temp, beta_hat, &d); - P_yy-=d; - - // Sample tau. - double tau=1.0; - if (a_mode==11) { - tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); - } - - // Sample beta. 
- for (size_t i=0; i<s_size; i++) - { - d=gsl_ran_gaussian(gsl_r, 1); - gsl_vector_set(beta, i, d); - } - gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size); - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, - &beta_sub.vector); - - // This computes inv(L^T(Omega)) %*% beta. - gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau)); - gsl_vector_add(&beta_sub.vector, beta_hat); - gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix, - &beta_sub.vector, 0.0, Xb); - - // For quantitative traits, calculate pve and pge. - if (a_mode==11) { - gsl_blas_ddot (Xb, Xb, &d); - cHyp.pve=d/(double)ni_test; - cHyp.pve/=cHyp.pve+1.0/tau; - cHyp.pge=1.0; - } - - logpost=-0.5*logdet_O; - if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} - else {logpost-=0.5*P_yy;} - - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ - ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); - - gsl_matrix_free (Omega); - gsl_matrix_free (M_temp); - gsl_vector_free (beta_hat); - gsl_vector_free (Xty_temp); - - return logpost; +double BSLMM::CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX, + const gsl_vector *Xty, const double yty, + const size_t s_size, gsl_vector *Xb, + gsl_vector *beta, class HYPBSLMM &cHyp) { + double sigma_a2 = cHyp.h / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); + double logpost = 0.0; + double d, P_yy = yty, logdet_O = 0.0; + + gsl_matrix_const_view Xgamma_sub = + gsl_matrix_const_submatrix(Xgamma, 0, 0, Xgamma->size1, s_size); + gsl_matrix_const_view XtX_sub = + gsl_matrix_const_submatrix(XtX, 0, 0, s_size, s_size); + gsl_vector_const_view Xty_sub = gsl_vector_const_subvector(Xty, 0, s_size); + + gsl_matrix *Omega = gsl_matrix_alloc(s_size, s_size); + gsl_matrix *M_temp = gsl_matrix_alloc(s_size, s_size); + gsl_vector *beta_hat = gsl_vector_alloc(s_size); + gsl_vector *Xty_temp = gsl_vector_alloc(s_size); + + gsl_vector_memcpy(Xty_temp, &Xty_sub.vector); + + // Calculate Omega. 
+ gsl_matrix_memcpy(Omega, &XtX_sub.matrix); + gsl_matrix_scale(Omega, sigma_a2); + gsl_matrix_set_identity(M_temp); + gsl_matrix_add(Omega, M_temp); + + // Calculate beta_hat. + logdet_O = CholeskySolve(Omega, Xty_temp, beta_hat); + gsl_vector_scale(beta_hat, sigma_a2); + + gsl_blas_ddot(Xty_temp, beta_hat, &d); + P_yy -= d; + + // Sample tau. + double tau = 1.0; + if (a_mode == 11) { + tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy); + } + + // Sample beta. + for (size_t i = 0; i < s_size; i++) { + d = gsl_ran_gaussian(gsl_r, 1); + gsl_vector_set(beta, i, d); + } + gsl_vector_view beta_sub = gsl_vector_subvector(beta, 0, s_size); + gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, + &beta_sub.vector); + + // This computes inv(L^T(Omega)) %*% beta. + gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2 / tau)); + gsl_vector_add(&beta_sub.vector, beta_hat); + gsl_blas_dgemv(CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0, + Xb); + + // For quantitative traits, calculate pve and pge. + if (a_mode == 11) { + gsl_blas_ddot(Xb, Xb, &d); + cHyp.pve = d / (double)ni_test; + cHyp.pve /= cHyp.pve + 1.0 / tau; + cHyp.pge = 1.0; + } + + logpost = -0.5 * logdet_O; + if (a_mode == 11) { + logpost -= 0.5 * (double)ni_test * log(P_yy); + } else { + logpost -= 0.5 * P_yy; + } + + logpost += + ((double)cHyp.n_gamma - 1.0) * cHyp.logp + + ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp)); + + gsl_matrix_free(Omega); + gsl_matrix_free(M_temp); + gsl_vector_free(beta_hat); + gsl_vector_free(Xty_temp); + + return logpost; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp) -{ +void BSLMM::CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp) { gsl_vector_set_zero(z_hat); - cHyp.pve=0.0; - cHyp.pge=1.0; + cHyp.pve = 0.0; + cHyp.pge = 1.0; return; } // Calculate pve and pge, and calculate z_hat for case-control data. 
-void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, - class HYPBSLMM &cHyp) { - double d; +void BSLMM::CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat, + class HYPBSLMM &cHyp) { + double d; - gsl_blas_ddot (Xb, Xb, &d); - cHyp.pve=d/(double)ni_test; - cHyp.pve/=cHyp.pve+1.0; - cHyp.pge=1.0; + gsl_blas_ddot(Xb, Xb, &d); + cHyp.pve = d / (double)ni_test; + cHyp.pve /= cHyp.pve + 1.0; + cHyp.pge = 1.0; - gsl_vector_memcpy (z_hat, Xb); + gsl_vector_memcpy(z_hat, Xb); - return; + return; } // If a_mode==13, then run probit model. -void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { - clock_t time_start; - double time_set=0, time_post=0; - - class HYPBSLMM cHyp_old, cHyp_new; - - gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6); - gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); - - gsl_vector *Xb_new=gsl_vector_alloc (ni_test); - gsl_vector *Xb_old=gsl_vector_alloc (ni_test); - gsl_vector *z_hat=gsl_vector_alloc (ni_test); - gsl_vector *z=gsl_vector_alloc (ni_test); - - gsl_matrix *Xgamma_old=gsl_matrix_alloc (ni_test, s_max); - gsl_matrix *XtX_old=gsl_matrix_alloc (s_max, s_max); - gsl_vector *Xtz_old=gsl_vector_alloc (s_max); - gsl_vector *beta_old=gsl_vector_alloc (s_max); - - gsl_matrix *Xgamma_new=gsl_matrix_alloc (ni_test, s_max); - gsl_matrix *XtX_new=gsl_matrix_alloc (s_max, s_max); - gsl_vector *Xtz_new=gsl_vector_alloc (s_max); - gsl_vector *beta_new=gsl_vector_alloc (s_max); - - double ztz=0.0; - gsl_vector_memcpy (z, y); - - // For quantitative traits, y is centered already in - // gemma.cpp, but just in case. 
- double mean_z=CenterVector (z); - gsl_blas_ddot(z, z, &ztz); - - double logPost_new, logPost_old; - double logMHratio; - - gsl_matrix_set_zero (Result_gamma); - if (a_mode==13) { - pheno_mean=0.0; - } - - vector<pair<double, double> > beta_g; - for (size_t i=0; i<ns_test; i++) { - beta_g.push_back(make_pair(0.0, 0.0)); - } - - vector<size_t> rank_new, rank_old; - vector<pair<size_t, double> > pos_loglr; - - time_start=clock(); - MatrixCalcLmLR (X, z, pos_loglr); - time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr); - for (size_t i=0; i<ns_test; ++i) { - mapRank2pos[i]=pos_loglr[i].first; - } - - // Calculate proposal distribution for gamma (unnormalized), - // and set up gsl_r and gsl_t. - gsl_rng_env_setup(); - const gsl_rng_type * gslType; - gslType = gsl_rng_default; - if (randseed<0) - { - time_t rawtime; - time (&rawtime); - tm * ptm = gmtime (&rawtime); - - randseed = (unsigned) (ptm->tm_hour%24*3600+ - ptm->tm_min*60+ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - - double *p_gamma = new double[ns_test]; - CalcPgamma (p_gamma); - - gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - - // Initial parameters. - InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr); - - cHyp_initial=cHyp_old; - - if (cHyp_old.n_gamma==0) { - logPost_old=CalcPosterior (ztz, cHyp_old); - } - else { - SetXgamma (Xgamma_old, X, rank_old); - CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old); - logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, - rank_old.size(), Xb_old, beta_old, - cHyp_old); - } - - // Calculate centered z_hat, and pve. - if (a_mode==13) { - if (cHyp_old.n_gamma==0) { - CalcCC_PVEnZ (z_hat, cHyp_old); - } - else { - CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old); - } - } - - // Start MCMC. 
- int accept; - size_t total_step=w_step+s_step; - size_t w=0, w_col, pos; - size_t repeat=0; - - for (size_t t=0; t<total_step; ++t) { - if (t%d_pace==0 || t==total_step-1) { - ProgressBar ("Running MCMC ", t, total_step-1, - (double)n_accept/(double)(t*n_mh+1)); - } - - if (a_mode==13) { - SampleZ (y, z_hat, z); - mean_z=CenterVector (z); - gsl_blas_ddot(z,z,&ztz); - - // First proposal. - if (cHyp_old.n_gamma==0) { - logPost_old=CalcPosterior (ztz, cHyp_old); - } else { - gsl_matrix_view Xold_sub= - gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, - rank_old.size()); - gsl_vector_view Xtz_sub= - gsl_vector_subvector(Xtz_old, 0, rank_old.size()); - gsl_blas_dgemv (CblasTrans, 1.0, &Xold_sub.matrix, - z, 0.0, &Xtz_sub.vector); - logPost_old= - CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, - rank_old.size(), Xb_old, beta_old, - cHyp_old); - } - } - - // M-H steps. - for (size_t i=0; i<n_mh; ++i) { - if (gsl_rng_uniform(gsl_r)<0.33) { - repeat = 1+gsl_rng_uniform_int(gsl_r, 20); - } - else {repeat=1;} - - logMHratio=0.0; - logMHratio+= - ProposeHnRho(cHyp_old, cHyp_new, repeat); - logMHratio+= - ProposeGamma (rank_old, rank_new, p_gamma, - cHyp_old, cHyp_new, repeat); - logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat); - - if (cHyp_new.n_gamma==0) { - logPost_new=CalcPosterior (ztz, cHyp_new); - } else { - - // This makes sure that rank_old.size() == - // rank_remove.size() does not happen. 
- if (cHyp_new.n_gamma<=20 || cHyp_old.n_gamma<=20) { - time_start=clock(); - SetXgamma (Xgamma_new, X, rank_new); - CalcXtX (Xgamma_new, z, rank_new.size(), - XtX_new, Xtz_new); - time_set+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - } else { - time_start=clock(); - SetXgamma (X, Xgamma_old, XtX_old, Xtz_old, z, - rank_old, rank_new, Xgamma_new, - XtX_new, Xtz_new); - time_set+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - } - time_start=clock(); - logPost_new= - CalcPosterior (Xgamma_new, XtX_new, Xtz_new, ztz, - rank_new.size(), Xb_new, beta_new, - cHyp_new); - time_post+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - } - logMHratio+=logPost_new-logPost_old; - - if (logMHratio>0 || - log(gsl_rng_uniform(gsl_r))<logMHratio) { - accept=1; - n_accept++; - } - else {accept=0;} - - if (accept==1) { - logPost_old=logPost_new; - cHyp_old=cHyp_new; - gsl_vector_memcpy (Xb_old, Xb_new); - - rank_old.clear(); - if (rank_new.size()!=0) { - for (size_t i=0; - i<rank_new.size(); - ++i) { - rank_old.push_back(rank_new[i]); - } - - gsl_matrix_view Xold_sub=gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size()); - gsl_matrix_view XtXold_sub=gsl_matrix_submatrix(XtX_old, 0, 0, rank_new.size(), rank_new.size()); - gsl_vector_view Xtzold_sub=gsl_vector_subvector(Xtz_old, 0, rank_new.size()); - gsl_vector_view betaold_sub=gsl_vector_subvector(beta_old, 0, rank_new.size()); - - gsl_matrix_view Xnew_sub=gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size()); - gsl_matrix_view XtXnew_sub=gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); - gsl_vector_view Xtznew_sub=gsl_vector_subvector(Xtz_new, 0, rank_new.size()); - gsl_vector_view betanew_sub=gsl_vector_subvector(beta_new, 0, rank_new.size()); - - gsl_matrix_memcpy(&Xold_sub.matrix, - &Xnew_sub.matrix); - gsl_matrix_memcpy(&XtXold_sub.matrix, - &XtXnew_sub.matrix); - gsl_vector_memcpy(&Xtzold_sub.vector, - &Xtznew_sub.vector); - 
gsl_vector_memcpy(&betaold_sub.vector, - &betanew_sub.vector); - } - } else { - cHyp_new=cHyp_old; - } - - } - - // Calculate z_hat, and pve. - if (a_mode==13) { - if (cHyp_old.n_gamma==0) { - CalcCC_PVEnZ (z_hat, cHyp_old); - } - else { - CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old); - } - - // Sample mu and update z_hat. - gsl_vector_sub (z, z_hat); - mean_z+=CenterVector(z); - mean_z+=gsl_ran_gaussian(gsl_r, - sqrt(1.0/(double) ni_test)); - - gsl_vector_add_constant (z_hat, mean_z); - } - - // Save data. - if (t<w_step) {continue;} - else { - if (t%r_pace==0) { - w_col=w%w_pace; - if (w_col==0) { - if (w==0) { - WriteResult(0,Result_hyp, - Result_gamma,w_col); - } - else { - WriteResult(1,Result_hyp, - Result_gamma,w_col); - gsl_matrix_set_zero (Result_hyp); - gsl_matrix_set_zero (Result_gamma); - } - } - - gsl_matrix_set(Result_hyp,w_col,0, - cHyp_old.h); - gsl_matrix_set(Result_hyp,w_col,1, - cHyp_old.pve); - gsl_matrix_set(Result_hyp,w_col,2, - cHyp_old.rho); - gsl_matrix_set(Result_hyp,w_col,3, - cHyp_old.pge); - gsl_matrix_set(Result_hyp,w_col,4, - cHyp_old.logp); - gsl_matrix_set(Result_hyp,w_col,5, - cHyp_old.n_gamma); - - for (size_t i=0; i<cHyp_old.n_gamma; ++i) { - pos=mapRank2pos[rank_old[i]]+1; - gsl_matrix_set(Result_gamma,w_col, - i,pos); - - beta_g[pos-1].first+= - gsl_vector_get(beta_old, i); - beta_g[pos-1].second+=1.0; - } - - if (a_mode==13) { - pheno_mean+=mean_z; - } - - w++; - } - } - } - cout<<endl; - - cout<<"time on selecting Xgamma: "<<time_set<<endl; - cout<<"time on calculating posterior: "<<time_post<<endl; - - w_col=w%w_pace; - WriteResult (1, Result_hyp, Result_gamma, w_col); - - gsl_vector *alpha=gsl_vector_alloc (ns_test); - gsl_vector_set_zero (alpha); - WriteParam (beta_g, alpha, w); - gsl_vector_free(alpha); - - gsl_matrix_free(Result_hyp); - gsl_matrix_free(Result_gamma); - - gsl_vector_free(z_hat); - gsl_vector_free(z); - gsl_vector_free(Xb_new); - gsl_vector_free(Xb_old); - - gsl_matrix_free(Xgamma_old); - 
gsl_matrix_free(XtX_old); - gsl_vector_free(Xtz_old); - gsl_vector_free(beta_old); - - gsl_matrix_free(Xgamma_new); - gsl_matrix_free(XtX_new); - gsl_vector_free(Xtz_new); - gsl_vector_free(beta_new); - - delete [] p_gamma; - beta_g.clear(); - - return; +void BSLMM::MCMC(const gsl_matrix *X, const gsl_vector *y) { + clock_t time_start; + double time_set = 0, time_post = 0; + + class HYPBSLMM cHyp_old, cHyp_new; + + gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6); + gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max); + + gsl_vector *Xb_new = gsl_vector_alloc(ni_test); + gsl_vector *Xb_old = gsl_vector_alloc(ni_test); + gsl_vector *z_hat = gsl_vector_alloc(ni_test); + gsl_vector *z = gsl_vector_alloc(ni_test); + + gsl_matrix *Xgamma_old = gsl_matrix_alloc(ni_test, s_max); + gsl_matrix *XtX_old = gsl_matrix_alloc(s_max, s_max); + gsl_vector *Xtz_old = gsl_vector_alloc(s_max); + gsl_vector *beta_old = gsl_vector_alloc(s_max); + + gsl_matrix *Xgamma_new = gsl_matrix_alloc(ni_test, s_max); + gsl_matrix *XtX_new = gsl_matrix_alloc(s_max, s_max); + gsl_vector *Xtz_new = gsl_vector_alloc(s_max); + gsl_vector *beta_new = gsl_vector_alloc(s_max); + + double ztz = 0.0; + gsl_vector_memcpy(z, y); + + // For quantitative traits, y is centered already in + // gemma.cpp, but just in case. 
+ double mean_z = CenterVector(z); + gsl_blas_ddot(z, z, &ztz); + + double logPost_new, logPost_old; + double logMHratio; + + gsl_matrix_set_zero(Result_gamma); + if (a_mode == 13) { + pheno_mean = 0.0; + } + + vector<pair<double, double>> beta_g; + for (size_t i = 0; i < ns_test; i++) { + beta_g.push_back(make_pair(0.0, 0.0)); + } + + vector<size_t> rank_new, rank_old; + vector<pair<size_t, double>> pos_loglr; + + time_start = clock(); + MatrixCalcLmLR(X, z, pos_loglr); + time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr); + for (size_t i = 0; i < ns_test; ++i) { + mapRank2pos[i] = pos_loglr[i].first; + } + + // Calculate proposal distribution for gamma (unnormalized), + // and set up gsl_r and gsl_t. + gsl_rng_env_setup(); + const gsl_rng_type *gslType; + gslType = gsl_rng_default; + if (randseed < 0) { + time_t rawtime; + time(&rawtime); + tm *ptm = gmtime(&rawtime); + + randseed = + (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); + } + gsl_r = gsl_rng_alloc(gslType); + gsl_rng_set(gsl_r, randseed); + + double *p_gamma = new double[ns_test]; + CalcPgamma(p_gamma); + + gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma); + + // Initial parameters. + InitialMCMC(X, z, rank_old, cHyp_old, pos_loglr); + + cHyp_initial = cHyp_old; + + if (cHyp_old.n_gamma == 0) { + logPost_old = CalcPosterior(ztz, cHyp_old); + } else { + SetXgamma(Xgamma_old, X, rank_old); + CalcXtX(Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old); + logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, + rank_old.size(), Xb_old, beta_old, cHyp_old); + } + + // Calculate centered z_hat, and pve. + if (a_mode == 13) { + if (cHyp_old.n_gamma == 0) { + CalcCC_PVEnZ(z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old); + } + } + + // Start MCMC. 
+ int accept; + size_t total_step = w_step + s_step; + size_t w = 0, w_col, pos; + size_t repeat = 0; + + for (size_t t = 0; t < total_step; ++t) { + if (t % d_pace == 0 || t == total_step - 1) { + ProgressBar("Running MCMC ", t, total_step - 1, + (double)n_accept / (double)(t * n_mh + 1)); + } + + if (a_mode == 13) { + SampleZ(y, z_hat, z); + mean_z = CenterVector(z); + gsl_blas_ddot(z, z, &ztz); + + // First proposal. + if (cHyp_old.n_gamma == 0) { + logPost_old = CalcPosterior(ztz, cHyp_old); + } else { + gsl_matrix_view Xold_sub = + gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size()); + gsl_vector_view Xtz_sub = + gsl_vector_subvector(Xtz_old, 0, rank_old.size()); + gsl_blas_dgemv(CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0, + &Xtz_sub.vector); + logPost_old = + CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), + Xb_old, beta_old, cHyp_old); + } + } + + // M-H steps. + for (size_t i = 0; i < n_mh; ++i) { + if (gsl_rng_uniform(gsl_r) < 0.33) { + repeat = 1 + gsl_rng_uniform_int(gsl_r, 20); + } else { + repeat = 1; + } + + logMHratio = 0.0; + logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat); + logMHratio += + ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); + logMHratio += ProposePi(cHyp_old, cHyp_new, repeat); + + if (cHyp_new.n_gamma == 0) { + logPost_new = CalcPosterior(ztz, cHyp_new); + } else { + + // This makes sure that rank_old.size() == + // rank_remove.size() does not happen. 
+ if (cHyp_new.n_gamma <= 20 || cHyp_old.n_gamma <= 20) { + time_start = clock(); + SetXgamma(Xgamma_new, X, rank_new); + CalcXtX(Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new); + time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } else { + time_start = clock(); + SetXgamma(X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new, + Xgamma_new, XtX_new, Xtz_new); + time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + time_start = clock(); + logPost_new = + CalcPosterior(Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(), + Xb_new, beta_new, cHyp_new); + time_post += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + logMHratio += logPost_new - logPost_old; + + if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) { + accept = 1; + n_accept++; + } else { + accept = 0; + } + + if (accept == 1) { + logPost_old = logPost_new; + cHyp_old = cHyp_new; + gsl_vector_memcpy(Xb_old, Xb_new); + + rank_old.clear(); + if (rank_new.size() != 0) { + for (size_t i = 0; i < rank_new.size(); ++i) { + rank_old.push_back(rank_new[i]); + } + + gsl_matrix_view Xold_sub = + gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size()); + gsl_matrix_view XtXold_sub = gsl_matrix_submatrix( + XtX_old, 0, 0, rank_new.size(), rank_new.size()); + gsl_vector_view Xtzold_sub = + gsl_vector_subvector(Xtz_old, 0, rank_new.size()); + gsl_vector_view betaold_sub = + gsl_vector_subvector(beta_old, 0, rank_new.size()); + + gsl_matrix_view Xnew_sub = + gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size()); + gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix( + XtX_new, 0, 0, rank_new.size(), rank_new.size()); + gsl_vector_view Xtznew_sub = + gsl_vector_subvector(Xtz_new, 0, rank_new.size()); + gsl_vector_view betanew_sub = + gsl_vector_subvector(beta_new, 0, rank_new.size()); + + gsl_matrix_memcpy(&Xold_sub.matrix, &Xnew_sub.matrix); + gsl_matrix_memcpy(&XtXold_sub.matrix, &XtXnew_sub.matrix); + 
gsl_vector_memcpy(&Xtzold_sub.vector, &Xtznew_sub.vector); + gsl_vector_memcpy(&betaold_sub.vector, &betanew_sub.vector); + } + } else { + cHyp_new = cHyp_old; + } + } + + // Calculate z_hat, and pve. + if (a_mode == 13) { + if (cHyp_old.n_gamma == 0) { + CalcCC_PVEnZ(z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old); + } + + // Sample mu and update z_hat. + gsl_vector_sub(z, z_hat); + mean_z += CenterVector(z); + mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test)); + + gsl_vector_add_constant(z_hat, mean_z); + } + + // Save data. + if (t < w_step) { + continue; + } else { + if (t % r_pace == 0) { + w_col = w % w_pace; + if (w_col == 0) { + if (w == 0) { + WriteResult(0, Result_hyp, Result_gamma, w_col); + } else { + WriteResult(1, Result_hyp, Result_gamma, w_col); + gsl_matrix_set_zero(Result_hyp); + gsl_matrix_set_zero(Result_gamma); + } + } + + gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h); + gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve); + gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho); + gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge); + gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp); + gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma); + + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + pos = mapRank2pos[rank_old[i]] + 1; + gsl_matrix_set(Result_gamma, w_col, i, pos); + + beta_g[pos - 1].first += gsl_vector_get(beta_old, i); + beta_g[pos - 1].second += 1.0; + } + + if (a_mode == 13) { + pheno_mean += mean_z; + } + + w++; + } + } + } + cout << endl; + + cout << "time on selecting Xgamma: " << time_set << endl; + cout << "time on calculating posterior: " << time_post << endl; + + w_col = w % w_pace; + WriteResult(1, Result_hyp, Result_gamma, w_col); + + gsl_vector *alpha = gsl_vector_alloc(ns_test); + gsl_vector_set_zero(alpha); + WriteParam(beta_g, alpha, w); + gsl_vector_free(alpha); + + gsl_matrix_free(Result_hyp); + gsl_matrix_free(Result_gamma); + + gsl_vector_free(z_hat); + gsl_vector_free(z); + 
gsl_vector_free(Xb_new); + gsl_vector_free(Xb_old); + + gsl_matrix_free(Xgamma_old); + gsl_matrix_free(XtX_old); + gsl_vector_free(Xtz_old); + gsl_vector_free(beta_old); + + gsl_matrix_free(Xgamma_new); + gsl_matrix_free(XtX_new); + gsl_vector_free(Xtz_new); + gsl_vector_free(beta_new); + + delete[] p_gamma; + beta_g.clear(); + + return; } diff --git a/src/bslmm.h b/src/bslmm.h index c7768a2..d2dadbf 100644 --- a/src/bslmm.h +++ b/src/bslmm.h @@ -19,10 +19,10 @@ #ifndef __BSLMM_H__ #define __BSLMM_H__ -#include <vector> -#include <map> -#include <gsl/gsl_rng.h> #include <gsl/gsl_randist.h> +#include <gsl/gsl_rng.h> +#include <map> +#include <vector> #include "param.h" @@ -31,149 +31,139 @@ using namespace std; class BSLMM { public: - // IO-related parameters. - int a_mode; - size_t d_pace; - - string file_bfile; - string file_geno; - string file_out; - string path_out; - - // LMM-related parameters. - double l_min; - double l_max; - size_t n_region; - double pve_null; - double pheno_mean; - - // BSLMM MCMC-related parameters - double h_min, h_max, h_scale; // Priors for h. - double rho_min, rho_max, rho_scale; // Priors for rho. - double logp_min, logp_max, logp_scale; // Priors for log(pi). - size_t s_min, s_max; // Min. & max. number of gammas. - size_t w_step; // Number of warm up/burn in - // iterations. - size_t s_step; // Num. sampling iterations. - size_t r_pace; // Record pace. - size_t w_pace; // Write pace. - size_t n_accept; // Number of acceptances. - size_t n_mh; // Number of MH steps per iter. - double geo_mean; // Mean of geometric dist. - long int randseed; - double trace_G; - - HYPBSLMM cHyp_initial; - - // Summary statistics. - size_t ni_total, ns_total; // Number of total individuals and SNPs - size_t ni_test, ns_test; // Num. individuals & SNPs used in analysis. - size_t n_cvt; // Number of covariates. - double time_UtZ; - double time_Omega; // Time spent on optimization iterations. 
- - // Time spent on constructing the proposal distribution for - // gamma (i.e. lmm or lm analysis). - double time_Proposal; - - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis. - vector<int> indicator_idv; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; - - // Record SNP information. - vector<SNPINFO> snpInfo; - - // Not included in PARAM. - gsl_rng *gsl_r; - gsl_ran_discrete_t *gsl_t; - map<size_t, size_t> mapRank2pos; - - // Main functions. - void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - - void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *eval, - const double lambda); - - void MCMC (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const gsl_vector *y); - void WriteLog (); - void WriteLR (); - void WriteBV (const gsl_vector *bv); - void WriteParam (vector<pair<double, double> > &beta_g, - const gsl_vector *alpha, const size_t w); - void WriteParam (const gsl_vector *alpha); - void WriteResult (const int flag, const gsl_matrix *Result_hyp, - const gsl_matrix *Result_gamma, const size_t w_col); - - // Subfunctions inside MCMC. 
- void CalcPgamma (double *p_gammar); - - double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, - const double sigma_a2); - void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, - vector<size_t> &rank_old, class HYPBSLMM &cHyp, - vector<pair<size_t, double> > &pos_loglr); - double CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *Utu, gsl_vector *alpha_prime, - class HYPBSLMM &cHyp); - double CalcPosterior (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *UtXb, gsl_vector *Utu, - gsl_vector *alpha_prime, gsl_vector *beta, - class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, - gsl_vector *z_hat, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, - const gsl_vector *Utu, gsl_vector *z_hat, - class HYPBSLMM &cHyp); - double CalcREMLE (const gsl_matrix *Utw, const gsl_vector *Uty, - const gsl_vector *K_eval); - - // Calculate the maximum marginal likelihood ratio for each - // analyzed SNPs with gemma, use it to rank SNPs. 
- double CalcLR (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - vector<pair<size_t, double> > &loglr_sort); - void SampleZ (const gsl_vector *y, const gsl_vector *z_hat, - gsl_vector *z); - double ProposeHnRho (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat); - double ProposePi (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, - const size_t &repeat); - double ProposeGamma (const vector<size_t> &rank_old, - vector<size_t> &rank_new, const double *p_gamma, - const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat); - void SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, - vector<size_t> &rank); - - void CalcXtX (const gsl_matrix *X_new, const gsl_vector *y, - const size_t s_size, gsl_matrix *XtX_new, - gsl_vector *Xty_new); - void SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, - const gsl_matrix *XtX_old, const gsl_vector *Xty_old, - const gsl_vector *y, const vector<size_t> &rank_old, - const vector<size_t> &rank_new, gsl_matrix *X_new, - gsl_matrix *XtX_new, gsl_vector *Xty_new); - double CalcPosterior (const double yty, class HYPBSLMM &cHyp); - double CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, - const gsl_vector *Xty, const double yty, - const size_t s_size, gsl_vector *Xb, - gsl_vector *beta, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, - class HYPBSLMM &cHyp); - void MCMC (const gsl_matrix *X, const gsl_vector *y); + // IO-related parameters. + int a_mode; + size_t d_pace; + + string file_bfile; + string file_geno; + string file_out; + string path_out; + + // LMM-related parameters. + double l_min; + double l_max; + size_t n_region; + double pve_null; + double pheno_mean; + + // BSLMM MCMC-related parameters + double h_min, h_max, h_scale; // Priors for h. 
+ double rho_min, rho_max, rho_scale; // Priors for rho. + double logp_min, logp_max, logp_scale; // Priors for log(pi). + size_t s_min, s_max; // Min. & max. number of gammas. + size_t w_step; // Number of warm up/burn in + // iterations. + size_t s_step; // Num. sampling iterations. + size_t r_pace; // Record pace. + size_t w_pace; // Write pace. + size_t n_accept; // Number of acceptances. + size_t n_mh; // Number of MH steps per iter. + double geo_mean; // Mean of geometric dist. + long int randseed; + double trace_G; + + HYPBSLMM cHyp_initial; + + // Summary statistics. + size_t ni_total, ns_total; // Number of total individuals and SNPs + size_t ni_test, ns_test; // Num. individuals & SNPs used in analysis. + size_t n_cvt; // Number of covariates. + double time_UtZ; + double time_Omega; // Time spent on optimization iterations. + + // Time spent on constructing the proposal distribution for + // gamma (i.e. lmm or lm analysis). + double time_Proposal; + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + // Record SNP information. + vector<SNPINFO> snpInfo; + + // Not included in PARAM. + gsl_rng *gsl_r; + gsl_ran_discrete_t *gsl_t; + map<size_t, size_t> mapRank2pos; + + // Main functions. 
+ void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + + void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, + const gsl_vector *eval, const double lambda); + + void MCMC(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, + const gsl_vector *K_eval, const gsl_vector *y); + void WriteLog(); + void WriteLR(); + void WriteBV(const gsl_vector *bv); + void WriteParam(vector<pair<double, double>> &beta_g, const gsl_vector *alpha, + const size_t w); + void WriteParam(const gsl_vector *alpha); + void WriteResult(const int flag, const gsl_matrix *Result_hyp, + const gsl_matrix *Result_gamma, const size_t w_col); + + // Subfunctions inside MCMC. + void CalcPgamma(double *p_gammar); + + double CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const double sigma_a2); + void InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty, + vector<size_t> &rank_old, class HYPBSLMM &cHyp, + vector<pair<size_t, double>> &pos_loglr); + double CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *Utu, gsl_vector *alpha_prime, + class HYPBSLMM &cHyp); + double CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const gsl_vector *K_eval, gsl_vector *UtXb, + gsl_vector *Utu, gsl_vector *alpha_prime, + gsl_vector *beta, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu, + gsl_vector *z_hat, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb, + const gsl_vector *Utu, gsl_vector *z_hat, + class HYPBSLMM &cHyp); + double CalcREMLE(const gsl_matrix *Utw, const gsl_vector *Uty, + const gsl_vector *K_eval); + + // Calculate the maximum marginal likelihood ratio for each + // analyzed SNPs with gemma, use it to rank SNPs. 
+ double CalcLR(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + vector<pair<size_t, double>> &loglr_sort); + void SampleZ(const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z); + double ProposeHnRho(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, + const size_t &repeat); + double ProposePi(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, + const size_t &repeat); + double ProposeGamma(const vector<size_t> &rank_old, vector<size_t> &rank_new, + const double *p_gamma, const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat); + void SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank); + + void CalcXtX(const gsl_matrix *X_new, const gsl_vector *y, + const size_t s_size, gsl_matrix *XtX_new, gsl_vector *Xty_new); + void SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old, + const gsl_matrix *XtX_old, const gsl_vector *Xty_old, + const gsl_vector *y, const vector<size_t> &rank_old, + const vector<size_t> &rank_new, gsl_matrix *X_new, + gsl_matrix *XtX_new, gsl_vector *Xty_new); + double CalcPosterior(const double yty, class HYPBSLMM &cHyp); + double CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX, + const gsl_vector *Xty, const double yty, + const size_t s_size, gsl_vector *Xb, gsl_vector *beta, + class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat, + class HYPBSLMM &cHyp); + void MCMC(const gsl_matrix *X, const gsl_vector *y); }; #endif - - diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp index e1a53a6..7aac1d4 100644 --- a/src/bslmmdap.cpp +++ b/src/bslmmdap.cpp @@ -16,89 +16,97 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> #include <fstream> +#include <iostream> #include <sstream> -#include <iomanip> +#include <algorithm> #include <cmath> +#include <cstring> +#include <ctime> +#include <iomanip> #include <iostream> #include <stdio.h> #include <stdlib.h> -#include <ctime> -#include <cstring> -#include <algorithm> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" +#include "gsl/gsl_cdf.h" #include "gsl/gsl_eigen.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" #include "gsl/gsl_randist.h" -#include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" +#include "gsl/gsl_vector.h" -#include "logistic.h" -#include "lapack.h" -#include "io.h" -#include "param.h" #include "bslmmdap.h" -#include "lmm.h" +#include "io.h" +#include "lapack.h" #include "lm.h" +#include "lmm.h" +#include "logistic.h" #include "mathfunc.h" +#include "param.h" using namespace std; -void BSLMMDAP::CopyFromParam (PARAM &cPar) { - file_out=cPar.file_out; - path_out=cPar.path_out; +void BSLMMDAP::CopyFromParam(PARAM &cPar) { + file_out = cPar.file_out; + path_out = cPar.path_out; - time_UtZ=0.0; - time_Omega=0.0; + time_UtZ = 0.0; + time_Omega = 0.0; - h_min=cPar.h_min; - h_max=cPar.h_max; - h_ngrid=cPar.h_ngrid; - rho_min=cPar.rho_min; - rho_max=cPar.rho_max; - rho_ngrid=cPar.rho_ngrid; + h_min = cPar.h_min; + h_max = cPar.h_max; + h_ngrid = cPar.h_ngrid; + rho_min = cPar.rho_min; + rho_max = cPar.rho_max; + rho_ngrid = cPar.rho_ngrid; - if (h_min<=0) {h_min=0.01;} - if (h_max>=1) {h_max=0.99;} - if (rho_min<=0) {rho_min=0.01;} - if (rho_max>=1) {rho_max=0.99;} + if (h_min <= 0) { + h_min = 0.01; + } + if (h_max >= 1) { + h_max = 0.99; + } + if (rho_min <= 0) { + rho_min = 0.01; + } + if (rho_max >= 1) { + rho_max = 0.99; + } - trace_G=cPar.trace_G; + trace_G = cPar.trace_G; - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + 
ni_test = cPar.ni_test; + ns_test = cPar.ns_test; - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; - return; + return; } -void BSLMMDAP::CopyToParam (PARAM &cPar) { - cPar.time_UtZ=time_UtZ; - cPar.time_Omega=time_Omega; +void BSLMMDAP::CopyToParam(PARAM &cPar) { + cPar.time_UtZ = time_UtZ; + cPar.time_Omega = time_Omega; - return; + return; } - - // Read hyp file. -void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, - vector<double> &vec_sb2, vector<double> &vec_wab) { - vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear(); +void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2, + vector<double> &vec_sb2, vector<double> &vec_wab) { + vec_sa2.clear(); + vec_sb2.clear(); + vec_wab.clear(); - igzstream infile (file_hyp.c_str(), igzstream::in); + igzstream infile(file_hyp.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open hyp file: "<<file_hyp<<endl; + cout << "error! fail to open hyp file: " << file_hyp << endl; return; } @@ -108,16 +116,16 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, getline(infile, line); while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); vec_sa2.push_back(atof(ch_ptr)); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); vec_sb2.push_back(atof(ch_ptr)); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); vec_wab.push_back(atof(ch_ptr)); } @@ -128,55 +136,59 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, } // Read bf file. 
-void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, - vector<vector<vector<double> > > &BF) { - BF.clear(); vec_rs.clear(); +void ReadFile_bf(const string &file_bf, vector<string> &vec_rs, + vector<vector<vector<double>>> &BF) { + BF.clear(); + vec_rs.clear(); - igzstream infile (file_bf.c_str(), igzstream::in); - if (!infile) {cout<<"error! fail to open bf file: "<<file_bf<<endl; return;} + igzstream infile(file_bf.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open bf file: " << file_bf << endl; + return; + } string line, rs, block; vector<double> vec_bf; - vector<vector<double> > mat_bf; + vector<vector<double>> mat_bf; char *ch_ptr; size_t bf_size, flag_block; getline(infile, line); - size_t t=0; + size_t t = 0; while (!safeGetline(infile, line).eof()) { - flag_block=0; + flag_block = 0; - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; vec_rs.push_back(rs); - ch_ptr=strtok (NULL, " , \t"); - if (t==0) { - block=ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + if (t == 0) { + block = ch_ptr; } else { - if (strcmp(ch_ptr, block.c_str() )!=0) { - flag_block=1; - block=ch_ptr; + if (strcmp(ch_ptr, block.c_str()) != 0) { + flag_block = 1; + block = ch_ptr; } } - ch_ptr=strtok (NULL, " , \t"); - while (ch_ptr!=NULL) { + ch_ptr = strtok(NULL, " , \t"); + while (ch_ptr != NULL) { vec_bf.push_back(atof(ch_ptr)); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } - if (t==0) { - bf_size=vec_bf.size(); + if (t == 0) { + bf_size = vec_bf.size(); } else { - if (bf_size!=vec_bf.size()) { - cout<<"error! unequal row size in bf file."<<endl; + if (bf_size != vec_bf.size()) { + cout << "error! unequal row size in bf file." 
<< endl; } } - if (flag_block==0) { + if (flag_block == 0) { mat_bf.push_back(vec_bf); } else { BF.push_back(mat_bf); @@ -193,15 +205,14 @@ void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, return; } - // Read category files. // Read both continuous and discrete category file, record mapRS2catc. -void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, - gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, - size_t &kc, size_t &kd) { - igzstream infile (file_cat.c_str(), igzstream::in); +void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs, + gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, + size_t &kc, size_t &kd) { + igzstream infile(file_cat.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open category file: "<<file_cat<<endl; + cout << "error! fail to open category file: " << file_cat << endl; return; } @@ -213,94 +224,103 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, // Read header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_io (line, header); + ReadHeader_io(line, header); // Use the header to determine the number of categories. - kc=header.catc_col.size(); kd=header.catd_col.size(); + kc = header.catc_col.size(); + kd = header.catd_col.size(); - //set up storage and mapper - map<string, vector<double> > mapRS2catc; - map<string, vector<int> > mapRS2catd; + // set up storage and mapper + map<string, vector<double>> mapRS2catc; + map<string, vector<int>> mapRS2catd; vector<double> catc; vector<int> catd; // Read the following lines to record mapRS2cat. 
while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); + ch_ptr = strtok((char *)line.c_str(), " , \t"); - if (header.rs_col==0) { - rs=chr+":"+pos; + if (header.rs_col == 0) { + rs = chr + ":" + pos; } - catc.clear(); catd.clear(); - - for (size_t i=0; i<header.coln; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) { - rs=ch_ptr; - } else if (header.chr_col!=0 && header.chr_col==i+1) { - chr=ch_ptr; - } else if (header.pos_col!=0 && header.pos_col==i+1) { - pos=ch_ptr; - } else if (header.cm_col!=0 && header.cm_col==i+1) { - cm=ch_ptr; - } else if (header.a1_col!=0 && header.a1_col==i+1) { - a1=ch_ptr; - } else if (header.a0_col!=0 && header.a0_col==i+1) { - a0=ch_ptr; - } else if (header.catc_col.size()!=0 && header.catc_col.count(i+1)!=0 ) { - catc.push_back(atof(ch_ptr)); - } else if (header.catd_col.size()!=0 && header.catd_col.count(i+1)!=0 ) { - catd.push_back(atoi(ch_ptr)); - } else {} - - ch_ptr=strtok (NULL, " , \t"); + catc.clear(); + catd.clear(); + + for (size_t i = 0; i < header.coln; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } else if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } else if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + } else if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + } else if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } else if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; + } else if (header.catc_col.size() != 0 && + header.catc_col.count(i + 1) != 0) { + catc.push_back(atof(ch_ptr)); + } else if (header.catd_col.size() != 0 && + header.catd_col.count(i + 1) != 0) { + catd.push_back(atoi(ch_ptr)); + } else { + } + + ch_ptr = strtok(NULL, " , \t"); } - if (mapRS2catc.count(rs)==0 && kc>0) {mapRS2catc[rs]=catc;} - if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;} + if (mapRS2catc.count(rs) == 0 && kc > 0) { + mapRS2catc[rs] = catc; + } + 
if (mapRS2catd.count(rs) == 0 && kd > 0) { + mapRS2catd[rs] = catd; + } } // Load into Ad and Ac. - if (kc>0) { - Ac=gsl_matrix_alloc(vec_rs.size(), kc); - for (size_t i=0; i<vec_rs.size(); i++) { - if (mapRS2catc.count(vec_rs[i])!=0) { - for (size_t j=0; j<kc; j++) { - gsl_matrix_set(Ac, i, j, mapRS2catc[vec_rs[i]][j]); - } + if (kc > 0) { + Ac = gsl_matrix_alloc(vec_rs.size(), kc); + for (size_t i = 0; i < vec_rs.size(); i++) { + if (mapRS2catc.count(vec_rs[i]) != 0) { + for (size_t j = 0; j < kc; j++) { + gsl_matrix_set(Ac, i, j, mapRS2catc[vec_rs[i]][j]); + } } else { - for (size_t j=0; j<kc; j++) { - gsl_matrix_set(Ac, i, j, 0); - } + for (size_t j = 0; j < kc; j++) { + gsl_matrix_set(Ac, i, j, 0); + } } } } - if (kd>0) { - Ad=gsl_matrix_int_alloc(vec_rs.size(), kd); + if (kd > 0) { + Ad = gsl_matrix_int_alloc(vec_rs.size(), kd); - for (size_t i=0; i<vec_rs.size(); i++) { - if (mapRS2catd.count(vec_rs[i])!=0) { - for (size_t j=0; j<kd; j++) { - gsl_matrix_int_set(Ad, i, j, mapRS2catd[vec_rs[i]][j]); - } + for (size_t i = 0; i < vec_rs.size(); i++) { + if (mapRS2catd.count(vec_rs[i]) != 0) { + for (size_t j = 0; j < kd; j++) { + gsl_matrix_int_set(Ad, i, j, mapRS2catd[vec_rs[i]][j]); + } } else { - for (size_t j=0; j<kd; j++) { - gsl_matrix_int_set(Ad, i, j, 0); - } + for (size_t j = 0; j < kd; j++) { + gsl_matrix_int_set(Ad, i, j, 0); + } } } - dlevel=gsl_vector_int_alloc(kd); + dlevel = gsl_vector_int_alloc(kd); map<int, int> rcd; int val; - for (size_t j=0; j<kd; j++) { + for (size_t j = 0; j < kd; j++) { rcd.clear(); - for (size_t i=0; i<Ad->size1; i++) { - val = gsl_matrix_int_get(Ad, i, j); - rcd[val] = 1; + for (size_t i = 0; i < Ad->size1; i++) { + val = gsl_matrix_int_get(Ad, i, j); + rcd[val] = 1; } - gsl_vector_int_set (dlevel, j, rcd.size()); + gsl_vector_int_set(dlevel, j, rcd.size()); } } @@ -310,509 +330,531 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, return; } -void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, 
const gsl_matrix *BF) { +void BSLMMDAP::WriteResult(const gsl_matrix *Hyper, const gsl_matrix *BF) { string file_bf, file_hyp; - file_bf=path_out+"/"+file_out; - file_bf+=".bf.txt"; - file_hyp=path_out+"/"+file_out; - file_hyp+=".hyp.txt"; - - ofstream outfile_bf, outfile_hyp; - - outfile_bf.open (file_bf.c_str(), ofstream::out); - outfile_hyp.open (file_hyp.c_str(), ofstream::out); - - if (!outfile_bf) { - cout<<"error writing file: "<<file_bf<<endl; - return; - } - if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; - return; - } - - outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<< - "weight"<<endl; - outfile_hyp<<scientific; - for (size_t i=0; i<Hyper->size1; i++) { - for (size_t j=0; j<Hyper->size2; j++) { - outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t"; - } - outfile_hyp<<endl; - } - - outfile_bf<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"; - for (size_t i=0; i<BF->size2; i++) { - outfile_bf<<"\t"<<"BF"<<i+1; - } - outfile_bf<<endl; - - size_t t=0; - for (size_t i=0; i<ns_total; ++i) { - if (indicator_snp[i]==0) {continue;} - - outfile_bf<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t" - <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss; - - outfile_bf<<scientific; - for (size_t j=0; j<BF->size2; j++) { - outfile_bf<<"\t"<<setprecision(6)<<gsl_matrix_get (BF, t, j); - } - outfile_bf<<endl; - - t++; - } - - outfile_hyp.close(); - outfile_hyp.clear(); - outfile_bf.close(); - outfile_bf.clear(); - return; + file_bf = path_out + "/" + file_out; + file_bf += ".bf.txt"; + file_hyp = path_out + "/" + file_out; + file_hyp += ".hyp.txt"; + + ofstream outfile_bf, outfile_hyp; + + outfile_bf.open(file_bf.c_str(), ofstream::out); + outfile_hyp.open(file_hyp.c_str(), ofstream::out); + + if (!outfile_bf) { + cout << "error writing file: " << file_bf << endl; + return; + } + if (!outfile_hyp) { + cout << "error writing file: " << file_hyp << endl; + return; + } + + outfile_hyp << "h" + << "\t" + << "rho" + << "\t" + << 
"sa2" + << "\t" + << "sb2" + << "\t" + << "weight" << endl; + outfile_hyp << scientific; + for (size_t i = 0; i < Hyper->size1; i++) { + for (size_t j = 0; j < Hyper->size2; j++) { + outfile_hyp << setprecision(6) << gsl_matrix_get(Hyper, i, j) << "\t"; + } + outfile_hyp << endl; + } + + outfile_bf << "chr" + << "\t" + << "rs" + << "\t" + << "ps" + << "\t" + << "n_miss"; + for (size_t i = 0; i < BF->size2; i++) { + outfile_bf << "\t" + << "BF" << i + 1; + } + outfile_bf << endl; + + size_t t = 0; + for (size_t i = 0; i < ns_total; ++i) { + if (indicator_snp[i] == 0) { + continue; + } + + outfile_bf << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss; + + outfile_bf << scientific; + for (size_t j = 0; j < BF->size2; j++) { + outfile_bf << "\t" << setprecision(6) << gsl_matrix_get(BF, t, j); + } + outfile_bf << endl; + + t++; + } + + outfile_hyp.close(); + outfile_hyp.clear(); + outfile_bf.close(); + outfile_bf.clear(); + return; } -void BSLMMDAP::WriteResult (const vector<string> &vec_rs, - const gsl_matrix *Hyper, const gsl_vector *pip, - const gsl_vector *coef) { +void BSLMMDAP::WriteResult(const vector<string> &vec_rs, + const gsl_matrix *Hyper, const gsl_vector *pip, + const gsl_vector *coef) { string file_gamma, file_hyp, file_coef; - file_gamma=path_out+"/"+file_out; - file_gamma+=".gamma.txt"; - file_hyp=path_out+"/"+file_out; - file_hyp+=".hyp.txt"; - file_coef=path_out+"/"+file_out; - file_coef+=".coef.txt"; - - ofstream outfile_gamma, outfile_hyp, outfile_coef; - - outfile_gamma.open (file_gamma.c_str(), ofstream::out); - outfile_hyp.open (file_hyp.c_str(), ofstream::out); - outfile_coef.open (file_coef.c_str(), ofstream::out); - - if (!outfile_gamma) { - cout<<"error writing file: "<<file_gamma<<endl; - return; - } - if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; - return; - } - if (!outfile_coef) { - cout<<"error writing file: "<<file_coef<<endl; - return; - } - - 
outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<< - "weight"<<endl; - outfile_hyp<<scientific; - for (size_t i=0; i<Hyper->size1; i++) { - for (size_t j=0; j<Hyper->size2; j++) { - outfile_hyp<<setprecision(6)<<gsl_matrix_get (Hyper, i, j)<<"\t"; - } - outfile_hyp<<endl; - } - - outfile_gamma<<"rs"<<"\t"<<"gamma"<<endl; - for (size_t i=0; i<vec_rs.size(); ++i) { - outfile_gamma<<vec_rs[i]<<"\t"<<scientific<<setprecision(6)<< - gsl_vector_get(pip, i)<<endl; - } - - outfile_coef<<"coef"<<endl; - outfile_coef<<scientific; - for (size_t i=0; i<coef->size; i++) { - outfile_coef<<setprecision(6)<<gsl_vector_get (coef, i)<<endl; - } - - outfile_coef.close(); - outfile_coef.clear(); - outfile_hyp.close(); - outfile_hyp.clear(); - outfile_gamma.close(); - outfile_gamma.clear(); - return; -} + file_gamma = path_out + "/" + file_out; + file_gamma += ".gamma.txt"; + file_hyp = path_out + "/" + file_out; + file_hyp += ".hyp.txt"; + file_coef = path_out + "/" + file_out; + file_coef += ".coef.txt"; + ofstream outfile_gamma, outfile_hyp, outfile_coef; -double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, - const gsl_vector *K_eval, - const double sigma_b2, const double tau) { - gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size); + outfile_gamma.open(file_gamma.c_str(), ofstream::out); + outfile_hyp.open(file_hyp.c_str(), ofstream::out); + outfile_coef.open(file_coef.c_str(), ofstream::out); - double logm=0.0; - double d, uy, Hi_yy=0, logdet_H=0.0; - for (size_t i=0; i<ni_test; ++i) { - d=gsl_vector_get (K_eval, i)*sigma_b2; - d=1.0/(d+1.0); - gsl_vector_set (weight_Hi, i, d); + if (!outfile_gamma) { + cout << "error writing file: " << file_gamma << endl; + return; + } + if (!outfile_hyp) { + cout << "error writing file: " << file_hyp << endl; + return; + } + if (!outfile_coef) { + cout << "error writing file: " << file_coef << endl; + return; + } - logdet_H-=log(d); - uy=gsl_vector_get (Uty, i); - Hi_yy+=d*uy*uy; - } + outfile_hyp << "h" + << "\t" + << "rho" + << 
"\t" + << "sa2" + << "\t" + << "sb2" + << "\t" + << "weight" << endl; + outfile_hyp << scientific; + for (size_t i = 0; i < Hyper->size1; i++) { + for (size_t j = 0; j < Hyper->size2; j++) { + outfile_hyp << setprecision(6) << gsl_matrix_get(Hyper, i, j) << "\t"; + } + outfile_hyp << endl; + } - // Calculate likelihood. - logm=-0.5*logdet_H-0.5*tau*Hi_yy+0.5*log(tau)*(double)ni_test; + outfile_gamma << "rs" + << "\t" + << "gamma" << endl; + for (size_t i = 0; i < vec_rs.size(); ++i) { + outfile_gamma << vec_rs[i] << "\t" << scientific << setprecision(6) + << gsl_vector_get(pip, i) << endl; + } - gsl_vector_free (weight_Hi); + outfile_coef << "coef" << endl; + outfile_coef << scientific; + for (size_t i = 0; i < coef->size; i++) { + outfile_coef << setprecision(6) << gsl_vector_get(coef, i) << endl; + } - return logm; + outfile_coef.close(); + outfile_coef.clear(); + outfile_hyp.close(); + outfile_hyp.clear(); + outfile_gamma.close(); + outfile_gamma.clear(); + return; } -double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, - const gsl_vector *K_eval, - const double sigma_a2, - const double sigma_b2, const double tau) { - clock_t time_start; - double logm=0.0; - double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0; - - gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, - UtXgamma->size2); - gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); - gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2); - gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2); - gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1); - - gsl_matrix_memcpy (UtXgamma_eval, UtXgamma); - - logdet_H=0.0; P_yy=0.0; - for (size_t i=0; i<ni_test; ++i) { - gsl_vector_view UtXgamma_row=gsl_matrix_row(UtXgamma_eval,i); - d=gsl_vector_get (K_eval, i)*sigma_b2; - d=1.0/(d+1.0); - gsl_vector_set (weight_Hi, i, d); - - logdet_H-=log(d); - uy=gsl_vector_get (Uty, i); - P_yy+=d*uy*uy; - gsl_vector_scale (&UtXgamma_row.vector, d); - } - - // 
Calculate Omega. - gsl_matrix_set_identity (Omega); - - time_start=clock(); - lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, - UtXgamma, 1.0, Omega); - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Calculate beta_hat. - gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); - - logdet_O=CholeskySolve(Omega, XtHiy, beta_hat); - - gsl_vector_scale (beta_hat, sigma_a2); - - gsl_blas_ddot (XtHiy, beta_hat, &d); - P_yy-=d; - - gsl_matrix_free (UtXgamma_eval); - gsl_matrix_free (Omega); - gsl_vector_free (XtHiy); - gsl_vector_free (beta_hat); - gsl_vector_free (weight_Hi); - - logm=-0.5*logdet_H-0.5*logdet_O-0.5*tau*P_yy+0.5*log(tau)* - (double)ni_test; - - return logm; +double BSLMMDAP::CalcMarginal(const gsl_vector *Uty, const gsl_vector *K_eval, + const double sigma_b2, const double tau) { + gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size); + + double logm = 0.0; + double d, uy, Hi_yy = 0, logdet_H = 0.0; + for (size_t i = 0; i < ni_test; ++i) { + d = gsl_vector_get(K_eval, i) * sigma_b2; + d = 1.0 / (d + 1.0); + gsl_vector_set(weight_Hi, i, d); + + logdet_H -= log(d); + uy = gsl_vector_get(Uty, i); + Hi_yy += d * uy * uy; + } + + // Calculate likelihood. 
+ logm = -0.5 * logdet_H - 0.5 * tau * Hi_yy + 0.5 * log(tau) * (double)ni_test; + + gsl_vector_free(weight_Hi); + + return logm; } -double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) { - double logprior=0; - logprior=((double)cHyp.n_gamma-1.0)*cHyp.logp+ - ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); +double BSLMMDAP::CalcMarginal(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const gsl_vector *K_eval, const double sigma_a2, + const double sigma_b2, const double tau) { + clock_t time_start; + double logm = 0.0; + double d, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0; + + gsl_matrix *UtXgamma_eval = + gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2); + gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2); + gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1); + + gsl_matrix_memcpy(UtXgamma_eval, UtXgamma); + + logdet_H = 0.0; + P_yy = 0.0; + for (size_t i = 0; i < ni_test; ++i) { + gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i); + d = gsl_vector_get(K_eval, i) * sigma_b2; + d = 1.0 / (d + 1.0); + gsl_vector_set(weight_Hi, i, d); + + logdet_H -= log(d); + uy = gsl_vector_get(Uty, i); + P_yy += d * uy * uy; + gsl_vector_scale(&UtXgamma_row.vector, d); + } + + // Calculate Omega. + gsl_matrix_set_identity(Omega); + + time_start = clock(); + lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, + Omega); + time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Calculate beta_hat. 
+ gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); + + logdet_O = CholeskySolve(Omega, XtHiy, beta_hat); + + gsl_vector_scale(beta_hat, sigma_a2); + + gsl_blas_ddot(XtHiy, beta_hat, &d); + P_yy -= d; + + gsl_matrix_free(UtXgamma_eval); + gsl_matrix_free(Omega); + gsl_vector_free(XtHiy); + gsl_vector_free(beta_hat); + gsl_vector_free(weight_Hi); + + logm = -0.5 * logdet_H - 0.5 * logdet_O - 0.5 * tau * P_yy + + 0.5 * log(tau) * (double)ni_test; + + return logm; +} + +double BSLMMDAP::CalcPrior(class HYPBSLMM &cHyp) { + double logprior = 0; + logprior = + ((double)cHyp.n_gamma - 1.0) * cHyp.logp + + ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp)); return logprior; } // Where A is the ni_test by n_cat matrix of annotations. -void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const gsl_vector *y) { - clock_t time_start; - - // Set up BF. - double tau, h, rho, sigma_a2, sigma_b2, d; - size_t ns_causal=10; - size_t n_grid=h_ngrid*rho_ngrid; - vector<double> vec_sa2, vec_sb2, logm_null; - - gsl_matrix *BF=gsl_matrix_alloc(ns_test, n_grid); - gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1); - gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5); - - // Compute tau by using yty. - gsl_blas_ddot (Uty, Uty, &tau); - tau=(double)ni_test/tau; - - // Set up grid values for sigma_a2 and sigma_b2 based on an - // approximately even grid for h and rho, and a fixed number - // of causals. 
- size_t ij=0; - for (size_t i=0; i<h_ngrid; i++) { - h=h_min+(h_max-h_min)*(double)i/((double)h_ngrid-1); - for (size_t j=0; j<rho_ngrid; j++) { - rho=rho_min+(rho_max-rho_min)*(double)j/((double)rho_ngrid-1); - - sigma_a2=h*rho/((1-h)*(double)ns_causal); - sigma_b2=h*(1.0-rho)/(trace_G*(1-h)); - - vec_sa2.push_back(sigma_a2); - vec_sb2.push_back(sigma_b2); - logm_null.push_back(CalcMarginal (Uty, K_eval, 0.0, tau)); - - gsl_matrix_set (Hyper, ij, 0, h); - gsl_matrix_set (Hyper, ij, 1, rho); - gsl_matrix_set (Hyper, ij, 2, sigma_a2); - gsl_matrix_set (Hyper, ij, 3, sigma_b2); - gsl_matrix_set (Hyper, ij, 4, 1/(double)n_grid); - ij++; - } - } - - // Compute BF factors. - time_start=clock(); - cout<<"Calculating BF..."<<endl; - for (size_t t=0; t<ns_test; t++) { - gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, 0); - gsl_vector_const_view X_col=gsl_matrix_const_column (UtX, t); - gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector); - - for (size_t ij=0; ij<n_grid; ij++) { - sigma_a2=vec_sa2[ij]; - sigma_b2=vec_sb2[ij]; - - d=CalcMarginal (Xgamma, Uty, K_eval, sigma_a2, sigma_b2, tau); - d-=logm_null[ij]; - d=exp(d); - - gsl_matrix_set(BF, t, ij, d); - } - } - time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Save results. - WriteResult (Hyper, BF); - - // Free matrices and vectors. - gsl_matrix_free(BF); - gsl_matrix_free(Xgamma); - gsl_matrix_free(Hyper); - return; +void BSLMMDAP::DAP_CalcBF(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const gsl_vector *y) { + clock_t time_start; + + // Set up BF. + double tau, h, rho, sigma_a2, sigma_b2, d; + size_t ns_causal = 10; + size_t n_grid = h_ngrid * rho_ngrid; + vector<double> vec_sa2, vec_sb2, logm_null; + + gsl_matrix *BF = gsl_matrix_alloc(ns_test, n_grid); + gsl_matrix *Xgamma = gsl_matrix_alloc(ni_test, 1); + gsl_matrix *Hyper = gsl_matrix_alloc(n_grid, 5); + + // Compute tau by using yty. 
+ gsl_blas_ddot(Uty, Uty, &tau); + tau = (double)ni_test / tau; + + // Set up grid values for sigma_a2 and sigma_b2 based on an + // approximately even grid for h and rho, and a fixed number + // of causals. + size_t ij = 0; + for (size_t i = 0; i < h_ngrid; i++) { + h = h_min + (h_max - h_min) * (double)i / ((double)h_ngrid - 1); + for (size_t j = 0; j < rho_ngrid; j++) { + rho = rho_min + (rho_max - rho_min) * (double)j / ((double)rho_ngrid - 1); + + sigma_a2 = h * rho / ((1 - h) * (double)ns_causal); + sigma_b2 = h * (1.0 - rho) / (trace_G * (1 - h)); + + vec_sa2.push_back(sigma_a2); + vec_sb2.push_back(sigma_b2); + logm_null.push_back(CalcMarginal(Uty, K_eval, 0.0, tau)); + + gsl_matrix_set(Hyper, ij, 0, h); + gsl_matrix_set(Hyper, ij, 1, rho); + gsl_matrix_set(Hyper, ij, 2, sigma_a2); + gsl_matrix_set(Hyper, ij, 3, sigma_b2); + gsl_matrix_set(Hyper, ij, 4, 1 / (double)n_grid); + ij++; + } + } + + // Compute BF factors. + time_start = clock(); + cout << "Calculating BF..." << endl; + for (size_t t = 0; t < ns_test; t++) { + gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, 0); + gsl_vector_const_view X_col = gsl_matrix_const_column(UtX, t); + gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector); + + for (size_t ij = 0; ij < n_grid; ij++) { + sigma_a2 = vec_sa2[ij]; + sigma_b2 = vec_sb2[ij]; + + d = CalcMarginal(Xgamma, Uty, K_eval, sigma_a2, sigma_b2, tau); + d -= logm_null[ij]; + d = exp(d); + + gsl_matrix_set(BF, t, ij, d); + } + } + time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Save results. + WriteResult(Hyper, BF); + + // Free matrices and vectors. 
+ gsl_matrix_free(BF); + gsl_matrix_free(Xgamma); + gsl_matrix_free(Hyper); + return; } void single_ct_regression(const gsl_matrix_int *Xd, - const gsl_vector_int *dlevel, - const gsl_vector *pip_vec, - gsl_vector *coef, gsl_vector *prior_vec) { + const gsl_vector_int *dlevel, + const gsl_vector *pip_vec, gsl_vector *coef, + gsl_vector *prior_vec) { - map<int,double> sum_pip; - map<int,double> sum; + map<int, double> sum_pip; + map<int, double> sum; - int levels = gsl_vector_int_get(dlevel,0); + int levels = gsl_vector_int_get(dlevel, 0); - for(int i=0;i<levels;i++){ + for (int i = 0; i < levels; i++) { sum_pip[i] = sum[i] = 0; } - for(int i=0;i<Xd->size1;i++){ - int cat = gsl_matrix_int_get(Xd,i,0); - sum_pip[cat] += gsl_vector_get(pip_vec,i); + for (int i = 0; i < Xd->size1; i++) { + int cat = gsl_matrix_int_get(Xd, i, 0); + sum_pip[cat] += gsl_vector_get(pip_vec, i); sum[cat] += 1; } - for(int i=0;i<Xd->size1;i++){ - int cat = gsl_matrix_int_get(Xd,i,0); - gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]); + for (int i = 0; i < Xd->size1; i++) { + int cat = gsl_matrix_int_get(Xd, i, 0); + gsl_vector_set(prior_vec, i, sum_pip[cat] / sum[cat]); } - for(int i=0;i<levels;i++){ - double new_prior = sum_pip[i]/sum[i]; - gsl_vector_set(coef, i, log(new_prior/(1-new_prior)) ); + for (int i = 0; i < levels; i++) { + double new_prior = sum_pip[i] / sum[i]; + gsl_vector_set(coef, i, log(new_prior / (1 - new_prior))); } return; } // Where A is the ni_test by n_cat matrix of annotations. -void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, - const vector<string> &vec_rs, - const vector<double> &vec_sa2, - const vector<double> &vec_sb2, - const vector<double> &wab, - const vector<vector<vector<double> > > &BF, - gsl_matrix *Ac, gsl_matrix_int *Ad, - gsl_vector_int *dlevel) { - clock_t time_start; - - // Set up BF. 
- double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save; - size_t t1, t2; - size_t n_grid=wab.size(), ns_test=vec_rs.size(); - - gsl_vector *prior_vec=gsl_vector_alloc(ns_test); - gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5); - gsl_vector *pip=gsl_vector_alloc(ns_test); - gsl_vector *coef=gsl_vector_alloc(kc+kd+1); - - // Perform the EM algorithm. - vector<double> vec_wab, vec_wab_new; - - // Initial values. - for (size_t t=0; t<ns_test; t++) { - gsl_vector_set (prior_vec, t, (double)BF.size()/(double)ns_test); - } - for (size_t ij=0; ij<n_grid; ij++) { - vec_wab.push_back(wab[ij]); - vec_wab_new.push_back(wab[ij]); - } - - // EM iteration. - size_t it=0; - double dif=1; - while (it<100 && dif>1e-3) { - - // Update E_gamma. - t1=0, t2=0; - for (size_t b=0; b<BF.size(); b++) { - s=1; - for (size_t m=0; m<BF[b].size(); m++) { - d=0; - for (size_t ij=0; ij<n_grid; ij++) { - d+=vec_wab_new[ij]*BF[b][m][ij]; - } - d*=gsl_vector_get(prior_vec,t1)/(1-gsl_vector_get(prior_vec,t1)); - - gsl_vector_set(pip, t1, d); - s+=d; - t1++; - } - - for (size_t m=0; m<BF[b].size(); m++) { - d=gsl_vector_get(pip, t2)/s; - gsl_vector_set(pip, t2, d); - t2++; - } - } - - // Update E_wab. - s=0; - for (size_t ij=0; ij<n_grid; ij++) { - vec_wab_new[ij]=0; - - t1=0; - for (size_t b=0; b<BF.size(); b++) { - d=1; - for (size_t m=0; m<BF[b].size(); m++) { - d+=gsl_vector_get(prior_vec, t1)/ - (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij]; - t1++; - } - vec_wab_new[ij]+=log(d); - } - - s=max(s, vec_wab_new[ij]); - } - - d=0; - for (size_t ij=0; ij<n_grid; ij++) { - vec_wab_new[ij]=exp(vec_wab_new[ij]-s); - d+=vec_wab_new[ij]; - } - - for (size_t ij=0; ij<n_grid; ij++) { - vec_wab_new[ij]/=d; - } - - // Update coef, and pi. - if(kc==0 && kd==0){ - - // No annotation. 
- s=0; - for (size_t t=0; t<pip->size; t++) { - s+=gsl_vector_get(pip, t); - } - s=s/(double)pip->size; - for (size_t t=0; t<pip->size; t++) { - gsl_vector_set(prior_vec, t, s); - } - - gsl_vector_set (coef, 0, log(s/(1-s))); - } else if(kc==0 && kd!=0){ - - // Only discrete annotations. - if(kd == 1){ - single_ct_regression(Ad, dlevel, pip, coef, prior_vec); - }else{ - logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0); - logistic_cat_pred(coef, Ad, dlevel, prior_vec); - } - } else if (kc!=0 && kd==0) { - - // Only continuous annotations. - logistic_cont_fit(coef, Ac, pip, 0, 0); - logistic_cont_pred(coef, Ac, prior_vec); - } else if (kc!=0 && kd!=0) { - - // Both continuous and categorical annotations. - logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0); - logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec); - } - - // Compute marginal likelihood. - logm=0; - - t1=0; - for (size_t b=0; b<BF.size(); b++) { - d=1; s=0; - for (size_t m=0; m<BF[b].size(); m++) { - s+=log(1-gsl_vector_get(prior_vec, t1)); - for (size_t ij=0; ij<n_grid; ij++) { - d+=gsl_vector_get(prior_vec, t1)/ - (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij]; - } - } - logm+=log(d)+s; - t1++; - } - - if (it>0) { - dif=logm-logm_save; - } - logm_save=logm; - it++; - - cout<<"iteration = "<<it<<"; marginal likelihood = "<<logm<<endl; - } - - // Update h and rho that correspond to w_ab. - for (size_t ij=0; ij<n_grid; ij++) { - sigma_a2=vec_sa2[ij]; - sigma_b2=vec_sb2[ij]; - - d=exp(gsl_vector_get(coef, coef->size-1))/ - (1+exp(gsl_vector_get(coef, coef->size-1))); - h=(d*(double)ns_test*sigma_a2+1*sigma_b2)/ - (1+d*(double)ns_test*sigma_a2+1*sigma_b2); - rho=d*(double)ns_test*sigma_a2/ - (d*(double)ns_test*sigma_a2+1*sigma_b2); - - gsl_matrix_set (Hyper, ij, 0, h); - gsl_matrix_set (Hyper, ij, 1, rho); - gsl_matrix_set (Hyper, ij, 2, sigma_a2); - gsl_matrix_set (Hyper, ij, 3, sigma_b2); - gsl_matrix_set (Hyper, ij, 4, vec_wab_new[ij]); - } - - // Obtain beta and alpha parameters. 
- - // Save results. - WriteResult (vec_rs, Hyper, pip, coef); - - // Free matrices and vectors. - gsl_vector_free(prior_vec); - gsl_matrix_free(Hyper); - gsl_vector_free(pip); - gsl_vector_free(coef); - return; +void BSLMMDAP::DAP_EstimateHyper( + const size_t kc, const size_t kd, const vector<string> &vec_rs, + const vector<double> &vec_sa2, const vector<double> &vec_sb2, + const vector<double> &wab, const vector<vector<vector<double>>> &BF, + gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) { + clock_t time_start; + + // Set up BF. + double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save; + size_t t1, t2; + size_t n_grid = wab.size(), ns_test = vec_rs.size(); + + gsl_vector *prior_vec = gsl_vector_alloc(ns_test); + gsl_matrix *Hyper = gsl_matrix_alloc(n_grid, 5); + gsl_vector *pip = gsl_vector_alloc(ns_test); + gsl_vector *coef = gsl_vector_alloc(kc + kd + 1); + + // Perform the EM algorithm. + vector<double> vec_wab, vec_wab_new; + + // Initial values. + for (size_t t = 0; t < ns_test; t++) { + gsl_vector_set(prior_vec, t, (double)BF.size() / (double)ns_test); + } + for (size_t ij = 0; ij < n_grid; ij++) { + vec_wab.push_back(wab[ij]); + vec_wab_new.push_back(wab[ij]); + } + + // EM iteration. + size_t it = 0; + double dif = 1; + while (it < 100 && dif > 1e-3) { + + // Update E_gamma. + t1 = 0, t2 = 0; + for (size_t b = 0; b < BF.size(); b++) { + s = 1; + for (size_t m = 0; m < BF[b].size(); m++) { + d = 0; + for (size_t ij = 0; ij < n_grid; ij++) { + d += vec_wab_new[ij] * BF[b][m][ij]; + } + d *= + gsl_vector_get(prior_vec, t1) / (1 - gsl_vector_get(prior_vec, t1)); + + gsl_vector_set(pip, t1, d); + s += d; + t1++; + } + + for (size_t m = 0; m < BF[b].size(); m++) { + d = gsl_vector_get(pip, t2) / s; + gsl_vector_set(pip, t2, d); + t2++; + } + } + + // Update E_wab. 
+ s = 0; + for (size_t ij = 0; ij < n_grid; ij++) { + vec_wab_new[ij] = 0; + + t1 = 0; + for (size_t b = 0; b < BF.size(); b++) { + d = 1; + for (size_t m = 0; m < BF[b].size(); m++) { + d += gsl_vector_get(prior_vec, t1) / + (1 - gsl_vector_get(prior_vec, t1)) * vec_wab[ij] * BF[b][m][ij]; + t1++; + } + vec_wab_new[ij] += log(d); + } + + s = max(s, vec_wab_new[ij]); + } + + d = 0; + for (size_t ij = 0; ij < n_grid; ij++) { + vec_wab_new[ij] = exp(vec_wab_new[ij] - s); + d += vec_wab_new[ij]; + } + + for (size_t ij = 0; ij < n_grid; ij++) { + vec_wab_new[ij] /= d; + } + + // Update coef, and pi. + if (kc == 0 && kd == 0) { + + // No annotation. + s = 0; + for (size_t t = 0; t < pip->size; t++) { + s += gsl_vector_get(pip, t); + } + s = s / (double)pip->size; + for (size_t t = 0; t < pip->size; t++) { + gsl_vector_set(prior_vec, t, s); + } + + gsl_vector_set(coef, 0, log(s / (1 - s))); + } else if (kc == 0 && kd != 0) { + + // Only discrete annotations. + if (kd == 1) { + single_ct_regression(Ad, dlevel, pip, coef, prior_vec); + } else { + logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0); + logistic_cat_pred(coef, Ad, dlevel, prior_vec); + } + } else if (kc != 0 && kd == 0) { + + // Only continuous annotations. + logistic_cont_fit(coef, Ac, pip, 0, 0); + logistic_cont_pred(coef, Ac, prior_vec); + } else if (kc != 0 && kd != 0) { + + // Both continuous and categorical annotations. + logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0); + logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec); + } + + // Compute marginal likelihood. 
+ logm = 0; + + t1 = 0; + for (size_t b = 0; b < BF.size(); b++) { + d = 1; + s = 0; + for (size_t m = 0; m < BF[b].size(); m++) { + s += log(1 - gsl_vector_get(prior_vec, t1)); + for (size_t ij = 0; ij < n_grid; ij++) { + d += gsl_vector_get(prior_vec, t1) / + (1 - gsl_vector_get(prior_vec, t1)) * vec_wab[ij] * BF[b][m][ij]; + } + } + logm += log(d) + s; + t1++; + } + + if (it > 0) { + dif = logm - logm_save; + } + logm_save = logm; + it++; + + cout << "iteration = " << it << "; marginal likelihood = " << logm << endl; + } + + // Update h and rho that correspond to w_ab. + for (size_t ij = 0; ij < n_grid; ij++) { + sigma_a2 = vec_sa2[ij]; + sigma_b2 = vec_sb2[ij]; + + d = exp(gsl_vector_get(coef, coef->size - 1)) / + (1 + exp(gsl_vector_get(coef, coef->size - 1))); + h = (d * (double)ns_test * sigma_a2 + 1 * sigma_b2) / + (1 + d * (double)ns_test * sigma_a2 + 1 * sigma_b2); + rho = d * (double)ns_test * sigma_a2 / + (d * (double)ns_test * sigma_a2 + 1 * sigma_b2); + + gsl_matrix_set(Hyper, ij, 0, h); + gsl_matrix_set(Hyper, ij, 1, rho); + gsl_matrix_set(Hyper, ij, 2, sigma_a2); + gsl_matrix_set(Hyper, ij, 3, sigma_b2); + gsl_matrix_set(Hyper, ij, 4, vec_wab_new[ij]); + } + + // Obtain beta and alpha parameters. + + // Save results. + WriteResult(vec_rs, Hyper, pip, coef); + + // Free matrices and vectors. + gsl_vector_free(prior_vec); + gsl_matrix_free(Hyper); + gsl_vector_free(pip); + gsl_vector_free(coef); + return; } diff --git a/src/bslmmdap.h b/src/bslmmdap.h index db5774b..dc05e34 100644 --- a/src/bslmmdap.h +++ b/src/bslmmdap.h @@ -19,97 +19,91 @@ #ifndef __BSLMMDAP_H__ #define __BSLMMDAP_H__ -#include <vector> -#include <map> -#include <gsl/gsl_rng.h> -#include <gsl/gsl_randist.h> #include "param.h" +#include <gsl/gsl_randist.h> +#include <gsl/gsl_rng.h> +#include <map> +#include <vector> using namespace std; class BSLMMDAP { public: - // IO-related parameters. 
- int a_mode; - size_t d_pace; - - string file_bfile; - string file_geno; - string file_out; - string path_out; - - // LMM related parameters - double pve_null; - double pheno_mean; - - // BSLMM MCMC related parameters - long int randseed; - double trace_G; - - HYPBSLMM cHyp_initial; - - // Summary statistics - size_t ni_total, ns_total; // Number of total individuals and SNPs. - size_t ni_test, ns_test; // Number of individuals and SNPs - // used for analysis. - - double h_min, h_max, rho_min, rho_max; - size_t h_ngrid, rho_ngrid; - - double time_UtZ; - double time_Omega; // Time spent on optimization iterations. - double time_Proposal; // Time spent on constructing the - // proposal distribution for gamma - // (i.e., lmm or lm analysis). - - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis. - vector<int> indicator_idv; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; - - vector<SNPINFO> snpInfo; // Record SNP information. - - // Main functions. 
- void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - - void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF); - void WriteResult (const vector<string> &vec_rs, - const gsl_matrix *Hyper, const gsl_vector *pip, - const gsl_vector *coef); - double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, - const double sigma_b2, const double tau); - double CalcMarginal (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, const gsl_vector *K_eval, - const double sigma_a2, const double sigma_b2, - const double tau); - double CalcPrior (class HYPBSLMM &cHyp); - - void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const gsl_vector *y); - void DAP_EstimateHyper (const size_t kc, const size_t kd, - const vector<string> &vec_rs, - const vector<double> &vec_sa2, - const vector<double> &vec_sb2, - const vector<double> &wab, - const vector<vector<vector<double> > > &BF, - gsl_matrix *Ac, gsl_matrix_int *Ad, - gsl_vector_int *dlevel); + // IO-related parameters. + int a_mode; + size_t d_pace; + + string file_bfile; + string file_geno; + string file_out; + string path_out; + + // LMM related parameters + double pve_null; + double pheno_mean; + + // BSLMM MCMC related parameters + long int randseed; + double trace_G; + + HYPBSLMM cHyp_initial; + + // Summary statistics + size_t ni_total, ns_total; // Number of total individuals and SNPs. + size_t ni_test, ns_test; // Number of individuals and SNPs + // used for analysis. + + double h_min, h_max, rho_min, rho_max; + size_t h_ngrid, rho_ngrid; + + double time_UtZ; + double time_Omega; // Time spent on optimization iterations. + double time_Proposal; // Time spent on constructing the + // proposal distribution for gamma + // (i.e., lmm or lm analysis). + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. 
+ vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + vector<SNPINFO> snpInfo; // Record SNP information. + + // Main functions. + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + + void WriteResult(const gsl_matrix *Hyper, const gsl_matrix *BF); + void WriteResult(const vector<string> &vec_rs, const gsl_matrix *Hyper, + const gsl_vector *pip, const gsl_vector *coef); + double CalcMarginal(const gsl_vector *Uty, const gsl_vector *K_eval, + const double sigma_b2, const double tau); + double CalcMarginal(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const gsl_vector *K_eval, const double sigma_a2, + const double sigma_b2, const double tau); + double CalcPrior(class HYPBSLMM &cHyp); + + void DAP_CalcBF(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const gsl_vector *y); + void + DAP_EstimateHyper(const size_t kc, const size_t kd, + const vector<string> &vec_rs, const vector<double> &vec_sa2, + const vector<double> &vec_sb2, const vector<double> &wab, + const vector<vector<vector<double>>> &BF, gsl_matrix *Ac, + gsl_matrix_int *Ad, gsl_vector_int *dlevel); }; -void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, - vector<double> &vec_sb2, vector<double> &vec_wab); -void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, - vector<vector<vector<double> > > &BF); -void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, - gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, - size_t &kc, size_t &kd); +void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2, + vector<double> &vec_sb2, vector<double> &vec_wab); +void ReadFile_bf(const string &file_bf, vector<string> &vec_rs, + vector<vector<vector<double>>> &BF); +void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs, + gsl_matrix *Ac, gsl_matrix_int 
*Ad, gsl_vector_int *dlevel, + size_t &kc, size_t &kd); #endif - - diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp index 733dae1..a8c545c 100644 --- a/src/eigenlib.cpp +++ b/src/eigenlib.cpp @@ -16,13 +16,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> +#include "Eigen/Dense" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" #include <cmath> +#include <iostream> #include <vector> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" -#include "Eigen/Dense" using namespace std; using namespace Eigen; @@ -34,82 +34,79 @@ using namespace Eigen; // eigen, 1x or 0.3x slower than lapack // invert, 20x or 10x faster than lapack // -void eigenlib_dgemm (const char *TransA, const char *TransB, - const double alpha, const gsl_matrix *A, - const gsl_matrix *B, const double beta, - gsl_matrix *C) { - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > - A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) ); - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > - B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda) ); - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > - C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda) ); +void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha, + const gsl_matrix *A, const gsl_matrix *B, const double beta, + gsl_matrix *C) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>> + A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda)); + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>> + B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda)); + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>> + C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda)); - if (*TransA=='N' || *TransA=='n') { - if 
(*TransB=='N' || *TransB=='n') { - C_mat=alpha*A_mat*B_mat+beta*C_mat; + if (*TransA == 'N' || *TransA == 'n') { + if (*TransB == 'N' || *TransB == 'n') { + C_mat = alpha * A_mat * B_mat + beta * C_mat; } else { - C_mat=alpha*A_mat*B_mat.transpose()+beta*C_mat; + C_mat = alpha * A_mat * B_mat.transpose() + beta * C_mat; } } else { - if (*TransB=='N' || *TransB=='n') { - C_mat=alpha*A_mat.transpose()*B_mat+beta*C_mat; + if (*TransB == 'N' || *TransB == 'n') { + C_mat = alpha * A_mat.transpose() * B_mat + beta * C_mat; } else { - C_mat=alpha*A_mat.transpose()*B_mat.transpose()+beta*C_mat; + C_mat = alpha * A_mat.transpose() * B_mat.transpose() + beta * C_mat; } } return; } -void eigenlib_dgemv (const char *TransA, const double alpha, - const gsl_matrix *A, const gsl_vector *x, - const double beta, gsl_vector *y) { - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > - A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) ); - Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > - x_vec(x->data, x->size, InnerStride<Dynamic>(x->stride) ); - Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > - y_vec(y->data, y->size, InnerStride<Dynamic>(y->stride) ); +void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A, + const gsl_vector *x, const double beta, gsl_vector *y) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>> + A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda)); + Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> x_vec( + x->data, x->size, InnerStride<Dynamic>(x->stride)); + Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic>> y_vec( + y->data, y->size, InnerStride<Dynamic>(y->stride)); - if (*TransA=='N' || *TransA=='n') { - y_vec=alpha*A_mat*x_vec+beta*y_vec; + if (*TransA == 'N' || *TransA == 'n') { + y_vec = alpha * A_mat * x_vec + beta * y_vec; } else { - y_vec=alpha*A_mat.transpose()*x_vec+beta*y_vec; + y_vec = alpha * A_mat.transpose() * 
x_vec + beta * y_vec; } return; } void eigenlib_invert(gsl_matrix *A) { - Map<Matrix<double, Dynamic, Dynamic, RowMajor> > - A_mat(A->data, A->size1, A->size2); - A_mat=A_mat.inverse(); + Map<Matrix<double, Dynamic, Dynamic, RowMajor>> A_mat(A->data, A->size1, + A->size2); + A_mat = A_mat.inverse(); return; } -void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A) { - Map<Matrix<double, Dynamic, Dynamic, RowMajor> > - A_mat(A->data, A->size1, A->size2); - Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > - b_vec(b->data, b->size, OuterStride<Dynamic>(b->stride) ); - A_mat=alpha*b_vec*b_vec.transpose()+A_mat; +void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor>> A_mat(A->data, A->size1, + A->size2); + Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic>> b_vec( + b->data, b->size, OuterStride<Dynamic>(b->stride)); + A_mat = alpha * b_vec * b_vec.transpose() + A_mat; return; } -void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, - gsl_vector *eval) { - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > - G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda) ); - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > - U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda) ); - Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > - eval_vec(eval->data, eval->size, OuterStride<Dynamic>(eval->stride) ); +void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>> + G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda)); + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>> + U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda)); + Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic>> eval_vec( + eval->data, eval->size, OuterStride<Dynamic>(eval->stride)); 
SelfAdjointEigenSolver<MatrixXd> es(G_mat); if (es.info() != Success) abort(); - eval_vec=es.eigenvalues(); - U_mat=es.eigenvectors(); + eval_vec = es.eigenvalues(); + U_mat = es.eigenvectors(); return; } diff --git a/src/eigenlib.h b/src/eigenlib.h index 3659dc1..b29fa63 100644 --- a/src/eigenlib.h +++ b/src/eigenlib.h @@ -23,15 +23,13 @@ using namespace std; -void eigenlib_dgemm (const char *TransA, const char *TransB, - const double alpha, const gsl_matrix *A, - const gsl_matrix *B, const double beta, - gsl_matrix *C); -void eigenlib_dgemv (const char *TransA, const double alpha, - const gsl_matrix *A, const gsl_vector *x, - const double beta, gsl_vector *y); +void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha, + const gsl_matrix *A, const gsl_matrix *B, const double beta, + gsl_matrix *C); +void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A, + const gsl_vector *x, const double beta, gsl_vector *y); void eigenlib_invert(gsl_matrix *A); -void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A); -void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval); +void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A); +void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval); #endif diff --git a/src/gemma.cpp b/src/gemma.cpp index 1a9ca9b..c72475b 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -16,427 +16,670 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> +#include <cmath> +#include <cstring> +#include <ctime> #include <fstream> +#include <iostream> #include <string> -#include <cstring> #include <sys/stat.h> -#include <ctime> -#include <cmath> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" -#include "gsl/gsl_eigen.h" #include "gsl/gsl_cdf.h" +#include "gsl/gsl_eigen.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" -#include "lapack.h" -#include "io.h" -#include "gemma.h" -#include "vc.h" -#include "lm.h" #include "bslmm.h" #include "bslmmdap.h" +#include "gemma.h" +#include "io.h" +#include "lapack.h" #include "ldr.h" +#include "lm.h" #include "lmm.h" +#include "mathfunc.h" #include "mvlmm.h" #include "prdt.h" #include "varcov.h" -#include "mathfunc.h" +#include "vc.h" using namespace std; -GEMMA::GEMMA(void): -version("0.97"), date("07/27/2017"), year("2017") -{} - -void GEMMA::PrintHeader (void) { - cout<<endl; - cout<<"*********************************************************"<<endl; - cout<<" Genome-wide Efficient Mixed Model Association (GEMMA) "<<endl; - cout<<" Version "<<version<<", "<<date<<" "<< - endl; - cout<<" Visit http://www.xzlab.org/software.html For Updates "<<endl; - cout<<" (C) "<<year<<" Xiang Zhou "<<endl; - cout<<" GNU General Public License "<<endl; - cout<<" For Help, Type ./gemma -h "<<endl; - cout<<"*********************************************************"<<endl; - cout<<endl; +GEMMA::GEMMA(void) : version("0.97"), date("07/27/2017"), year("2017") {} + +void GEMMA::PrintHeader(void) { + cout << endl; + cout << "*********************************************************" << endl; + cout << " Genome-wide Efficient Mixed Model Association (GEMMA) " << endl; + cout << " Version " << version << ", " << date + << " " << endl; + cout << " Visit http://www.xzlab.org/software.html For Updates " << endl; + cout << " (C) " << year << " Xiang Zhou " + << endl; + cout << " 
GNU General Public License " << endl; + cout << " For Help, Type ./gemma -h " << endl; + cout << "*********************************************************" << endl; + cout << endl; return; } -void GEMMA::PrintLicense (void) { - cout<<endl; - cout<<"The Software Is Distributed Under GNU General Public "<< - "License, But May Also Require The Following Notifications."<<endl; - cout<<endl; - - cout<<"Including Lapack Routines In The Software May Require"<< - " The Following Notification:"<<endl; - cout<<"Copyright (c) 1992-2010 The University of Tennessee and "<< - "The University of Tennessee Research Foundation. All rights "<< - "reserved."<<endl; - cout<<"Copyright (c) 2000-2010 The University of California "<< - "Berkeley. All rights reserved."<<endl; - cout<<"Copyright (c) 2006-2010 The University of Colorado Denver. "<< - "All rights reserved."<<endl; - cout<<endl; - - cout<<"$COPYRIGHT$"<<endl; - cout<<"Additional copyrights may follow"<<endl; - cout<<"$HEADER$"<<endl; - cout<<"Redistribution and use in source and binary forms, with or "<< - "without modification, are permitted provided that the following "<< - " conditions are met:"<<endl; - cout<<"- Redistributions of source code must retain the above "<< - "copyright notice, this list of conditions and the following "<< - "disclaimer."<<endl; - cout<<"- Redistributions in binary form must reproduce the above "<< - "copyright notice, this list of conditions and the following "<< - "disclaimer listed in this license in the documentation and/or "<< - "other materials provided with the distribution."<<endl; - cout<<"- Neither the name of the copyright holders nor the names "<< - "of its contributors may be used to endorse or promote products "<< - "derived from this software without specific prior written "<< - "permission."<<endl; - cout<<"The copyright holders provide no reassurances that the "<< - "source code provided does not infringe any patent, copyright, "<< - "or any other "<< - "intellectual property 
rights of third parties. "<< - "The copyright holders disclaim any liability to any recipient "<< - "for claims brought against "<< - "recipient by any third party for infringement of that parties "<< - "intellectual property rights. "<<endl; - cout<<"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND "<< - "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, "<< - "INCLUDING, BUT NOT "<< - "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND "<< - "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT "<< - "SHALL THE COPYRIGHT "<< - "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, "<< - "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "<< - "(INCLUDING, BUT NOT "<< - "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; "<< - "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) "<< - "HOWEVER CAUSED AND ON ANY "<< - "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, "<< - "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY "<< - "OUT OF THE USE "<< - "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF "<< - "SUCH DAMAGE."<<endl; - cout<<endl; - - return; +void GEMMA::PrintLicense(void) { + cout << endl; + cout << "The Software Is Distributed Under GNU General Public " + << "License, But May Also Require The Following Notifications." << endl; + cout << endl; + + cout << "Including Lapack Routines In The Software May Require" + << " The Following Notification:" << endl; + cout << "Copyright (c) 1992-2010 The University of Tennessee and " + << "The University of Tennessee Research Foundation. All rights " + << "reserved." << endl; + cout << "Copyright (c) 2000-2010 The University of California " + << "Berkeley. All rights reserved." << endl; + cout << "Copyright (c) 2006-2010 The University of Colorado Denver. " + << "All rights reserved." 
<< endl; + cout << endl; + + cout << "$COPYRIGHT$" << endl; + cout << "Additional copyrights may follow" << endl; + cout << "$HEADER$" << endl; + cout << "Redistribution and use in source and binary forms, with or " + << "without modification, are permitted provided that the following " + << " conditions are met:" << endl; + cout << "- Redistributions of source code must retain the above " + << "copyright notice, this list of conditions and the following " + << "disclaimer." << endl; + cout << "- Redistributions in binary form must reproduce the above " + << "copyright notice, this list of conditions and the following " + << "disclaimer listed in this license in the documentation and/or " + << "other materials provided with the distribution." << endl; + cout << "- Neither the name of the copyright holders nor the names " + << "of its contributors may be used to endorse or promote products " + << "derived from this software without specific prior written " + << "permission." << endl; + cout << "The copyright holders provide no reassurances that the " + << "source code provided does not infringe any patent, copyright, " + << "or any other " + << "intellectual property rights of third parties. " + << "The copyright holders disclaim any liability to any recipient " + << "for claims brought against " + << "recipient by any third party for infringement of that parties " + << "intellectual property rights. " << endl; + cout << "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND " + << "CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, " + << "INCLUDING, BUT NOT " + << "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND " + << "FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT " + << "SHALL THE COPYRIGHT " + << "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, " + << "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES " + << "(INCLUDING, BUT NOT " + << "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; " + << "LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) " + << "HOWEVER CAUSED AND ON ANY " + << "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, " + << "OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY " + << "OUT OF THE USE " + << "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF " + << "SUCH DAMAGE." << endl; + cout << endl; + + return; } void GEMMA::PrintHelp(size_t option) { - if (option==0) { - cout<<endl; - cout<<" GEMMA version "<<version<<", released on "<<date<<endl; - cout<<" implemented by Xiang Zhou"<<endl; - cout<<endl; - cout<<" type ./gemma -h [num] for detailed helps"<<endl; - cout<<" options: " << endl; - cout<<" 1: quick guide"<<endl; - cout<<" 2: file I/O related"<<endl; - cout<<" 3: SNP QC"<<endl; - cout<<" 4: calculate relatedness matrix"<<endl; - cout<<" 5: perform eigen decomposition"<<endl; - cout<<" 6: perform variance component estimation"<<endl; - cout<<" 7: fit a linear model"<<endl; - cout<<" 8: fit a linear mixed model"<<endl; - cout<<" 9: fit a multivariate linear mixed model"<<endl; - cout<<" 10: fit a Bayesian sparse linear mixed model"<<endl; - cout<<" 11: obtain predicted values"<<endl; - cout<<" 12: calculate snp variance covariance"<<endl; - cout<<" 13: note"<<endl; - cout<<endl; + if (option == 0) { + cout << endl; + cout << " GEMMA version " << version << ", released on " << date << endl; + cout << " implemented by Xiang Zhou" << endl; + cout << endl; + cout << " type ./gemma -h [num] for detailed helps" << endl; + cout << " options: " << endl; + cout << " 1: quick guide" << endl; + cout << " 2: file I/O related" << endl; + cout << " 3: SNP QC" << endl; + cout << " 4: calculate relatedness matrix" << endl; + cout << " 5: 
perform eigen decomposition" << endl; + cout << " 6: perform variance component estimation" << endl; + cout << " 7: fit a linear model" << endl; + cout << " 8: fit a linear mixed model" << endl; + cout << " 9: fit a multivariate linear mixed model" << endl; + cout << " 10: fit a Bayesian sparse linear mixed model" << endl; + cout << " 11: obtain predicted values" << endl; + cout << " 12: calculate snp variance covariance" << endl; + cout << " 13: note" << endl; + cout << endl; } - if (option==1) { - cout<<" QUICK GUIDE" << endl; - cout<<" to generate a relatedness matrix: "<<endl; - cout<<" ./gemma -bfile [prefix] -gk [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]"<<endl; - cout<<" to generate the S matrix: "<<endl; - cout<<" ./gemma -bfile [prefix] -gs -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -g [filename] -gs -o [prefix]"<<endl; - cout<<" ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -g [filename] -cat [filename] -gs -o [prefix]"<<endl; - cout<<" ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -g [filename] -sample [num] -gs -o [prefix]"<<endl; - cout<<" to generate the q vector: "<<endl; - cout<<" ./gemma -beta [filename] -gq -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cat [filename] -gq -o [prefix]"<<endl; - cout<<" to generate the ldsc weigthts: "<<endl; - cout<<" ./gemma -beta [filename] -gw -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cat [filename] -gw -o [prefix]"<<endl; - cout<<" to perform eigen decomposition of the relatedness matrix: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -k [filename] -eigen -o [prefix]"<<endl; - cout<<" to estimate variance components: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -k [filename] -vc 
[num] -o [prefix]"<<endl; - cout<<" ./gemma -bfile [prefix] -mk [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cor [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -cor [filename] -cat [filename] -vc [num] -o [prefix]"<<endl; - cout<<" options for the above two commands: -crt -windowbp [num]"<<endl; - cout<<" ./gemma -mq [filename] -ms [filename] -mv [filename] -vc [num] -o [prefix]"<<endl; - cout<<" or with summary statistics, replace bfile with mbfile, or g or mg; vc=1 for HE weights and vc=2 for LDSC weights"<<endl; - cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]"<<endl; - cout<<" ./gemma -beta [filename] -bfile [filename] -cat [filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]"<<endl; - cout<<" to fit a linear mixed model: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" to fit a linear mixed model to test g by e effects: "<<endl; - cout<<" ./gemma -bfile [prefix] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -gxe [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" to fit a univariate linear mixed model with different residual weights for different individuals: "<<endl; - cout<<" ./gemma -bfile [prefix] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -weight [filename] -k [filename] -lmm [num] -o [prefix]"<<endl; - cout<<" to fit a multivariate linear mixed model: "<<endl; - cout<<" ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] 
-k [filename] -lmm [num] -n [num1] [num2] -o [prefix]"<<endl; - cout<<" to fit a Bayesian sparse linear mixed model: "<<endl; - cout<<" ./gemma -bfile [prefix] -bslmm [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -a [filename] -bslmm [num] -o [prefix]"<<endl; - cout<<" to obtain predicted values: "<<endl; - cout<<" ./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix]"<<endl; - cout<<" to calculate correlations between SNPs: "<<endl; - cout<<" ./gemma -bfile [prefix] -calccor -o [prefix]"<<endl; - cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl; - cout<<endl; + if (option == 1) { + cout << " QUICK GUIDE" << endl; + cout << " to generate a relatedness matrix: " << endl; + cout << " ./gemma -bfile [prefix] -gk [num] -o [prefix]" << endl; + cout << " ./gemma -g [filename] -p [filename] -gk [num] -o [prefix]" + << endl; + cout << " to generate the S matrix: " << endl; + cout << " ./gemma -bfile [prefix] -gs -o [prefix]" << endl; + cout << " ./gemma -p [filename] -g [filename] -gs -o [prefix]" + << endl; + cout << " ./gemma -bfile [prefix] -cat [filename] -gs -o [prefix]" + << endl; + cout << " ./gemma -p [filename] -g [filename] -cat [filename] -gs " + "-o [prefix]" + << endl; + cout << " ./gemma -bfile [prefix] -sample [num] -gs -o [prefix]" + << endl; + cout << " ./gemma -p [filename] -g [filename] -sample [num] -gs -o " + "[prefix]" + << endl; + cout << " to generate the q vector: " << endl; + cout << " ./gemma -beta [filename] -gq -o [prefix]" << endl; + cout << " ./gemma -beta [filename] -cat [filename] -gq -o [prefix]" + << endl; + cout << " to generate the ldsc weigthts: " << endl; + cout << " ./gemma -beta [filename] -gw -o [prefix]" << endl; + cout << " ./gemma -beta [filename] -cat [filename] -gw -o [prefix]" + 
<< endl; + cout << " to perform eigen decomposition of the relatedness matrix: " + << endl; + cout << " ./gemma -bfile [prefix] -k [filename] -eigen -o [prefix]" + << endl; + cout << " ./gemma -g [filename] -p [filename] -k [filename] -eigen " + "-o [prefix]" + << endl; + cout << " to estimate variance components: " << endl; + cout << " ./gemma -bfile [prefix] -k [filename] -vc [num] -o " + "[prefix]" + << endl; + cout << " ./gemma -p [filename] -k [filename] -vc [num] -o [prefix]" + << endl; + cout << " ./gemma -bfile [prefix] -mk [filename] -vc [num] -o " + "[prefix]" + << endl; + cout + << " ./gemma -p [filename] -mk [filename] -vc [num] -o [prefix]" + << endl; + cout << " ./gemma -beta [filename] -cor [filename] -vc [num] -o " + "[prefix]" + << endl; + cout << " ./gemma -beta [filename] -cor [filename] -cat [filename] " + "-vc [num] -o [prefix]" + << endl; + cout << " options for the above two commands: -crt -windowbp [num]" + << endl; + cout << " ./gemma -mq [filename] -ms [filename] -mv [filename] -vc " + "[num] -o [prefix]" + << endl; + cout << " or with summary statistics, replace bfile with mbfile, " + "or g or mg; vc=1 for HE weights and vc=2 for LDSC weights" + << endl; + cout << " ./gemma -beta [filename] -bfile [filename] -cat " + "[filename] -wsnp [filename] -wcat [filename] -vc [num] -o [prefix]" + << endl; + cout << " ./gemma -beta [filename] -bfile [filename] -cat " + "[filename] -wsnp [filename] -wcat [filename] -ci [num] -o [prefix]" + << endl; + cout << " to fit a linear mixed model: " << endl; + cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -o " + "[prefix]" + << endl; + cout << " ./gemma -g [filename] -p [filename] -a [filename] -k " + "[filename] -lmm [num] -o [prefix]" + << endl; + cout << " to fit a linear mixed model to test g by e effects: " << endl; + cout << " ./gemma -bfile [prefix] -gxe [filename] -k [filename] " + "-lmm [num] -o [prefix]" + << endl; + cout << " ./gemma -g [filename] -p [filename] -a [filename] -gxe " + 
"[filename] -k [filename] -lmm [num] -o [prefix]" + << endl; + cout << " to fit a univariate linear mixed model with different residual " + "weights for different individuals: " + << endl; + cout << " ./gemma -bfile [prefix] -weight [filename] -k [filename] " + "-lmm [num] -o [prefix]" + << endl; + cout << " ./gemma -g [filename] -p [filename] -a [filename] " + "-weight [filename] -k [filename] -lmm [num] -o [prefix]" + << endl; + cout << " to fit a multivariate linear mixed model: " << endl; + cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -n " + "[num1] [num2] -o [prefix]" + << endl; + cout << " ./gemma -g [filename] -p [filename] -a [filename] -k " + "[filename] -lmm [num] -n [num1] [num2] -o [prefix]" + << endl; + cout << " to fit a Bayesian sparse linear mixed model: " << endl; + cout << " ./gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl; + cout << " ./gemma -g [filename] -p [filename] -a [filename] -bslmm " + "[num] -o [prefix]" + << endl; + cout << " to obtain predicted values: " << endl; + cout << " ./gemma -bfile [prefix] -epm [filename] -emu [filename] " + "-ebv [filename] -k [filename] -predict [num] -o [prefix]" + << endl; + cout << " ./gemma -g [filename] -p [filename] -epm [filename] -emu " + "[filename] -ebv [filename] -k [filename] -predict [num] -o " + "[prefix]" + << endl; + cout << " to calculate correlations between SNPs: " << endl; + cout << " ./gemma -bfile [prefix] -calccor -o [prefix]" << endl; + cout << " ./gemma -g [filename] -p [filename] -calccor -o [prefix]" + << endl; + cout << endl; } - if (option==2) { - cout<<" FILE I/O RELATED OPTIONS" << endl; - cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl; - cout<<" requires: *.fam, *.bim and *.bed files"<<endl; - cout<<" missing value: -9"<<endl; - cout<<" -g [filename] "<<" specify input BIMBAM mean genotype file name"<<endl; - cout<<" format: rs#1, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; - 
cout<<" rs#2, allele0, allele1, genotype for individual 1, genotype for individual 2, ..."<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -p [filename] "<<" specify input BIMBAM phenotype file name"<<endl; - cout<<" format: phenotype for individual 1"<<endl; - cout<<" phenotype for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -a [filename] "<<" specify input BIMBAM SNP annotation file name (optional)"<<endl; - cout<<" format: rs#1, base_position, chr_number"<<endl; - cout<<" rs#2, base_position, chr_number"<<endl; - cout<<" ..."<<endl; + if (option == 2) { + cout << " FILE I/O RELATED OPTIONS" << endl; + cout << " -bfile [prefix] " + << " specify input PLINK binary ped file prefix." << endl; + cout << " requires: *.fam, *.bim and *.bed files" << endl; + cout << " missing value: -9" << endl; + cout << " -g [filename] " + << " specify input BIMBAM mean genotype file name" << endl; + cout << " format: rs#1, allele0, allele1, genotype for individual " + "1, genotype for individual 2, ..." + << endl; + cout << " rs#2, allele0, allele1, genotype for individual " + "1, genotype for individual 2, ..." + << endl; + cout << " ..." << endl; + cout << " missing value: NA" << endl; + cout << " -p [filename] " + << " specify input BIMBAM phenotype file name" << endl; + cout << " format: phenotype for individual 1" << endl; + cout << " phenotype for individual 2" << endl; + cout << " ..." << endl; + cout << " missing value: NA" << endl; + cout << " -a [filename] " + << " specify input BIMBAM SNP annotation file name (optional)" << endl; + cout << " format: rs#1, base_position, chr_number" << endl; + cout << " rs#2, base_position, chr_number" << endl; + cout << " ..." << endl; // WJA added. 
- cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl; - cout<<" requires: *.bgen, *.sample files"<<endl; - - cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl; - cout<<" format: variable for individual 1"<<endl; - cout<<" variable for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -widv [filename] "<<" specify input file that contains a column of residual weights"<<endl; - cout<<" format: variable for individual 1"<<endl; - cout<<" variable for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -k [filename] "<<" specify input kinship/relatedness matrix file name"<<endl; - cout<<" -mk [filename] "<<" specify input file which contains a list of kinship/relatedness matrices"<<endl; - cout<<" -u [filename] "<<" specify input file containing the eigen vectors of the kinship/relatedness matrix"<<endl; - cout<<" -d [filename] "<<" specify input file containing the eigen values of the kinship/relatedness matrix"<<endl; - cout<<" -c [filename] "<<" specify input covariates file name (optional)"<<endl; - cout<<" -cat [filename] "<<" specify input category file name (optional), which contains rs cat1 cat2 ..."<<endl; - cout<<" -beta [filename] "<<" specify input beta file name (optional), which contains rs beta se_beta n_total (or n_mis and n_obs) estimates from a lm model"<<endl; - cout<<" -cor [filename] "<<" specify input correlation file name (optional), which contains rs window_size correlations from snps"<<endl; - cout<<" missing value: NA"<<endl; - cout<<" note: the intercept (a column of 1s) may need to be included"<<endl; - cout<<" -epm [filename] "<<" specify input estimated parameter file name"<<endl; - cout<<" -en [n1] [n2] [n3] [n4] "<<" specify values for the input estimated parameter file (with a header)"<<endl; - cout<<" options: n1: rs column number"<<endl; - cout<<" n2: 
estimated alpha column number (0 to ignore)"<<endl; - cout<<" n3: estimated beta column number (0 to ignore)"<<endl; - cout<<" n4: estimated gamma column number (0 to ignore)"<<endl; - cout<<" default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if -ebv is specified"<<endl; - cout<<" -ebv [filename] "<<" specify input estimated random effect (breeding value) file name"<<endl; - cout<<" format: value for individual 1"<<endl; - cout<<" value for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -emu [filename] "<<" specify input log file name containing estimated mean"<<endl; - cout<<" -mu [num] "<<" specify input estimated mean value"<<endl; - cout<<" -gene [filename] "<<" specify input gene expression file name"<<endl; - cout<<" format: header"<<endl; - cout<<" gene1, count for individual 1, count for individual 2, ..."<<endl; - cout<<" gene2, count for individual 1, count for individual 2, ..."<<endl; - cout<<" ..."<<endl; - cout<<" missing value: not allowed"<<endl; - cout<<" -r [filename] "<<" specify input total read count file name"<<endl; - cout<<" format: total read count for individual 1"<<endl; - cout<<" total read count for individual 2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -snps [filename] "<<" specify input snps file name to only analyze a certain set of snps"<<endl; - cout<<" format: rs#1"<<endl; - cout<<" rs#2"<<endl; - cout<<" ..."<<endl; - cout<<" missing value: NA"<<endl; - cout<<" -silence "<<" silent terminal display"<<endl; - cout<<" -km [num] "<<" specify input kinship/relatedness file type (default 1)."<<endl; - cout<<" options: 1: \"n by n matrix\" format"<<endl; - cout<<" 2: \"id id value\" format"<<endl; - cout<<" -n [num] "<<" specify phenotype column in the phenotype/*.fam file (optional; default 1)"<<endl; - cout<<" -pace [num] "<<" specify terminal display update pace (default 100000 SNPs or 100000 iterations)."<<endl; - cout<<" -outdir [path] "<<" specify output 
directory path (default \"./output/\")"<<endl; - cout<<" -o [prefix] "<<" specify output file prefix (default \"result\")"<<endl; - cout<<" output: prefix.cXX.txt or prefix.sXX.txt from kinship/relatedness matrix estimation"<<endl; - cout<<" output: prefix.assoc.txt and prefix.log.txt form association tests"<<endl; - cout<<endl; + cout << " -oxford [prefix] " + << " specify input Oxford genotype bgen file prefix." << endl; + cout << " requires: *.bgen, *.sample files" << endl; + + cout << " -gxe [filename] " + << " specify input file that contains a column of environmental " + "factor for g by e tests" + << endl; + cout << " format: variable for individual 1" << endl; + cout << " variable for individual 2" << endl; + cout << " ..." << endl; + cout << " missing value: NA" << endl; + cout << " -widv [filename] " + << " specify input file that contains a column of residual weights" + << endl; + cout << " format: variable for individual 1" << endl; + cout << " variable for individual 2" << endl; + cout << " ..." << endl; + cout << " missing value: NA" << endl; + cout << " -k [filename] " + << " specify input kinship/relatedness matrix file name" << endl; + cout << " -mk [filename] " + << " specify input file which contains a list of kinship/relatedness " + "matrices" + << endl; + cout << " -u [filename] " + << " specify input file containing the eigen vectors of the " + "kinship/relatedness matrix" + << endl; + cout << " -d [filename] " + << " specify input file containing the eigen values of the " + "kinship/relatedness matrix" + << endl; + cout << " -c [filename] " + << " specify input covariates file name (optional)" << endl; + cout << " -cat [filename] " + << " specify input category file name (optional), which contains rs " + "cat1 cat2 ..." 
+ << endl; + cout << " -beta [filename] " + << " specify input beta file name (optional), which contains rs beta " + "se_beta n_total (or n_mis and n_obs) estimates from a lm model" + << endl; + cout << " -cor [filename] " + << " specify input correlation file name (optional), which contains " + "rs window_size correlations from snps" + << endl; + cout << " missing value: NA" << endl; + cout << " note: the intercept (a column of 1s) may need to be " + "included" + << endl; + cout << " -epm [filename] " + << " specify input estimated parameter file name" << endl; + cout << " -en [n1] [n2] [n3] [n4] " + << " specify values for the input estimated parameter file (with a " + "header)" + << endl; + cout << " options: n1: rs column number" << endl; + cout << " n2: estimated alpha column number (0 to ignore)" + << endl; + cout << " n3: estimated beta column number (0 to ignore)" + << endl; + cout << " n4: estimated gamma column number (0 to ignore)" + << endl; + cout << " default: 2 4 5 6 if -ebv is not specified; 2 0 5 6 if " + "-ebv is specified" + << endl; + cout << " -ebv [filename] " + << " specify input estimated random effect (breeding value) file name" + << endl; + cout << " format: value for individual 1" << endl; + cout << " value for individual 2" << endl; + cout << " ..." << endl; + cout << " missing value: NA" << endl; + cout << " -emu [filename] " + << " specify input log file name containing estimated mean" << endl; + cout << " -mu [num] " + << " specify input estimated mean value" << endl; + cout << " -gene [filename] " + << " specify input gene expression file name" << endl; + cout << " format: header" << endl; + cout << " gene1, count for individual 1, count for " + "individual 2, ..." + << endl; + cout << " gene2, count for individual 1, count for " + "individual 2, ..." + << endl; + cout << " ..." 
<< endl; + cout << " missing value: not allowed" << endl; + cout << " -r [filename] " + << " specify input total read count file name" << endl; + cout << " format: total read count for individual 1" << endl; + cout << " total read count for individual 2" << endl; + cout << " ..." << endl; + cout << " missing value: NA" << endl; + cout + << " -snps [filename] " + << " specify input snps file name to only analyze a certain set of snps" + << endl; + cout << " format: rs#1" << endl; + cout << " rs#2" << endl; + cout << " ..." << endl; + cout << " missing value: NA" << endl; + cout << " -silence " + << " silent terminal display" << endl; + cout << " -km [num] " + << " specify input kinship/relatedness file type (default 1)." << endl; + cout << " options: 1: \"n by n matrix\" format" << endl; + cout << " 2: \"id id value\" format" << endl; + cout << " -n [num] " + << " specify phenotype column in the phenotype/*.fam file (optional; " + "default 1)" + << endl; + cout << " -pace [num] " + << " specify terminal display update pace (default 100000 SNPs or " + "100000 iterations)." 
+ << endl; + cout << " -outdir [path] " + << " specify output directory path (default \"./output/\")" << endl; + cout << " -o [prefix] " + << " specify output file prefix (default \"result\")" << endl; + cout << " output: prefix.cXX.txt or prefix.sXX.txt from " + "kinship/relatedness matrix estimation" + << endl; + cout << " output: prefix.assoc.txt and prefix.log.txt form " + "association tests" + << endl; + cout << endl; } - if (option==3) { - cout<<" SNP QC OPTIONS" << endl; - cout<<" -miss [num] "<<" specify missingness threshold (default 0.05)" << endl; - cout<<" -maf [num] "<<" specify minor allele frequency threshold (default 0.01)" << endl; - cout<<" -hwe [num] "<<" specify HWE test p value threshold (default 0; no test)" << endl; - cout<<" -r2 [num] "<<" specify r-squared threshold (default 0.9999)" << endl; - cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl; - cout<<endl; + if (option == 3) { + cout << " SNP QC OPTIONS" << endl; + cout << " -miss [num] " + << " specify missingness threshold (default 0.05)" << endl; + cout << " -maf [num] " + << " specify minor allele frequency threshold (default 0.01)" << endl; + cout << " -hwe [num] " + << " specify HWE test p value threshold (default 0; no test)" << endl; + cout << " -r2 [num] " + << " specify r-squared threshold (default 0.9999)" << endl; + cout << " -notsnp " + << " minor allele frequency cutoff is not used" << endl; + cout << endl; } - if (option==4) { - cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl; - cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl; - cout<<" options: 1: centered XX^T/p"<<endl; - cout<<" 2: standardized XX^T/p"<<endl; - cout<<" note: non-polymorphic SNPs are excluded "<<endl; - cout<<endl; + if (option == 4) { + cout << " RELATEDNESS MATRIX CALCULATION OPTIONS" << endl; + cout << " -gk [num] " + << " specify which type of kinship/relatedness matrix to generate " + "(default 1)" + << endl; + 
cout << " options: 1: centered XX^T/p" << endl; + cout << " 2: standardized XX^T/p" << endl; + cout << " note: non-polymorphic SNPs are excluded " << endl; + cout << endl; } - if (option==5) { - cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl; - cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl; - cout<<endl; + if (option == 5) { + cout << " EIGEN-DECOMPOSITION OPTIONS" << endl; + cout << " -eigen " + << " specify to perform eigen decomposition of the loaded relatedness " + "matrix" + << endl; + cout << endl; } - if (option==6) { - cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl; - cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl; - cout<<" options (with kinship file): 1: HE regression (default)"<<endl; - cout<<" 2: REML"<<endl; - cout<<" options (with beta/cor files): 1: Centered genotypes (default)"<<endl; - cout<<" 2: Standardized genotypes"<<endl; - cout<<" -crt -windowbp [num]"<<" specify the window size based on bp (default 1000000; 1Mb)"<<endl; - cout<<" -crt -windowcm [num]"<<" specify the window size based on cm (default 0)"<<endl; - cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl; - cout<<endl; + if (option == 6) { + cout << " VARIANCE COMPONENT ESTIMATION OPTIONS" << endl; + cout << " -vc " + << " specify to perform variance component estimation for the loaded " + "relatedness matrix/matrices" + << endl; + cout + << " options (with kinship file): 1: HE regression (default)" + << endl; + cout << " 2: REML" << endl; + cout << " options (with beta/cor files): 1: Centered genotypes " + "(default)" + << endl; + cout << " 2: Standardized genotypes" + << endl; + cout << " -crt -windowbp [num]" + << " specify the window size based on bp (default 1000000; 1Mb)" + << endl; + cout << " -crt -windowcm [num]" + << " specify the window size based on cm (default 0)" << endl; + cout << " -crt 
-windowns [num]" + << " specify the window size based on number of snps (default 0)" + << endl; + cout << endl; } - if (option==7) { - cout<<" LINEAR MODEL OPTIONS" << endl; - cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: Wald test"<<endl; - cout<<" 2: Likelihood ratio test"<<endl; - cout<<" 3: Score test"<<endl; - cout<<" 4: 1-3"<<endl; - cout<<endl; + if (option == 7) { + cout << " LINEAR MODEL OPTIONS" << endl; + cout << " -lm [num] " + << " specify analysis options (default 1)." << endl; + cout << " options: 1: Wald test" << endl; + cout << " 2: Likelihood ratio test" << endl; + cout << " 3: Score test" << endl; + cout << " 4: 1-3" << endl; + cout << endl; } - if (option==8) { - cout<<" LINEAR MIXED MODEL OPTIONS" << endl; - cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: Wald test"<<endl; - cout<<" 2: Likelihood ratio test"<<endl; - cout<<" 3: Score test"<<endl; - cout<<" 4: 1-3"<<endl; - cout<<" 5: Parameter estimation in the null model only"<<endl; - cout<<" -lmin [num] "<<" specify minimal value for lambda (default 1e-5)" << endl; - cout<<" -lmax [num] "<<" specify maximum value for lambda (default 1e+5)" << endl; - cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl; - cout<<endl; + if (option == 8) { + cout << " LINEAR MIXED MODEL OPTIONS" << endl; + cout << " -lmm [num] " + << " specify analysis options (default 1)." 
<< endl; + cout << " options: 1: Wald test" << endl; + cout << " 2: Likelihood ratio test" << endl; + cout << " 3: Score test" << endl; + cout << " 4: 1-3" << endl; + cout << " 5: Parameter estimation in the null model only" + << endl; + cout << " -lmin [num] " + << " specify minimal value for lambda (default 1e-5)" << endl; + cout << " -lmax [num] " + << " specify maximum value for lambda (default 1e+5)" << endl; + cout + << " -region [num] " + << " specify the number of regions used to evaluate lambda (default 10)" + << endl; + cout << endl; } - if (option==9) { - cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl; - cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl; - cout<<" -emi "<<" specify the maximum number of iterations for the PX-EM method in the null (default 10000)"<<endl; - cout<<" -nri "<<" specify the maximum number of iterations for the Newton-Raphson's method in the null (default 100)"<<endl; - cout<<" -emp "<<" specify the precision for the PX-EM method in the null (default 0.0001)"<<endl; - cout<<" -nrp "<<" specify the precision for the Newton-Raphson's method in the null (default 0.0001)"<<endl; - cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl; - cout<<endl; + if (option == 9) { + cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl; + cout << " -pnr " + << " specify the pvalue threshold to use the Newton-Raphson's method " + "(default 0.001)" + << endl; + cout << " -emi " + << " specify the maximum number of iterations for the PX-EM method in " + "the null (default 10000)" + << endl; + cout << " -nri " + << " specify the maximum number of iterations for the " + "Newton-Raphson's method in the null (default 100)" + << endl; + cout << " -emp " + << " specify the precision for the PX-EM method in the null (default " + "0.0001)" + << endl; + cout << " -nrp " + << " specify the precision for the Newton-Raphson's 
method in the " + "null (default 0.0001)" + << endl; + cout << " -crt " + << " specify to output corrected pvalues for these pvalues that are " + "below the -pnr threshold" + << endl; + cout << endl; } - if (option==10) { - cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl; - cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: BSLMM"<<endl; - cout<<" 2: standard ridge regression/GBLUP (no mcmc)"<<endl; - cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl; - cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl; - cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl; - - cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl; - cout<<" options: 1: LDR"<<endl; - - cout<<" MCMC OPTIONS" << endl; - cout<<" Prior" << endl; - cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl; - cout<<" -hmax [num] "<<" specify maximum value for h (default 1)" << endl; - cout<<" -rmin [num] "<<" specify minimum value for rho (default 0)" << endl; - cout<<" -rmax [num] "<<" specify maximum value for rho (default 1)" << endl; - cout<<" -pmin [num] "<<" specify minimum value for log10(pi) (default log10(1/p), where p is the number of analyzed SNPs )" << endl; - cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl; - cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl; - cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl; - - cout<<" Proposal" << endl; - cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl; - cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; - cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; - cout<<" -pscale [num] "<<" specify the step size 
scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl; - - cout<<" Others" << endl; - cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl; - cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl; - cout<<" -rpace [num] "<<" specify recording pace, record one state in every [num] steps (default 10)" << endl; - cout<<" -wpace [num] "<<" specify writing pace, write values down in every [num] recorded steps (default 1000)" << endl; - cout<<" -seed [num] "<<" specify random seed (a random seed is generated by default)" << endl; - cout<<" -mh [num] "<<" specify number of MH steps in each iteration (default 10)" << endl; - cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl; - cout<<endl; + if (option == 10) { + cout << " MULTI-LOCUS ANALYSIS OPTIONS" << endl; + cout << " -bslmm [num] " + << " specify analysis options (default 1)." << endl; + cout << " options: 1: BSLMM" << endl; + cout << " 2: standard ridge regression/GBLUP (no mcmc)" + << endl; + cout << " 3: probit BSLMM (requires 0/1 phenotypes)" + << endl; + cout + << " 4: BSLMM with DAP for Hyper Parameter Estimation" + << endl; + cout << " 5: BSLMM with DAP for Fine Mapping" << endl; + + cout << " -ldr [num] " + << " specify analysis options (default 1)." 
<< endl; + cout << " options: 1: LDR" << endl; + + cout << " MCMC OPTIONS" << endl; + cout << " Prior" << endl; + cout << " -hmin [num] " + << " specify minimum value for h (default 0)" << endl; + cout << " -hmax [num] " + << " specify maximum value for h (default 1)" << endl; + cout << " -rmin [num] " + << " specify minimum value for rho (default 0)" << endl; + cout << " -rmax [num] " + << " specify maximum value for rho (default 1)" << endl; + cout << " -pmin [num] " + << " specify minimum value for log10(pi) (default log10(1/p), where p " + "is the number of analyzed SNPs )" + << endl; + cout << " -pmax [num] " + << " specify maximum value for log10(pi) (default log10(1) )" << endl; + cout << " -smin [num] " + << " specify minimum value for |gamma| (default 0)" << endl; + cout << " -smax [num] " + << " specify maximum value for |gamma| (default 300)" << endl; + + cout << " Proposal" << endl; + cout << " -gmean [num] " + << " specify the mean for the geometric distribution (default: 2000)" + << endl; + cout << " -hscale [num] " + << " specify the step size scale for the proposal distribution of h " + "(value between 0 and 1, default min(10/sqrt(n),1) )" + << endl; + cout << " -rscale [num] " + << " specify the step size scale for the proposal distribution of rho " + "(value between 0 and 1, default min(10/sqrt(n),1) )" + << endl; + cout << " -pscale [num] " + << " specify the step size scale for the proposal distribution of " + "log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" + << endl; + + cout << " Others" << endl; + cout << " -w [num] " + << " specify burn-in steps (default 100,000)" << endl; + cout << " -s [num] " + << " specify sampling steps (default 1,000,000)" << endl; + cout << " -rpace [num] " + << " specify recording pace, record one state in every [num] steps " + "(default 10)" + << endl; + cout << " -wpace [num] " + << " specify writing pace, write values down in every [num] recorded " + "steps (default 1000)" + << endl; + cout << " 
-seed [num] " + << " specify random seed (a random seed is generated by default)" + << endl; + cout << " -mh [num] " + << " specify number of MH steps in each iteration (default 10)" + << endl; + cout << " requires: 0/1 phenotypes and -bslmm 3 option" << endl; + cout << endl; } - if (option==11) { - cout<<" PREDICTION OPTIONS" << endl; - cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl; - cout<<" options: 1: predict for individuals with missing phenotypes"<<endl; - cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl; - cout<<endl; + if (option == 11) { + cout << " PREDICTION OPTIONS" << endl; + cout << " -predict [num] " + << " specify prediction options (default 1)." << endl; + cout << " options: 1: predict for individuals with missing " + "phenotypes" + << endl; + cout << " 2: predict for individuals with missing " + "phenotypes, and convert the predicted values to probability " + "scale. 
Use only for files fitted with -bslmm 3 option" + << endl; + cout << endl; } - if (option==12) { - cout<<" CALC CORRELATION OPTIONS" << endl; - cout<<" -calccor "<<endl; - cout<<" -windowbp [num] "<<" specify the window size based on bp (default 1000000; 1Mb)" << endl; - cout<<" -windowcm [num] "<<" specify the window size based on cm (default 0; not used)" << endl; - cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl; - cout<<endl; + if (option == 12) { + cout << " CALC CORRELATION OPTIONS" << endl; + cout << " -calccor " << endl; + cout << " -windowbp [num] " + << " specify the window size based on bp (default 1000000; 1Mb)" + << endl; + cout << " -windowcm [num] " + << " specify the window size based on cm (default 0; not used)" + << endl; + cout << " -windowns [num] " + << " specify the window size based on number of snps (default 0; not " + "used)" + << endl; + cout << endl; } - if (option==13) { - cout<<" NOTE"<<endl; - cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl; - cout<<" 2. Missing genotoypes will be repalced with the mean genotype of that SNP."<<endl; - cout<<" 3. For lmm analysis, memory should be large enough to hold the relatedness matrix and to perform eigen decomposition."<<endl; - cout<<" 4. For multivariate lmm analysis, use a large -pnr for each snp will increase computation time dramatically."<<endl; - cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl; - cout<<endl; + if (option == 13) { + cout << " NOTE" << endl; + cout << " 1. Only individuals with non-missing phenotoypes and covariates " + "will be analyzed." + << endl; + cout << " 2. Missing genotoypes will be repalced with the mean genotype of " + "that SNP." + << endl; + cout << " 3. For lmm analysis, memory should be large enough to hold the " + "relatedness matrix and to perform eigen decomposition." 
+ << endl; + cout << " 4. For multivariate lmm analysis, use a large -pnr for each snp " + "will increase computation time dramatically." + << endl; + cout << " 5. For bslmm analysis, in addition to 3, memory should be large " + "enough to hold the whole genotype matrix." + << endl; + cout << endl; } return; @@ -457,2609 +700,2985 @@ void GEMMA::PrintHelp(size_t option) { // calccor: 71 // gw: 72 -void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) { - string str; - - for(int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-bfile")==0 || - strcmp(argv[i], "--bfile")==0 || - strcmp(argv[i], "-b")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_bfile=str; - } - else if (strcmp(argv[i], "-mbfile")==0 || - strcmp(argv[i], "--mbfile")==0 || - strcmp(argv[i], "-mb")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_mbfile=str; - } - else if (strcmp(argv[i], "-silence")==0) { - cPar.mode_silence=true; - } - else if (strcmp(argv[i], "-g")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_geno=str; - } - else if (strcmp(argv[i], "-mg")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_mgeno=str; - } - else if (strcmp(argv[i], "-p")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_pheno=str; - } - else if (strcmp(argv[i], "-a")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_anno=str; - } - - // WJA added. 
- else if (strcmp(argv[i], "-oxford")==0 || - strcmp(argv[i], "--oxford")==0 || - strcmp(argv[i], "-x")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_oxford=str; - } - else if (strcmp(argv[i], "-gxe")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_gxe=str; - } - else if (strcmp(argv[i], "-widv")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_weight=str; - } - else if (strcmp(argv[i], "-wsnp")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_wsnp=str; - } - else if (strcmp(argv[i], "-wcat")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_wcat=str; - } - else if (strcmp(argv[i], "-k")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_kin=str; - } - else if (strcmp(argv[i], "-mk")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_mk=str; - } - else if (strcmp(argv[i], "-u")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_ku=str; - } - else if (strcmp(argv[i], "-d")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_kd=str; - } - else if (strcmp(argv[i], "-c")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_cvt=str; - } - else if (strcmp(argv[i], "-cat")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_cat=str; 
- } - else if (strcmp(argv[i], "-mcat")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_mcat=str; - } - else if (strcmp(argv[i], "-catc")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_catc=str; - } - else if (strcmp(argv[i], "-mcatc")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_mcatc=str; - } - else if (strcmp(argv[i], "-beta")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_beta=str; - } - else if (strcmp(argv[i], "-bf")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_bf=str; - } - else if (strcmp(argv[i], "-hyp")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_hyp=str; - } - else if (strcmp(argv[i], "-cor")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_cor=str; - } - else if (strcmp(argv[i], "-study")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_study=str; - } - else if (strcmp(argv[i], "-ref")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_ref=str; - } - else if (strcmp(argv[i], "-mstudy")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_mstudy=str; - } - else if (strcmp(argv[i], "-mref")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_mref=str; - } - else if (strcmp(argv[i], "-epm")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} 
- ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_epm=str; - } - else if (strcmp(argv[i], "-en")==0) { - while (argv[i+1] != NULL && argv[i+1][0] != '-') { - ++i; - str.clear(); - str.assign(argv[i]); - cPar.est_column.push_back(atoi(str.c_str())); - } - } - else if (strcmp(argv[i], "-ebv")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_ebv=str; - } - else if (strcmp(argv[i], "-emu")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_log=str; - } - else if (strcmp(argv[i], "-mu")==0) { - if(argv[i+1] == NULL) {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.pheno_mean=atof(str.c_str()); - } - else if (strcmp(argv[i], "-gene")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_gene=str; - } - else if (strcmp(argv[i], "-r")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_read=str; - } - else if (strcmp(argv[i], "-snps")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_snps=str; - } - else if (strcmp(argv[i], "-km")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.k_mode=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-n")==0) { - (cPar.p_column).clear(); - while (argv[i+1] != NULL && argv[i+1][0] != '-') { - ++i; - str.clear(); - str.assign(argv[i]); - (cPar.p_column).push_back(atoi(str.c_str())); - } - } - else if (strcmp(argv[i], "-pace")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.d_pace=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-outdir")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - 
str.assign(argv[i]); - cPar.path_out=str; - } - else if (strcmp(argv[i], "-o")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_out=str; - } - else if (strcmp(argv[i], "-miss")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.miss_level=atof(str.c_str()); - } - else if (strcmp(argv[i], "-maf")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - if (cPar.maf_level!=-1) {cPar.maf_level=atof(str.c_str());} - } - else if (strcmp(argv[i], "-hwe")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.hwe_level=atof(str.c_str()); - } - else if (strcmp(argv[i], "-r2")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.r2_level=atof(str.c_str()); - } - else if (strcmp(argv[i], "-notsnp")==0) { - cPar.maf_level=-1; - } - else if (strcmp(argv[i], "-gk")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=21; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=20+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-gs")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=25; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=24+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-gq")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! 
only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=27; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=26+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-gw")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=72; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=71+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-sample")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.ni_subsample=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-eigen")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=31; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=30+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-calccor")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=71; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=70+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-vc")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=61; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=60+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-ci")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! 
only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=66; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=65+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-pve")==0) { - double s=0; - while (argv[i+1] != NULL && (argv[i+1][0] != '-' || !isalpha(argv[i+1][1]) ) ) { - ++i; - str.clear(); - str.assign(argv[i]); - cPar.v_pve.push_back(atof(str.c_str())); - s+=atof(str.c_str()); - } - if (s==1) { - cout<<"summation of pve equals one."<<endl; - } - } - else if (strcmp(argv[i], "-blocks")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.n_block=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-noconstrain")==0) { - cPar.noconstrain=true; - } - else if (strcmp(argv[i], "-lm")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=51; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=50+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-fa")==0 || strcmp(argv[i], "-lmm")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! 
only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=1; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-lmin")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.l_min=atof(str.c_str()); - } - else if (strcmp(argv[i], "-lmax")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.l_max=atof(str.c_str()); - } - else if (strcmp(argv[i], "-region")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.n_region=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-pnr")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.p_nr=atof(str.c_str()); - } - else if (strcmp(argv[i], "-emi")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.em_iter=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-nri")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.nr_iter=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-emp")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.em_prec=atof(str.c_str()); - } - else if (strcmp(argv[i], "-nrp")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.nr_prec=atof(str.c_str()); - } - else if (strcmp(argv[i], "-crt")==0) { - cPar.crt=1; - } - else if (strcmp(argv[i], "-bslmm")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! 
only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=11; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=10+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-hmin")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.h_min=atof(str.c_str()); - } - else if (strcmp(argv[i], "-hmax")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.h_max=atof(str.c_str()); - } - else if (strcmp(argv[i], "-rmin")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.rho_min=atof(str.c_str()); - } - else if (strcmp(argv[i], "-rmax")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.rho_max=atof(str.c_str()); - } - else if (strcmp(argv[i], "-pmin")==0) { - if(argv[i+1] == NULL) {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.logp_min=atof(str.c_str())*log(10.0); - } - else if (strcmp(argv[i], "-pmax")==0) { - if(argv[i+1] == NULL) {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.logp_max=atof(str.c_str())*log(10.0); - } - else if (strcmp(argv[i], "-smin")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.s_min=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-smax")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.s_max=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-gmean")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.geo_mean=atof(str.c_str()); - } - else if (strcmp(argv[i], "-hscale")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - 
str.clear(); - str.assign(argv[i]); - cPar.h_scale=atof(str.c_str()); - } - else if (strcmp(argv[i], "-rscale")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.rho_scale=atof(str.c_str()); - } - else if (strcmp(argv[i], "-pscale")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.logp_scale=atof(str.c_str())*log(10.0); - } - else if (strcmp(argv[i], "-w")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.w_step=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-s")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.s_step=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-rpace")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.r_pace=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-wpace")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.w_pace=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-seed")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.randseed=atol(str.c_str()); - } - else if (strcmp(argv[i], "-mh")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.n_mh=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-predict")==0) { - if (cPar.a_mode!=0) {cPar.error=true; cout<<"error! 
only one of -gk -gs -eigen -vc -lm -lmm -bslmm -predict -calccor options is allowed."<<endl; break;} - if(argv[i+1] == NULL || argv[i+1][0] == '-') {cPar.a_mode=41; continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.a_mode=40+atoi(str.c_str()); - } - else if (strcmp(argv[i], "-windowcm")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.window_cm=atof(str.c_str()); - } - else if (strcmp(argv[i], "-windowbp")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.window_bp=atoi(str.c_str()); - } - else if (strcmp(argv[i], "-windowns")==0) { - if(argv[i+1] == NULL || argv[i+1][0] == '-') {continue;} - ++i; - str.clear(); - str.assign(argv[i]); - cPar.window_ns=atoi(str.c_str()); - } - else {cout<<"error! unrecognized option: "<<argv[i]<<endl; cPar.error=true; continue;} - } - - // Change prediction mode to 43 if the epm file is not provided. - if (cPar.a_mode==41 && cPar.file_epm.empty()) { - cPar.a_mode=43; - } - - return; +void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { + string str; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-bfile") == 0 || strcmp(argv[i], "--bfile") == 0 || + strcmp(argv[i], "-b") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_bfile = str; + } else if (strcmp(argv[i], "-mbfile") == 0 || + strcmp(argv[i], "--mbfile") == 0 || + strcmp(argv[i], "-mb") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_mbfile = str; + } else if (strcmp(argv[i], "-silence") == 0) { + cPar.mode_silence = true; + } else if (strcmp(argv[i], "-g") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_geno = str; + } else if (strcmp(argv[i], "-mg") == 0) { + 
if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_mgeno = str; + } else if (strcmp(argv[i], "-p") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_pheno = str; + } else if (strcmp(argv[i], "-a") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_anno = str; + } + + // WJA added. + else if (strcmp(argv[i], "-oxford") == 0 || + strcmp(argv[i], "--oxford") == 0 || strcmp(argv[i], "-x") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_oxford = str; + } else if (strcmp(argv[i], "-gxe") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_gxe = str; + } else if (strcmp(argv[i], "-widv") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_weight = str; + } else if (strcmp(argv[i], "-wsnp") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_wsnp = str; + } else if (strcmp(argv[i], "-wcat") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_wcat = str; + } else if (strcmp(argv[i], "-k") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_kin = str; + } else if (strcmp(argv[i], "-mk") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_mk = str; + } else if (strcmp(argv[i], "-u") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + 
++i; + str.clear(); + str.assign(argv[i]); + cPar.file_ku = str; + } else if (strcmp(argv[i], "-d") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_kd = str; + } else if (strcmp(argv[i], "-c") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_cvt = str; + } else if (strcmp(argv[i], "-cat") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_cat = str; + } else if (strcmp(argv[i], "-mcat") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_mcat = str; + } else if (strcmp(argv[i], "-catc") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_catc = str; + } else if (strcmp(argv[i], "-mcatc") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_mcatc = str; + } else if (strcmp(argv[i], "-beta") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_beta = str; + } else if (strcmp(argv[i], "-bf") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_bf = str; + } else if (strcmp(argv[i], "-hyp") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_hyp = str; + } else if (strcmp(argv[i], "-cor") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_cor = str; + } else if (strcmp(argv[i], "-study") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { 
+ continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_study = str; + } else if (strcmp(argv[i], "-ref") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_ref = str; + } else if (strcmp(argv[i], "-mstudy") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_mstudy = str; + } else if (strcmp(argv[i], "-mref") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_mref = str; + } else if (strcmp(argv[i], "-epm") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_epm = str; + } else if (strcmp(argv[i], "-en") == 0) { + while (argv[i + 1] != NULL && argv[i + 1][0] != '-') { + ++i; + str.clear(); + str.assign(argv[i]); + cPar.est_column.push_back(atoi(str.c_str())); + } + } else if (strcmp(argv[i], "-ebv") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_ebv = str; + } else if (strcmp(argv[i], "-emu") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_log = str; + } else if (strcmp(argv[i], "-mu") == 0) { + if (argv[i + 1] == NULL) { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.pheno_mean = atof(str.c_str()); + } else if (strcmp(argv[i], "-gene") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_gene = str; + } else if (strcmp(argv[i], "-r") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_read = str; + } else if (strcmp(argv[i], "-snps") == 0) { + if (argv[i + 1] == 
NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_snps = str; + } else if (strcmp(argv[i], "-km") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.k_mode = atoi(str.c_str()); + } else if (strcmp(argv[i], "-n") == 0) { + (cPar.p_column).clear(); + while (argv[i + 1] != NULL && argv[i + 1][0] != '-') { + ++i; + str.clear(); + str.assign(argv[i]); + (cPar.p_column).push_back(atoi(str.c_str())); + } + } else if (strcmp(argv[i], "-pace") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.d_pace = atoi(str.c_str()); + } else if (strcmp(argv[i], "-outdir") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.path_out = str; + } else if (strcmp(argv[i], "-o") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.file_out = str; + } else if (strcmp(argv[i], "-miss") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.miss_level = atof(str.c_str()); + } else if (strcmp(argv[i], "-maf") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + if (cPar.maf_level != -1) { + cPar.maf_level = atof(str.c_str()); + } + } else if (strcmp(argv[i], "-hwe") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.hwe_level = atof(str.c_str()); + } else if (strcmp(argv[i], "-r2") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.r2_level = atof(str.c_str()); + } else if (strcmp(argv[i], "-notsnp") == 0) { + cPar.maf_level = -1; + } 
else if (strcmp(argv[i], "-gk") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 21; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 20 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-gs") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 25; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 24 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-gq") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 27; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 26 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-gw") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 72; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 71 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-sample") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.ni_subsample = atoi(str.c_str()); + } else if (strcmp(argv[i], "-eigen") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." 
+ << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 31; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 30 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-calccor") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 71; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 70 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-vc") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 61; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 60 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-ci") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 66; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 65 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-pve") == 0) { + double s = 0; + while (argv[i + 1] != NULL && + (argv[i + 1][0] != '-' || !isalpha(argv[i + 1][1]))) { + ++i; + str.clear(); + str.assign(argv[i]); + cPar.v_pve.push_back(atof(str.c_str())); + s += atof(str.c_str()); + } + if (s == 1) { + cout << "summation of pve equals one." 
<< endl; + } + } else if (strcmp(argv[i], "-blocks") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.n_block = atoi(str.c_str()); + } else if (strcmp(argv[i], "-noconstrain") == 0) { + cPar.noconstrain = true; + } else if (strcmp(argv[i], "-lm") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 51; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 50 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-fa") == 0 || strcmp(argv[i], "-lmm") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 1; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = atoi(str.c_str()); + } else if (strcmp(argv[i], "-lmin") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.l_min = atof(str.c_str()); + } else if (strcmp(argv[i], "-lmax") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.l_max = atof(str.c_str()); + } else if (strcmp(argv[i], "-region") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.n_region = atoi(str.c_str()); + } else if (strcmp(argv[i], "-pnr") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.p_nr = atof(str.c_str()); + } else if (strcmp(argv[i], "-emi") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + 
continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.em_iter = atoi(str.c_str()); + } else if (strcmp(argv[i], "-nri") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.nr_iter = atoi(str.c_str()); + } else if (strcmp(argv[i], "-emp") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.em_prec = atof(str.c_str()); + } else if (strcmp(argv[i], "-nrp") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.nr_prec = atof(str.c_str()); + } else if (strcmp(argv[i], "-crt") == 0) { + cPar.crt = 1; + } else if (strcmp(argv[i], "-bslmm") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 11; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 10 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-hmin") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.h_min = atof(str.c_str()); + } else if (strcmp(argv[i], "-hmax") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.h_max = atof(str.c_str()); + } else if (strcmp(argv[i], "-rmin") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.rho_min = atof(str.c_str()); + } else if (strcmp(argv[i], "-rmax") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.rho_max = atof(str.c_str()); + } else if (strcmp(argv[i], "-pmin") == 0) { + if (argv[i + 1] == 
NULL) { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.logp_min = atof(str.c_str()) * log(10.0); + } else if (strcmp(argv[i], "-pmax") == 0) { + if (argv[i + 1] == NULL) { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.logp_max = atof(str.c_str()) * log(10.0); + } else if (strcmp(argv[i], "-smin") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.s_min = atoi(str.c_str()); + } else if (strcmp(argv[i], "-smax") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.s_max = atoi(str.c_str()); + } else if (strcmp(argv[i], "-gmean") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.geo_mean = atof(str.c_str()); + } else if (strcmp(argv[i], "-hscale") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.h_scale = atof(str.c_str()); + } else if (strcmp(argv[i], "-rscale") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.rho_scale = atof(str.c_str()); + } else if (strcmp(argv[i], "-pscale") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.logp_scale = atof(str.c_str()) * log(10.0); + } else if (strcmp(argv[i], "-w") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.w_step = atoi(str.c_str()); + } else if (strcmp(argv[i], "-s") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.s_step = atoi(str.c_str()); + } else if (strcmp(argv[i], "-rpace") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + 
continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.r_pace = atoi(str.c_str()); + } else if (strcmp(argv[i], "-wpace") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.w_pace = atoi(str.c_str()); + } else if (strcmp(argv[i], "-seed") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.randseed = atol(str.c_str()); + } else if (strcmp(argv[i], "-mh") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.n_mh = atoi(str.c_str()); + } else if (strcmp(argv[i], "-predict") == 0) { + if (cPar.a_mode != 0) { + cPar.error = true; + cout << "error! only one of -gk -gs -eigen -vc -lm -lmm -bslmm " + "-predict -calccor options is allowed." + << endl; + break; + } + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + cPar.a_mode = 41; + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.a_mode = 40 + atoi(str.c_str()); + } else if (strcmp(argv[i], "-windowcm") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.window_cm = atof(str.c_str()); + } else if (strcmp(argv[i], "-windowbp") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.window_bp = atoi(str.c_str()); + } else if (strcmp(argv[i], "-windowns") == 0) { + if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { + continue; + } + ++i; + str.clear(); + str.assign(argv[i]); + cPar.window_ns = atoi(str.c_str()); + } else { + cout << "error! unrecognized option: " << argv[i] << endl; + cPar.error = true; + continue; + } + } + + // Change prediction mode to 43 if the epm file is not provided. 
+ if (cPar.a_mode == 41 && cPar.file_epm.empty()) { + cPar.a_mode = 43; + } + + return; } -void GEMMA::BatchRun (PARAM &cPar) { - clock_t time_begin, time_start; - time_begin=clock(); - - // Read Files. - cout<<"Reading Files ... "<<endl; - cPar.ReadFiles(); - if (cPar.error==true) {cout<<"error! fail to read files. "<<endl; return;} - cPar.CheckData(); - if (cPar.error==true) {cout<<"error! fail to check data. "<<endl; return;} - - //Prediction for bslmm - if (cPar.a_mode==41 || cPar.a_mode==42) { - gsl_vector *y_prdt; - - y_prdt=gsl_vector_alloc (cPar.ni_total-cPar.ni_test); - - //set to zero - gsl_vector_set_zero (y_prdt); - - PRDT cPRDT; - cPRDT.CopyFromParam(cPar); - - //add breeding value if needed - if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) { - cout<<"Adding Breeding Values ... "<<endl; - - gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total); - gsl_vector *u_hat=gsl_vector_alloc (cPar.ni_test); - - //read kinship matrix and set u_hat - vector<int> indicator_all; - size_t c_bv=0; - for (size_t i=0; i<cPar.indicator_idv.size(); i++) { - indicator_all.push_back(1); - if (cPar.indicator_bv[i]==1) {gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); c_bv++;} - } - - ReadFile_kin (cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, cPar.error, G); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. 
"<<endl; return;} - - //read u - cPRDT.AddBV(G, u_hat, y_prdt); - - gsl_matrix_free(G); - gsl_vector_free(u_hat); - } - - //add beta - if (!cPar.file_bfile.empty()) { - cPRDT.AnalyzePlink (y_prdt); - } - else { - cPRDT.AnalyzeBimbam (y_prdt); - } - - //add mu - gsl_vector_add_constant(y_prdt, cPar.pheno_mean); - - //convert y to probability if needed - if (cPar.a_mode==42) { - double d; - for (size_t i=0; i<y_prdt->size; i++) { - d=gsl_vector_get(y_prdt, i); - d=gsl_cdf_gaussian_P(d, 1.0); - gsl_vector_set(y_prdt, i, d); - } - } - - - cPRDT.CopyToParam(cPar); - - cPRDT.WriteFiles(y_prdt); - - gsl_vector_free(y_prdt); - } - - //Prediction with kinship matrix only; for one or more phenotypes - if (cPar.a_mode==43) { - //first, use individuals with full phenotypes to obtain estimates of Vg and Ve - gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); - gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); - gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1); - gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); - gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2); - gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2); - gsl_vector *eval=gsl_vector_alloc (Y->size1); - - gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph); - gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt); - - //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, - cPar.CopyCvtPhen (W, Y, 0); - cPar.CopyCvtPhen (W_full, Y_full, 1); - - gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph); - gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1); - gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2); - - //read relatedness matrix G, and matrix G_full - ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. 
"<<endl; return;} - ReadFile_kin (cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} - - //center matrix G - CenterMatrix (G); - CenterMatrix (G_full); - - //eigen-decomposition and calculate trace_G - cout<<"Start Eigen-Decomposition..."<<endl; - time_start=clock(); - cPar.trace_G=EigenDecomp (G, U, eval, 0); - cPar.trace_G=0.0; - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} - cPar.trace_G+=gsl_vector_get (eval, i); - } - cPar.trace_G/=(double)eval->size; - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //calculate UtW and Uty - CalcUtX (U, W, UtW); - CalcUtX (U, Y, UtY); - - //calculate variance component and beta estimates - //and then obtain predicted values - if (cPar.n_ph==1) { - gsl_vector *beta=gsl_vector_alloc (W->size2); - gsl_vector *se_beta=gsl_vector_alloc (W->size2); - - double lambda, logl, vg, ve; - gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); - - //obtain estimates - CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, lambda, logl); - CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, lambda, vg, ve, beta, se_beta); - - cout<<"REMLE estimate for vg in the null model = "<<vg<<endl; - cout<<"REMLE estimate for ve in the null model = "<<ve<<endl; - cPar.vg_remle_null=vg; cPar.ve_remle_null=ve; - - //obtain Y_hat from fixed effects - gsl_vector_view Yhat_col=gsl_matrix_column (Y_hat, 0); - gsl_blas_dgemv (CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector); - - //obtain H - gsl_matrix_set_identity (H_full); - gsl_matrix_scale (H_full, ve); - gsl_matrix_scale (G_full, vg); - gsl_matrix_add (H_full, G_full); - - //free matrices - gsl_vector_free(beta); - gsl_vector_free(se_beta); - } else { - gsl_matrix *Vg=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph); - gsl_matrix *Ve=gsl_matrix_alloc (cPar.n_ph, cPar.n_ph); 
- gsl_matrix *B=gsl_matrix_alloc (cPar.n_ph, W->size2); - gsl_matrix *se_B=gsl_matrix_alloc (cPar.n_ph, W->size2); - - //obtain estimates - CalcMvLmmVgVeBeta (eval, UtW, UtY, cPar.em_iter, cPar.nr_iter, cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max, cPar.n_region, Vg, Ve, B, se_B); - - cout<<"REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<Vg->size1; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(Vg, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<Ve->size1; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(Ve, i, j)<<"\t"; - } - cout<<endl; - } - cPar.Vg_remle_null.clear(); - cPar.Ve_remle_null.clear(); - for (size_t i=0; i<Vg->size1; i++) { - for (size_t j=i; j<Vg->size2; j++) { - cPar.Vg_remle_null.push_back(gsl_matrix_get (Vg, i, j) ); - cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) ); - } - } - - //obtain Y_hat from fixed effects - gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat); - - //obtain H - KroneckerSym(G_full, Vg, H_full); - for (size_t i=0; i<G_full->size1; i++) { - gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2); - gsl_matrix_add (&H_sub.matrix, Ve); - } - - //free matrices - gsl_matrix_free (Vg); - gsl_matrix_free (Ve); - gsl_matrix_free (B); - gsl_matrix_free (se_B); - } - - PRDT cPRDT; - - cPRDT.CopyFromParam(cPar); - - cout<<"Predicting Missing Phentypes ... 
"<<endl; - time_start=clock(); - cPRDT.MvnormPrdt(Y_hat, H_full, Y_full); - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - cPRDT.WriteFiles(Y_full); - - gsl_matrix_free(Y); - gsl_matrix_free(W); - gsl_matrix_free(G); - gsl_matrix_free(U); - gsl_matrix_free(UtW); - gsl_matrix_free(UtY); - gsl_vector_free(eval); - - gsl_matrix_free(Y_full); - gsl_matrix_free(Y_hat); - gsl_matrix_free(W_full); - gsl_matrix_free(G_full); - gsl_matrix_free(H_full); - } - - - //Generate Kinship matrix - if (cPar.a_mode==21 || cPar.a_mode==22) { - cout<<"Calculating Relatedness Matrix ... "<<endl; - - gsl_matrix *G=gsl_matrix_alloc (cPar.ni_total, cPar.ni_total); - - time_start=clock(); - cPar.CalcKin (G); - cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - if (cPar.error==true) {cout<<"error! fail to calculate relatedness matrix. "<<endl; return;} - - if (cPar.a_mode==21) { - cPar.WriteMatrix (G, "cXX"); - } else { - cPar.WriteMatrix (G, "sXX"); - } - - gsl_matrix_free (G); - } - - //Compute the LDSC weights (not implemented yet) - if (cPar.a_mode==72) { - cout<<"Calculating Weights ... "<<endl; - - VARCOV cVarcov; - cVarcov.CopyFromParam(cPar); - - if (!cPar.file_bfile.empty()) { - cVarcov.AnalyzePlink (); - } else { - cVarcov.AnalyzeBimbam (); - } - - cVarcov.CopyToParam(cPar); - } - - // Compute the S matrix (and its variance), that is used for - // variance component estimation using summary statistics. - if (cPar.a_mode==25 || cPar.a_mode==26) { - cout<<"Calculating the S Matrix ... 
"<<endl; - - gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc); - gsl_vector *ns=gsl_vector_alloc (cPar.n_vc+1); - gsl_matrix_set_zero(S); - gsl_vector_set_zero(ns); - - gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); - gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); - gsl_vector_view ns_vec=gsl_vector_subvector(ns, 0, cPar.n_vc); - - gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); - gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); - gsl_matrix_set_zero (K); - gsl_matrix_set_zero (A); - - gsl_vector *y=gsl_vector_alloc (cPar.ni_test); - gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt); - - cPar.CopyCvtPhen (W, y, 0); - - set<string> setSnps_beta; - map <string, double> mapRS2wA, mapRS2wK; - - cPar.ObtainWeight(setSnps_beta, mapRS2wK); - - time_start=clock(); - cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector); - cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. 
"<<endl; return;} - - gsl_vector_set (ns, cPar.n_vc, cPar.ni_test); - - cPar.WriteMatrix (S, "S"); - cPar.WriteVector (ns, "size"); - cPar.WriteVar ("snps"); - - gsl_matrix_free (S); - gsl_vector_free (ns); - - gsl_matrix_free (A); - gsl_matrix_free (K); - - gsl_vector_free (y); - gsl_matrix_free (K); - } - - //Compute the q vector, that is used for variance component estimation using summary statistics - if (cPar.a_mode==27 || cPar.a_mode==28) { - gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); - gsl_vector *q=gsl_vector_alloc (cPar.n_vc); - gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1); - gsl_vector_set_zero (q); - gsl_vector_set_zero (s); - - gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc); - - vector<size_t> vec_cat, vec_ni; - vector<double> vec_weight, vec_z2; - map<string, double> mapRS2weight; - mapRS2weight.clear(); - - time_start=clock(); - ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test); - cout<<"## number of total individuals = "<<cPar.ni_total<<endl; - cout<<"## number of total SNPs = "<<cPar.ns_total<<endl; - cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; - cout<<"## number of variance components = "<<cPar.n_vc<<endl; - cout<<"Calculating the q vector ... "<<endl; - Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector); - cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - if (cPar.error==true) {cout<<"error! fail to calculate the q vector. "<<endl; return;} - - gsl_vector_set (s, cPar.n_vc, cPar.ni_total); - - cPar.WriteMatrix (Vq, "Vq"); - cPar.WriteVector (q, "q"); - cPar.WriteVector (s, "size"); - /* - for (size_t i=0; i<cPar.n_vc; i++) { - cout<<gsl_vector_get(q, i)<<endl; - } - */ - gsl_matrix_free (Vq); - gsl_vector_free (q); - gsl_vector_free (s); - } - - // Calculate SNP covariance. 
- if (cPar.a_mode==71) { - VARCOV cVarcov; - cVarcov.CopyFromParam(cPar); - - if (!cPar.file_bfile.empty()) { - cVarcov.AnalyzePlink (); - } else { - cVarcov.AnalyzeBimbam (); - } - - cVarcov.CopyToParam(cPar); - } - - // LM. - if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM - gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); - gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); - - //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, - cPar.CopyCvtPhen (W, Y, 0); - - //Fit LM or mvLM - if (cPar.n_ph==1) { - LM cLm; - cLm.CopyFromParam(cPar); - - gsl_vector_view Y_col=gsl_matrix_column (Y, 0); - - if (!cPar.file_gene.empty()) { - cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype - } else if (!cPar.file_bfile.empty()) { - cLm.AnalyzePlink (W, &Y_col.vector); - } else if (!cPar.file_oxford.empty()) { - cLm.Analyzebgen (W, &Y_col.vector); - } else { - cLm.AnalyzeBimbam (W, &Y_col.vector); - } - - cLm.WriteFiles(); - cLm.CopyToParam(cPar); - } - /* - else { - MVLM cMvlm; - cMvlm.CopyFromParam(cPar); - - if (!cPar.file_bfile.empty()) { - cMvlm.AnalyzePlink (W, Y); - } else { - cMvlm.AnalyzeBimbam (W, Y); - } - - cMvlm.WriteFiles(); - cMvlm.CopyToParam(cPar); - } - */ - //release all matrices and vectors - gsl_matrix_free (Y); - gsl_matrix_free (W); - } - - //VC estimation with one or multiple kinship matrices - //REML approach only - //if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp - //for one phenotype only; - if (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) { - if (!cPar.file_beta.empty() ) { - //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK - //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1 - //update indicator_snps, so that the numbers are in accordance with mapRS2wK - set<string> 
setSnps_beta; - ReadFile_snps_header (cPar.file_beta, setSnps_beta); - - map <string, double> mapRS2wA, mapRS2wK; - cPar.ObtainWeight(setSnps_beta, mapRS2wK); - - cPar.UpdateSNP (mapRS2wK); - - // Setup matrices and vectors. - gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc); - gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); - gsl_vector *q=gsl_vector_alloc (cPar.n_vc); - gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1); - - gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); - gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); - - gsl_vector *y=gsl_vector_alloc (cPar.ni_test); - gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt); - - gsl_matrix_set_zero (K); - gsl_matrix_set_zero (A); - - gsl_matrix_set_zero(S); - gsl_matrix_set_zero(Vq); - gsl_vector_set_zero (q); - gsl_vector_set_zero (s); - - cPar.CopyCvtPhen (W, y, 0); - - gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); - gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); - gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc); - - vector<size_t> vec_cat, vec_ni; - vector<double> vec_weight, vec_z2; - - //read beta, based on the mapRS2wK - ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test); - - cout<<"Study Panel: "<<endl; - cout<<"## number of total individuals = "<<cPar.ni_study<<endl; - cout<<"## number of total SNPs = "<<cPar.ns_study<<endl; - cout<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; - cout<<"## number of variance components = "<<cPar.n_vc<<endl; - - //compute q - Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector); - - //compute S - time_start=clock(); - cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector); - cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - if (cPar.error==true) {cout<<"error! 
fail to calculate the S matrix. "<<endl; return;} - - //compute vc estimates - CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); - - //if LDSC weights, then compute the weights and run the above steps again - if (cPar.a_mode==62) { - //compute the weights and normalize the weights for A - cPar.UpdateWeight (1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA); - - //read beta file again, and update weigths vector - ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_total, cPar.ns_test); - - //compute q - Calcq (cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, &s_vec.vector); - - //compute S - time_start=clock(); - cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &s_vec.vector); - cPar.time_G+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. 
"<<endl; return;} - - //compute vc estimates - CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); - } - - gsl_vector_set (s, cPar.n_vc, cPar.ni_test); - - cPar.WriteMatrix (S, "S"); - cPar.WriteMatrix (Vq, "Vq"); - cPar.WriteVector (q, "q"); - cPar.WriteVector (s, "size"); - - gsl_matrix_free (S); - gsl_matrix_free (Vq); - gsl_vector_free (q); - gsl_vector_free (s); - - gsl_matrix_free (A); - gsl_matrix_free (K); - gsl_vector_free (y); - gsl_matrix_free (W); - } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) { - if (!cPar.file_study.empty()) { - string sfile=cPar.file_study+".size.txt"; - CountFileLines (sfile, cPar.n_vc); - } else { - string file_name; - igzstream infile (cPar.file_mstudy.c_str(), igzstream::in); - if (!infile) {cout<<"error! fail to open mstudy file: "<<cPar.file_study<<endl; return;} - - safeGetline(infile, file_name); - - infile.clear(); - infile.close(); - - string sfile=file_name+".size.txt"; - CountFileLines (sfile, cPar.n_vc); - } - - cPar.n_vc=cPar.n_vc-1; - - gsl_matrix *S=gsl_matrix_alloc (2*cPar.n_vc, cPar.n_vc); - gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); - //gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1, (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) ); - //gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1, (n_lines*(n_lines+1))/2*(n_lines+1) ); - gsl_vector *q=gsl_vector_alloc (cPar.n_vc); - gsl_vector *s_study=gsl_vector_alloc (cPar.n_vc); - gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc); - gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1); - - gsl_matrix_set_zero(S); - gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); - gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); - - gsl_matrix_set_zero(Vq); - //gsl_matrix_set_zero(V); - //gsl_matrix_set_zero(Vslope); - gsl_vector_set_zero(q); - 
gsl_vector_set_zero(s_study); - gsl_vector_set_zero(s_ref); - - if (!cPar.file_study.empty()) { - ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study); - } else { - ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study); - } - - if (!cPar.file_ref.empty()) { - ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref); - } else { - ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref, cPar.ni_ref); - } - - cout<<"## number of variance components = "<<cPar.n_vc<<endl; - cout<<"## number of individuals in the sample = "<<cPar.ni_study<<endl; - cout<<"## number of individuals in the reference = "<<cPar.ni_ref<<endl; - - CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); - - gsl_vector_view s_sub=gsl_vector_subvector (s, 0, cPar.n_vc); - gsl_vector_memcpy (&s_sub.vector, s_ref); - gsl_vector_set (s, cPar.n_vc, cPar.ni_ref); - - cPar.WriteMatrix (S, "S"); - cPar.WriteMatrix (Vq, "Vq"); - cPar.WriteVector (q, "q"); - cPar.WriteVector (s, "size"); - - gsl_matrix_free (S); - gsl_matrix_free (Vq); - //gsl_matrix_free (V); - //gsl_matrix_free (Vslope); - gsl_vector_free (q); - gsl_vector_free (s_study); - gsl_vector_free (s_ref); - gsl_vector_free (s); - } else { - gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); - gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); - gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc ); - - //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, - cPar.CopyCvtPhen (W, Y, 0); - - //read kinship matrices - if (!(cPar.file_mk).empty()) { - ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. 
"<<endl; return;} - - //center matrix G, and obtain v_traceG - double d=0; - (cPar.v_traceG).clear(); - for (size_t i=0; i<cPar.n_vc; i++) { - gsl_matrix_view G_sub=gsl_matrix_submatrix (G, 0, i*G->size1, G->size1, G->size1); - CenterMatrix (&G_sub.matrix); - d=0; - for (size_t j=0; j<G->size1; j++) { - d+=gsl_matrix_get (&G_sub.matrix, j, j); - } - d/=(double)G->size1; - (cPar.v_traceG).push_back(d); - } - } else if (!(cPar.file_kin).empty()) { - ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} - - //center matrix G - CenterMatrix (G); - - (cPar.v_traceG).clear(); - double d=0; - for (size_t j=0; j<G->size1; j++) { - d+=gsl_matrix_get (G, j, j); - } - d/=(double)G->size1; - (cPar.v_traceG).push_back(d); - } - /* - //eigen-decomposition and calculate trace_G - cout<<"Start Eigen-Decomposition..."<<endl; - time_start=clock(); - - if (cPar.a_mode==31) { - cPar.trace_G=EigenDecomp (G, U, eval, 1); - } else { - cPar.trace_G=EigenDecomp (G, U, eval, 0); - } - - cPar.trace_G=0.0; - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} - cPar.trace_G+=gsl_vector_get (eval, i); - } - cPar.trace_G/=(double)eval->size; - - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - } else { - ReadFile_eigenU (cPar.file_ku, cPar.error, U); - if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; return;} - - ReadFile_eigenD (cPar.file_kd, cPar.error, eval); - if (cPar.error==true) {cout<<"error! fail to read the D file. 
"<<endl; return;} - - cPar.trace_G=0.0; - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);} - cPar.trace_G+=gsl_vector_get(eval, i); - } - cPar.trace_G/=(double)eval->size; - } - */ - //fit multiple variance components - if (cPar.n_ph==1) { - // if (cPar.n_vc==1) { - /* - //calculate UtW and Uty - CalcUtX (U, W, UtW); - CalcUtX (U, Y, UtY); - - gsl_vector_view beta=gsl_matrix_row (B, 0); - gsl_vector_view se_beta=gsl_matrix_row (se_B, 0); - gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); - - CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); - CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector); - - cPar.beta_mle_null.clear(); - cPar.se_beta_mle_null.clear(); - for (size_t i=0; i<B->size2; i++) { - cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) ); - cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) ); - } - - CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); - CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector); - cPar.beta_remle_null.clear(); - cPar.se_beta_remle_null.clear(); - for (size_t i=0; i<B->size2; i++) { - cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) ); - cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) ); - } - - CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); - cPar.PrintSummary(); - - //calculate and output residuals - if (cPar.a_mode==5) { - gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1); - gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1); - gsl_vector *u_hat=gsl_vector_alloc (Y->size1); - gsl_vector *e_hat=gsl_vector_alloc (Y->size1); - gsl_vector *y_hat=gsl_vector_alloc (Y->size1); - - //obtain Utu and 
Ute - gsl_vector_memcpy (y_hat, &UtY_col.vector); - gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); - - double d, u, e; - for (size_t i=0; i<eval->size; i++) { - d=gsl_vector_get (eval, i); - u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); - e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); - gsl_vector_set (Utu_hat, i, u); - gsl_vector_set (Ute_hat, i, e); - } - - //obtain u and e - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); - - //output residuals - cPar.WriteVector(u_hat, "residU"); - cPar.WriteVector(e_hat, "residE"); - - gsl_vector_free(u_hat); - gsl_vector_free(e_hat); - gsl_vector_free(y_hat); - } -*/ - // } else { - gsl_vector_view Y_col=gsl_matrix_column (Y, 0); - VC cVc; - cVc.CopyFromParam(cPar); - if (cPar.a_mode==61) { - cVc.CalcVChe (G, W, &Y_col.vector); - } else if (cPar.a_mode==62) { - cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector); - } else { - cVc.CalcVCacl (G, W, &Y_col.vector); - } - cVc.CopyToParam(cPar); - //obtain pve from sigma2 - //obtain se_pve from se_sigma2 - - //} - } - } - - } - - - //compute confidence intervals with additional summary statistics - //we do not check the sign of z-scores here, but they have to be matched with the genotypes - if (cPar.a_mode==66 || cPar.a_mode==67) { - //read reference file first - gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); - gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); - gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc); - - gsl_matrix_set_zero(S); - gsl_matrix_set_zero(Svar); - gsl_vector_set_zero(s_ref); - - if (!cPar.file_ref.empty()) { - ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref); - } else { - ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref); - } - - //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK - //normalize the weight in mapRS2wK to have an 
average of one; each element of mapRS2wA is 1 - set<string> setSnps_beta; - ReadFile_snps_header (cPar.file_beta, setSnps_beta); - - //obtain the weights for wA, which contains the SNP weights for SNPs used in the model - map <string, double> mapRS2wK; - cPar.ObtainWeight(setSnps_beta, mapRS2wK); - - //set up matrices and vector - gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc); - gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc); - gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc); - gsl_vector *w=gsl_vector_alloc (mapRS2wK.size()); - gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size()); - gsl_vector *z=gsl_vector_alloc (mapRS2wK.size()); - gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc); - - vector<size_t> vec_cat, vec_size; - vector<double> vec_z; - - map <string, double> mapRS2z, mapRS2wA; - map <string, string> mapRS2A1; - string file_str; - - //update s_vec, the number of snps in each category - for (size_t i=0; i<cPar.n_vc; i++) { - vec_size.push_back(0); - } - - for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { - vec_size[cPar.mapRS2cat[it->first]]++; - } - - for (size_t i=0; i<cPar.n_vc; i++) { - gsl_vector_set(s_vec, i, vec_size[i]); - } - - //update mapRS2wA using v_pve and s_vec - if (cPar.a_mode==66) { - for (map<string, double>::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { - mapRS2wA[it->first]=1; - } - } else { - cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA); - } - - //read in z-scores based on allele 0, and save that into a vector - ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z); - - //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat - //sign of z is determined by matching alleles - cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat); - - //compute an n by k matrix of X_iWz - cout<<"Calculating Xz ... 
"<<endl; - - gsl_matrix_set_zero(Xz); - gsl_vector_set_all (w1, 1); - - if (!cPar.file_bfile.empty() ) { - file_str=cPar.file_bfile+".bed"; - PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz); - } else if (!cPar.file_geno.empty()) { - BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w1, z, 0, Xz); - } else if (!cPar.file_mbfile.empty() ){ - MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz); - } else if (!cPar.file_mgeno.empty()) { - MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w1, z, Xz); - } - /* - cout<<"Xz: "<<endl; - for (size_t i=0; i<5; i++) { - for (size_t j=0; j<cPar.n_vc; j++) { - cout<<gsl_matrix_get (Xz, i, j)<<" "; - } - cout<<endl; - } - */ - if (cPar.a_mode==66) { - gsl_matrix_memcpy (XWz, Xz); - } else if (cPar.a_mode==67) { - cout<<"Calculating XWz ... "<<endl; - - gsl_matrix_set_zero(XWz); - - if (!cPar.file_bfile.empty() ) { - file_str=cPar.file_bfile+".bed"; - PlinkXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz); - } else if (!cPar.file_geno.empty()) { - BimbamXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, vec_cat, w, z, 0, XWz); - } else if (!cPar.file_mbfile.empty() ){ - MFILEXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz); - } else if (!cPar.file_mgeno.empty()) { - MFILEXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, vec_cat, w, z, XWz); - } - } - /* - cout<<"XWz: "<<endl; - for (size_t i=0; i<5; i++) { - cout<<gsl_vector_get (w, i)<<endl; - for (size_t j=0; j<cPar.n_vc; j++) { - cout<<gsl_matrix_get (XWz, i, j)<<" "; - } - cout<<endl; - } - */ - //compute an p by k matrix of X_j^TWX_iWz - cout<<"Calculating XtXWz ... 
"<<endl; - gsl_matrix_set_zero(XtXWz); - - if (!cPar.file_bfile.empty() ) { - file_str=cPar.file_bfile+".bed"; - PlinkXtXwz (file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz); - } else if (!cPar.file_geno.empty()) { - BimbamXtXwz (cPar.file_geno, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, XWz, 0, XtXWz); - } else if (!cPar.file_mbfile.empty() ){ - MFILEXtXwz (1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz); - } else if (!cPar.file_mgeno.empty()) { - MFILEXtXwz (0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, cPar.mindicator_snp, XWz, XtXWz); - } - /* - cout<<"XtXWz: "<<endl; - for (size_t i=0; i<5; i++) { - for (size_t j=0; j<cPar.n_vc; j++) { - cout<<gsl_matrix_get (XtXWz, i, j)<<" "; - } - cout<<endl; - } - */ - //compute confidence intervals - CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); - - //write files - //cPar.WriteMatrix (XWz, "XWz"); - //cPar.WriteMatrix (XtXWz, "XtXWz"); - //cPar.WriteVector (w, "w"); - - gsl_matrix_free(S); - gsl_matrix_free(Svar); - gsl_vector_free(s_ref); - - gsl_matrix_free(Xz); - gsl_matrix_free(XWz); - gsl_matrix_free(XtXWz); - gsl_vector_free(w); - gsl_vector_free(w1); - gsl_vector_free(z); - gsl_vector_free(s_vec); - } - - - //LMM or mvLMM or Eigen-Decomposition - if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==31) { //Fit LMM or mvLMM or eigen - gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); - gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); - gsl_matrix *B=gsl_matrix_alloc (Y->size2, W->size2); //B is a d by c matrix - gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2); - gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1); - gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); - gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, 
W->size2); - gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2); - gsl_vector *eval=gsl_vector_alloc (Y->size1); - gsl_vector *env=gsl_vector_alloc (Y->size1); - gsl_vector *weight=gsl_vector_alloc (Y->size1); - - //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, - cPar.CopyCvtPhen (W, Y, 0); - if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);} - - //read relatedness matrix G - if (!(cPar.file_kin).empty()) { - ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} - - //center matrix G - CenterMatrix (G); - - //is residual weights are provided, then - if (!cPar.file_weight.empty()) { - cPar.CopyWeight (weight); - double d, wi, wj; - for (size_t i=0; i<G->size1; i++) { - wi=gsl_vector_get(weight, i); - for (size_t j=i; j<G->size2; j++) { - wj=gsl_vector_get(weight, j); - d=gsl_matrix_get(G, i, j); - if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);} - gsl_matrix_set(G, i, j, d); - if (j!=i) {gsl_matrix_set(G, j, i, d);} - } - } - } - - //eigen-decomposition and calculate trace_G - cout<<"Start Eigen-Decomposition..."<<endl; - time_start=clock(); - - if (cPar.a_mode==31) { - cPar.trace_G=EigenDecomp (G, U, eval, 1); - } else { - cPar.trace_G=EigenDecomp (G, U, eval, 0); - } - - if (!cPar.file_weight.empty()) { - double wi; - for (size_t i=0; i<U->size1; i++) { - wi=gsl_vector_get(weight, i); - if (wi<=0) {wi=0;} else {wi=sqrt(wi);} - gsl_vector_view Urow=gsl_matrix_row (U, i); - gsl_vector_scale (&Urow.vector, wi); - } - } - - cPar.trace_G=0.0; - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} - cPar.trace_G+=gsl_vector_get (eval, i); - } - cPar.trace_G/=(double)eval->size; - - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - } else { - ReadFile_eigenU (cPar.file_ku, cPar.error, U); - if (cPar.error==true) 
{cout<<"error! fail to read the U file. "<<endl; return;} - - ReadFile_eigenD (cPar.file_kd, cPar.error, eval); - if (cPar.error==true) {cout<<"error! fail to read the D file. "<<endl; return;} - - cPar.trace_G=0.0; - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);} - cPar.trace_G+=gsl_vector_get(eval, i); - } - cPar.trace_G/=(double)eval->size; - } - - if (cPar.a_mode==31) { - cPar.WriteMatrix(U, "eigenU"); - cPar.WriteVector(eval, "eigenD"); - } else if (!cPar.file_gene.empty() ) { - //calculate UtW and Uty - CalcUtX (U, W, UtW); - CalcUtX (U, Y, UtY); - - LMM cLmm; - cLmm.CopyFromParam(cPar); - - gsl_vector_view Y_col=gsl_matrix_column (Y, 0); - gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); - - cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype - - cLmm.WriteFiles(); - cLmm.CopyToParam(cPar); - } else { - //calculate UtW and Uty - CalcUtX (U, W, UtW); - CalcUtX (U, Y, UtY); - - //calculate REMLE/MLE estimate and pve for univariate model - if (cPar.n_ph==1) { - gsl_vector_view beta=gsl_matrix_row (B, 0); - gsl_vector_view se_beta=gsl_matrix_row (se_B, 0); - gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); - - CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); - CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector); - - cPar.beta_mle_null.clear(); - cPar.se_beta_mle_null.clear(); - for (size_t i=0; i<B->size2; i++) { - cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) ); - cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) ); - } - - CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); - CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector); - 
cPar.beta_remle_null.clear(); - cPar.se_beta_remle_null.clear(); - for (size_t i=0; i<B->size2; i++) { - cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) ); - cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) ); - } - - CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); - cPar.PrintSummary(); - - //calculate and output residuals - if (cPar.a_mode==5) { - gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1); - gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1); - gsl_vector *u_hat=gsl_vector_alloc (Y->size1); - gsl_vector *e_hat=gsl_vector_alloc (Y->size1); - gsl_vector *y_hat=gsl_vector_alloc (Y->size1); - - //obtain Utu and Ute - gsl_vector_memcpy (y_hat, &UtY_col.vector); - gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); - - double d, u, e; - for (size_t i=0; i<eval->size; i++) { - d=gsl_vector_get (eval, i); - u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); - e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); - gsl_vector_set (Utu_hat, i, u); - gsl_vector_set (Ute_hat, i, e); - } - - //obtain u and e - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); - - //output residuals - cPar.WriteVector(u_hat, "residU"); - cPar.WriteVector(e_hat, "residE"); - - gsl_vector_free(u_hat); - gsl_vector_free(e_hat); - gsl_vector_free(y_hat); - } - } - - //Fit LMM or mvLMM - if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) { - if (cPar.n_ph==1) { - LMM cLmm; - cLmm.CopyFromParam(cPar); - - gsl_vector_view Y_col=gsl_matrix_column (Y, 0); - gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); - - if (!cPar.file_bfile.empty()) { - if (cPar.file_gxe.empty()) { - cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); - } else { - cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env); - } - } - // WJA added - else if(!cPar.file_oxford.empty()) 
{ - cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); - } - else { - if (cPar.file_gxe.empty()) { - cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); - } else { - cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env); - } - } - - cLmm.WriteFiles(); - cLmm.CopyToParam(cPar); - } else { - MVLMM cMvlmm; - cMvlmm.CopyFromParam(cPar); - - if (!cPar.file_bfile.empty()) { - if (cPar.file_gxe.empty()) { - cMvlmm.AnalyzePlink (U, eval, UtW, UtY); - } else { - cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env); - } - } - else if(!cPar.file_oxford.empty()) - { - cMvlmm.Analyzebgen (U, eval, UtW, UtY); - } - else { - if (cPar.file_gxe.empty()) { - cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY); - } else { - cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env); - } - } - - cMvlmm.WriteFiles(); - cMvlmm.CopyToParam(cPar); - } - } - } - - - //release all matrices and vectors - gsl_matrix_free (Y); - gsl_matrix_free (W); - gsl_matrix_free(B); - gsl_matrix_free(se_B); - gsl_matrix_free (G); - gsl_matrix_free (U); - gsl_matrix_free (UtW); - gsl_matrix_free (UtY); - gsl_vector_free (eval); - gsl_vector_free (env); - } - - - //BSLMM - if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { - gsl_vector *y=gsl_vector_alloc (cPar.ni_test); - gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); - gsl_matrix *G=gsl_matrix_alloc (y->size, y->size); - gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test); - - //set covariates matrix W and phenotype vector y - //an intercept should be included in W, - cPar.CopyCvtPhen (W, y, 0); - - //center y, even for case/control data - cPar.pheno_mean=CenterVector(y); - - //run bvsr if rho==1 - if (cPar.rho_min==1 && cPar.rho_max==1) { - //read genotypes X (not UtX) - cPar.ReadGenotypes (UtX, G, false); - - //perform BSLMM analysis - BSLMM cBslmm; - cBslmm.CopyFromParam(cPar); - time_start=clock(); - cBslmm.MCMC(UtX, y); - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - 
cBslmm.CopyToParam(cPar); - //else, if rho!=1 - } else { - gsl_matrix *U=gsl_matrix_alloc (y->size, y->size); - gsl_vector *eval=gsl_vector_alloc (y->size); - gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2); - gsl_vector *Uty=gsl_vector_alloc (y->size); - - //read relatedness matrix G - if (!(cPar.file_kin).empty()) { - cPar.ReadGenotypes (UtX, G, false); - - //read relatedness matrix G - ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<<endl; return;} - - //center matrix G - CenterMatrix (G); - } else { - cPar.ReadGenotypes (UtX, G, true); - } - - //eigen-decomposition and calculate trace_G - cout<<"Start Eigen-Decomposition..."<<endl; - time_start=clock(); - cPar.trace_G=EigenDecomp (G, U, eval, 0); - cPar.trace_G=0.0; - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} - cPar.trace_G+=gsl_vector_get (eval, i); - } - cPar.trace_G/=(double)eval->size; - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //calculate UtW and Uty - CalcUtX (U, W, UtW); - CalcUtX (U, y, Uty); - - //calculate REMLE/MLE estimate and pve - CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); - CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); - CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); - - cPar.PrintSummary(); - - //Creat and calcualte UtX, use a large memory - cout<<"Calculating UtX..."<<endl; - time_start=clock(); - CalcUtX (U, UtX); - cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //perform BSLMM or BSLMMDAP analysis - if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { - BSLMM cBslmm; - cBslmm.CopyFromParam(cPar); - time_start=clock(); - if (cPar.a_mode==12) { //ridge regression - 
cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null); - } else { //Run MCMC - cBslmm.MCMC(U, UtX, Uty, eval, y); - } - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - cBslmm.CopyToParam(cPar); - } else { - } - - //release all matrices and vectors - gsl_matrix_free (G); - gsl_matrix_free (U); - gsl_matrix_free (UtW); - gsl_vector_free (eval); - gsl_vector_free (Uty); - - } - gsl_matrix_free (W); - gsl_vector_free (y); - gsl_matrix_free (UtX); - } - - - - //BSLMM-DAP - if (cPar.a_mode==14 || cPar.a_mode==15 || cPar.a_mode==16) { - if (cPar.a_mode==14) { - gsl_vector *y=gsl_vector_alloc (cPar.ni_test); - gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); - gsl_matrix *G=gsl_matrix_alloc (y->size, y->size); - gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test); - - //set covariates matrix W and phenotype vector y - //an intercept should be included in W, - cPar.CopyCvtPhen (W, y, 0); - - //center y, even for case/control data - cPar.pheno_mean=CenterVector(y); - - //run bvsr if rho==1 - if (cPar.rho_min==1 && cPar.rho_max==1) { - //read genotypes X (not UtX) - cPar.ReadGenotypes (UtX, G, false); - - //perform BSLMM analysis - BSLMM cBslmm; - cBslmm.CopyFromParam(cPar); - time_start=clock(); - cBslmm.MCMC(UtX, y); - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - cBslmm.CopyToParam(cPar); - //else, if rho!=1 - } else { - gsl_matrix *U=gsl_matrix_alloc (y->size, y->size); - gsl_vector *eval=gsl_vector_alloc (y->size); - gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2); - gsl_vector *Uty=gsl_vector_alloc (y->size); - - //read relatedness matrix G - if (!(cPar.file_kin).empty()) { - cPar.ReadGenotypes (UtX, G, false); - - //read relatedness matrix G - ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); - if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. 
"<<endl; return;} - - //center matrix G - CenterMatrix (G); - } else { - cPar.ReadGenotypes (UtX, G, true); - } - - //eigen-decomposition and calculate trace_G - cout<<"Start Eigen-Decomposition..."<<endl; - time_start=clock(); - cPar.trace_G=EigenDecomp (G, U, eval, 0); - cPar.trace_G=0.0; - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} - cPar.trace_G+=gsl_vector_get (eval, i); - } - cPar.trace_G/=(double)eval->size; - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //calculate UtW and Uty - CalcUtX (U, W, UtW); - CalcUtX (U, y, Uty); - - //calculate REMLE/MLE estimate and pve - CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); - CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); - CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); - - cPar.PrintSummary(); - - //Creat and calcualte UtX, use a large memory - cout<<"Calculating UtX..."<<endl; - time_start=clock(); - CalcUtX (U, UtX); - cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //perform analysis; assume X and y are already centered - BSLMMDAP cBslmmDap; - cBslmmDap.CopyFromParam(cPar); - time_start=clock(); - cBslmmDap.DAP_CalcBF (U, UtX, Uty, eval, y); - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - cBslmmDap.CopyToParam(cPar); - - //release all matrices and vectors - gsl_matrix_free (G); - gsl_matrix_free (U); - gsl_matrix_free (UtW); - gsl_vector_free (eval); - gsl_vector_free (Uty); - } - - gsl_matrix_free (W); - gsl_vector_free (y); - gsl_matrix_free (UtX); - } else if (cPar.a_mode==15) { - //perform EM algorithm and estimate parameters - vector<string> vec_rs; - vector<double> vec_sa2, vec_sb2, wab; - vector<vector<vector<double> > > BF; - - //read hyp and bf files (functions defined in BSLMMDAP) - ReadFile_hyb 
(cPar.file_hyp, vec_sa2, vec_sb2, wab); - ReadFile_bf (cPar.file_bf, vec_rs, BF); - - cPar.ns_test=vec_rs.size(); - if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"<<endl;} - - //load annotations - gsl_matrix *Ac; - gsl_matrix_int *Ad; - gsl_vector_int *dlevel; - size_t kc, kd; - if (!cPar.file_cat.empty()) { - ReadFile_cat (cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd); - } else { - kc=0; kd=0; - } - - cout<<"## number of blocks = "<<BF.size()<<endl; - cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl; - cout<<"## grid size for hyperparameters = "<<wab.size()<<endl; - cout<<"## number of continuous annotations = "<<kc<<endl; - cout<<"## number of discrete annotations = "<<kd<<endl; - - //DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel); - - //perform analysis - BSLMMDAP cBslmmDap; - cBslmmDap.CopyFromParam(cPar); - time_start=clock(); - cBslmmDap.DAP_EstimateHyper (kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac, Ad, dlevel); - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - cBslmmDap.CopyToParam(cPar); - - gsl_matrix_free(Ac); - gsl_matrix_int_free(Ad); - gsl_vector_int_free(dlevel); - } else { - // - } - - } - - - - - /* - //LDR (change 14 to 16?) 
- if (cPar.a_mode==14) { - gsl_vector *y=gsl_vector_alloc (cPar.ni_test); - gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); - gsl_matrix *G=gsl_matrix_alloc (1, 1); - vector<vector<unsigned char> > Xt; - - //set covariates matrix W and phenotype vector y - //an intercept is included in W - cPar.CopyCvtPhen (W, y, 0); - - //read in genotype matrix X - cPar.ReadGenotypes (Xt, G, false); - - LDR cLdr; - cLdr.CopyFromParam(cPar); - time_start=clock(); - - cLdr.VB(Xt, W, y); - - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - cLdr.CopyToParam(cPar); - - gsl_vector_free (y); - gsl_matrix_free (W); - gsl_matrix_free (G); - } - */ - - cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0); - - return; +void GEMMA::BatchRun(PARAM &cPar) { + clock_t time_begin, time_start; + time_begin = clock(); + + // Read Files. + cout << "Reading Files ... " << endl; + cPar.ReadFiles(); + if (cPar.error == true) { + cout << "error! fail to read files. " << endl; + return; + } + cPar.CheckData(); + if (cPar.error == true) { + cout << "error! fail to check data. " << endl; + return; + } + + // Prediction for bslmm + if (cPar.a_mode == 41 || cPar.a_mode == 42) { + gsl_vector *y_prdt; + + y_prdt = gsl_vector_alloc(cPar.ni_total - cPar.ni_test); + + // set to zero + gsl_vector_set_zero(y_prdt); + + PRDT cPRDT; + cPRDT.CopyFromParam(cPar); + + // add breeding value if needed + if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) { + cout << "Adding Breeding Values ... 
" << endl; + + gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total); + gsl_vector *u_hat = gsl_vector_alloc(cPar.ni_test); + + // read kinship matrix and set u_hat + vector<int> indicator_all; + size_t c_bv = 0; + for (size_t i = 0; i < cPar.indicator_idv.size(); i++) { + indicator_all.push_back(1); + if (cPar.indicator_bv[i] == 1) { + gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); + c_bv++; + } + } + + ReadFile_kin(cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, + cPar.error, G); + if (cPar.error == true) { + cout << "error! fail to read kinship/relatedness file. " << endl; + return; + } + + // read u + cPRDT.AddBV(G, u_hat, y_prdt); + + gsl_matrix_free(G); + gsl_vector_free(u_hat); + } + + // add beta + if (!cPar.file_bfile.empty()) { + cPRDT.AnalyzePlink(y_prdt); + } else { + cPRDT.AnalyzeBimbam(y_prdt); + } + + // add mu + gsl_vector_add_constant(y_prdt, cPar.pheno_mean); + + // convert y to probability if needed + if (cPar.a_mode == 42) { + double d; + for (size_t i = 0; i < y_prdt->size; i++) { + d = gsl_vector_get(y_prdt, i); + d = gsl_cdf_gaussian_P(d, 1.0); + gsl_vector_set(y_prdt, i, d); + } + } + + cPRDT.CopyToParam(cPar); + + cPRDT.WriteFiles(y_prdt); + + gsl_vector_free(y_prdt); + } + + // Prediction with kinship matrix only; for one or more phenotypes + if (cPar.a_mode == 43) { + // first, use individuals with full phenotypes to obtain estimates of Vg and + // Ve + gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph); + gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt); + gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1); + gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1); + gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2); + gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2); + gsl_vector *eval = gsl_vector_alloc(Y->size1); + + gsl_matrix *Y_full = gsl_matrix_alloc(cPar.ni_cvt, cPar.n_ph); + gsl_matrix *W_full = gsl_matrix_alloc(Y_full->size1, cPar.n_cvt); + + // set covariates matrix W and phenotype 
matrix Y + // an intercept should be included in W, + cPar.CopyCvtPhen(W, Y, 0); + cPar.CopyCvtPhen(W_full, Y_full, 1); + + gsl_matrix *Y_hat = gsl_matrix_alloc(Y_full->size1, cPar.n_ph); + gsl_matrix *G_full = gsl_matrix_alloc(Y_full->size1, Y_full->size1); + gsl_matrix *H_full = gsl_matrix_alloc(Y_full->size1 * Y_hat->size2, + Y_full->size1 * Y_hat->size2); + + // read relatedness matrix G, and matrix G_full + ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, + cPar.error, G); + if (cPar.error == true) { + cout << "error! fail to read kinship/relatedness file. " << endl; + return; + } + ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, + cPar.error, G_full); + if (cPar.error == true) { + cout << "error! fail to read kinship/relatedness file. " << endl; + return; + } + + // center matrix G + CenterMatrix(G); + CenterMatrix(G_full); + + // eigen-decomposition and calculate trace_G + cout << "Start Eigen-Decomposition..." << endl; + time_start = clock(); + cPar.trace_G = EigenDecomp(G, U, eval, 0); + cPar.trace_G = 0.0; + for (size_t i = 0; i < eval->size; i++) { + if (gsl_vector_get(eval, i) < 1e-10) { + gsl_vector_set(eval, i, 0); + } + cPar.trace_G += gsl_vector_get(eval, i); + } + cPar.trace_G /= (double)eval->size; + cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // calculate UtW and Uty + CalcUtX(U, W, UtW); + CalcUtX(U, Y, UtY); + + // calculate variance component and beta estimates + // and then obtain predicted values + if (cPar.n_ph == 1) { + gsl_vector *beta = gsl_vector_alloc(W->size2); + gsl_vector *se_beta = gsl_vector_alloc(W->size2); + + double lambda, logl, vg, ve; + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + + // obtain estimates + CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, + cPar.n_region, lambda, logl); + CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, lambda, vg, ve, beta, + se_beta); + + cout << "REMLE estimate for vg in the 
null model = " << vg << endl; + cout << "REMLE estimate for ve in the null model = " << ve << endl; + cPar.vg_remle_null = vg; + cPar.ve_remle_null = ve; + + // obtain Y_hat from fixed effects + gsl_vector_view Yhat_col = gsl_matrix_column(Y_hat, 0); + gsl_blas_dgemv(CblasNoTrans, 1.0, W_full, beta, 0.0, &Yhat_col.vector); + + // obtain H + gsl_matrix_set_identity(H_full); + gsl_matrix_scale(H_full, ve); + gsl_matrix_scale(G_full, vg); + gsl_matrix_add(H_full, G_full); + + // free matrices + gsl_vector_free(beta); + gsl_vector_free(se_beta); + } else { + gsl_matrix *Vg = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph); + gsl_matrix *Ve = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph); + gsl_matrix *B = gsl_matrix_alloc(cPar.n_ph, W->size2); + gsl_matrix *se_B = gsl_matrix_alloc(cPar.n_ph, W->size2); + + // obtain estimates + CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter, + cPar.em_prec, cPar.nr_prec, cPar.l_min, cPar.l_max, + cPar.n_region, Vg, Ve, B, se_B); + + cout << "REMLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < Vg->size1; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(Vg, i, j) << "\t"; + } + cout << endl; + } + cout << "REMLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < Ve->size1; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(Ve, i, j) << "\t"; + } + cout << endl; + } + cPar.Vg_remle_null.clear(); + cPar.Ve_remle_null.clear(); + for (size_t i = 0; i < Vg->size1; i++) { + for (size_t j = i; j < Vg->size2; j++) { + cPar.Vg_remle_null.push_back(gsl_matrix_get(Vg, i, j)); + cPar.Ve_remle_null.push_back(gsl_matrix_get(Ve, i, j)); + } + } + + // obtain Y_hat from fixed effects + gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat); + + // obtain H + KroneckerSym(G_full, Vg, H_full); + for (size_t i = 0; i < G_full->size1; i++) { + gsl_matrix_view H_sub = gsl_matrix_submatrix( + H_full, i * Ve->size1, i * Ve->size2, Ve->size1, Ve->size2); + 
gsl_matrix_add(&H_sub.matrix, Ve); + } + + // free matrices + gsl_matrix_free(Vg); + gsl_matrix_free(Ve); + gsl_matrix_free(B); + gsl_matrix_free(se_B); + } + + PRDT cPRDT; + + cPRDT.CopyFromParam(cPar); + + cout << "Predicting Missing Phentypes ... " << endl; + time_start = clock(); + cPRDT.MvnormPrdt(Y_hat, H_full, Y_full); + cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + cPRDT.WriteFiles(Y_full); + + gsl_matrix_free(Y); + gsl_matrix_free(W); + gsl_matrix_free(G); + gsl_matrix_free(U); + gsl_matrix_free(UtW); + gsl_matrix_free(UtY); + gsl_vector_free(eval); + + gsl_matrix_free(Y_full); + gsl_matrix_free(Y_hat); + gsl_matrix_free(W_full); + gsl_matrix_free(G_full); + gsl_matrix_free(H_full); + } + + // Generate Kinship matrix + if (cPar.a_mode == 21 || cPar.a_mode == 22) { + cout << "Calculating Relatedness Matrix ... " << endl; + + gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total); + + time_start = clock(); + cPar.CalcKin(G); + cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + if (cPar.error == true) { + cout << "error! fail to calculate relatedness matrix. " << endl; + return; + } + + if (cPar.a_mode == 21) { + cPar.WriteMatrix(G, "cXX"); + } else { + cPar.WriteMatrix(G, "sXX"); + } + + gsl_matrix_free(G); + } + + // Compute the LDSC weights (not implemented yet) + if (cPar.a_mode == 72) { + cout << "Calculating Weights ... " << endl; + + VARCOV cVarcov; + cVarcov.CopyFromParam(cPar); + + if (!cPar.file_bfile.empty()) { + cVarcov.AnalyzePlink(); + } else { + cVarcov.AnalyzeBimbam(); + } + + cVarcov.CopyToParam(cPar); + } + + // Compute the S matrix (and its variance), that is used for + // variance component estimation using summary statistics. + if (cPar.a_mode == 25 || cPar.a_mode == 26) { + cout << "Calculating the S Matrix ... 
" << endl; + + gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc); + gsl_vector *ns = gsl_vector_alloc(cPar.n_vc + 1); + gsl_matrix_set_zero(S); + gsl_vector_set_zero(ns); + + gsl_matrix_view S_mat = gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); + gsl_matrix_view Svar_mat = + gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); + gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc); + + gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test); + gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test); + gsl_matrix_set_zero(K); + gsl_matrix_set_zero(A); + + gsl_vector *y = gsl_vector_alloc(cPar.ni_test); + gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt); + + cPar.CopyCvtPhen(W, y, 0); + + set<string> setSnps_beta; + map<string, double> mapRS2wA, mapRS2wK; + + cPar.ObtainWeight(setSnps_beta, mapRS2wK); + + time_start = clock(); + cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, + &ns_vec.vector); + cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + if (cPar.error == true) { + cout << "error! fail to calculate the S matrix. 
" << endl; + return; + } + + gsl_vector_set(ns, cPar.n_vc, cPar.ni_test); + + cPar.WriteMatrix(S, "S"); + cPar.WriteVector(ns, "size"); + cPar.WriteVar("snps"); + + gsl_matrix_free(S); + gsl_vector_free(ns); + + gsl_matrix_free(A); + gsl_matrix_free(K); + + gsl_vector_free(y); + gsl_matrix_free(K); + } + + // Compute the q vector, that is used for variance component estimation using + // summary statistics + if (cPar.a_mode == 27 || cPar.a_mode == 28) { + gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc); + gsl_vector *q = gsl_vector_alloc(cPar.n_vc); + gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1); + gsl_vector_set_zero(q); + gsl_vector_set_zero(s); + + gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc); + + vector<size_t> vec_cat, vec_ni; + vector<double> vec_weight, vec_z2; + map<string, double> mapRS2weight; + mapRS2weight.clear(); + + time_start = clock(); + ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, + vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, + cPar.ns_test); + cout << "## number of total individuals = " << cPar.ni_total << endl; + cout << "## number of total SNPs = " << cPar.ns_total << endl; + cout << "## number of analyzed SNPs = " << cPar.ns_test << endl; + cout << "## number of variance components = " << cPar.n_vc << endl; + cout << "Calculating the q vector ... " << endl; + Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, + &s_vec.vector); + cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + if (cPar.error == true) { + cout << "error! fail to calculate the q vector. " << endl; + return; + } + + gsl_vector_set(s, cPar.n_vc, cPar.ni_total); + + cPar.WriteMatrix(Vq, "Vq"); + cPar.WriteVector(q, "q"); + cPar.WriteVector(s, "size"); + /* + for (size_t i=0; i<cPar.n_vc; i++) { + cout<<gsl_vector_get(q, i)<<endl; + } + */ + gsl_matrix_free(Vq); + gsl_vector_free(q); + gsl_vector_free(s); + } + + // Calculate SNP covariance. 
+ if (cPar.a_mode == 71) { + VARCOV cVarcov; + cVarcov.CopyFromParam(cPar); + + if (!cPar.file_bfile.empty()) { + cVarcov.AnalyzePlink(); + } else { + cVarcov.AnalyzeBimbam(); + } + + cVarcov.CopyToParam(cPar); + } + + // LM. + if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 || + cPar.a_mode == 54) { // Fit LM + gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph); + gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt); + + // set covariates matrix W and phenotype matrix Y + // an intercept should be included in W, + cPar.CopyCvtPhen(W, Y, 0); + + // Fit LM or mvLM + if (cPar.n_ph == 1) { + LM cLm; + cLm.CopyFromParam(cPar); + + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); + + if (!cPar.file_gene.empty()) { + cLm.AnalyzeGene(W, + &Y_col.vector); // y is the predictor, not the phenotype + } else if (!cPar.file_bfile.empty()) { + cLm.AnalyzePlink(W, &Y_col.vector); + } else if (!cPar.file_oxford.empty()) { + cLm.Analyzebgen(W, &Y_col.vector); + } else { + cLm.AnalyzeBimbam(W, &Y_col.vector); + } + + cLm.WriteFiles(); + cLm.CopyToParam(cPar); + } + /* + else { + MVLM cMvlm; + cMvlm.CopyFromParam(cPar); + + if (!cPar.file_bfile.empty()) { + cMvlm.AnalyzePlink (W, Y); + } else { + cMvlm.AnalyzeBimbam (W, Y); + } + + cMvlm.WriteFiles(); + cMvlm.CopyToParam(cPar); + } + */ + // release all matrices and vectors + gsl_matrix_free(Y); + gsl_matrix_free(W); + } + + // VC estimation with one or multiple kinship matrices + // REML approach only + // if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, + // in param.cpp + // for one phenotype only; + if (cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) { + if (!cPar.file_beta.empty()) { + // need to obtain a common set of SNPs between beta file and the genotype + // file; these are saved in mapRS2wA and mapRS2wK + // normalize the weight in mapRS2wK to have an average of one; each + // element of mapRS2wA is 1 + // update indicator_snps, so that the numbers are in 
accordance with + // mapRS2wK + set<string> setSnps_beta; + ReadFile_snps_header(cPar.file_beta, setSnps_beta); + + map<string, double> mapRS2wA, mapRS2wK; + cPar.ObtainWeight(setSnps_beta, mapRS2wK); + + cPar.UpdateSNP(mapRS2wK); + + // Setup matrices and vectors. + gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc); + gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc); + gsl_vector *q = gsl_vector_alloc(cPar.n_vc); + gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1); + + gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test); + gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test); + + gsl_vector *y = gsl_vector_alloc(cPar.ni_test); + gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt); + + gsl_matrix_set_zero(K); + gsl_matrix_set_zero(A); + + gsl_matrix_set_zero(S); + gsl_matrix_set_zero(Vq); + gsl_vector_set_zero(q); + gsl_vector_set_zero(s); + + cPar.CopyCvtPhen(W, y, 0); + + gsl_matrix_view S_mat = + gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); + gsl_matrix_view Svar_mat = + gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); + gsl_vector_view s_vec = gsl_vector_subvector(s, 0, cPar.n_vc); + + vector<size_t> vec_cat, vec_ni; + vector<double> vec_weight, vec_z2; + + // read beta, based on the mapRS2wK + ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, + vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, + cPar.ns_test); + + cout << "Study Panel: " << endl; + cout << "## number of total individuals = " << cPar.ni_study << endl; + cout << "## number of total SNPs = " << cPar.ns_study << endl; + cout << "## number of analyzed SNPs = " << cPar.ns_test << endl; + cout << "## number of variance components = " << cPar.n_vc << endl; + + // compute q + Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, + &s_vec.vector); + + // compute S + time_start = clock(); + cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, + &s_vec.vector); + 
cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + if (cPar.error == true) { + cout << "error! fail to calculate the S matrix. " << endl; + return; + } + + // compute vc estimates + CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, + cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, + cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, + cPar.v_enrich, cPar.v_se_enrich); + + // if LDSC weights, then compute the weights and run the above steps again + if (cPar.a_mode == 62) { + // compute the weights and normalize the weights for A + cPar.UpdateWeight(1, mapRS2wK, cPar.ni_study, &s_vec.vector, mapRS2wA); + + // read beta file again, and update weigths vector + ReadFile_beta(cPar.file_beta, cPar.mapRS2cat, mapRS2wA, vec_cat, vec_ni, + vec_weight, vec_z2, cPar.ni_study, cPar.ns_total, + cPar.ns_test); + + // compute q + Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q, + &s_vec.vector); + + // compute S + time_start = clock(); + cPar.CalcS(mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, + &s_vec.vector); + cPar.time_G += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + if (cPar.error == true) { + cout << "error! fail to calculate the S matrix. 
" << endl; + return; + } + + // compute vc estimates + CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, &s_vec.vector, + cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, + cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, + cPar.v_enrich, cPar.v_se_enrich); + } + + gsl_vector_set(s, cPar.n_vc, cPar.ni_test); + + cPar.WriteMatrix(S, "S"); + cPar.WriteMatrix(Vq, "Vq"); + cPar.WriteVector(q, "q"); + cPar.WriteVector(s, "size"); + + gsl_matrix_free(S); + gsl_matrix_free(Vq); + gsl_vector_free(q); + gsl_vector_free(s); + + gsl_matrix_free(A); + gsl_matrix_free(K); + gsl_vector_free(y); + gsl_matrix_free(W); + } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) { + if (!cPar.file_study.empty()) { + string sfile = cPar.file_study + ".size.txt"; + CountFileLines(sfile, cPar.n_vc); + } else { + string file_name; + igzstream infile(cPar.file_mstudy.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open mstudy file: " << cPar.file_study + << endl; + return; + } + + safeGetline(infile, file_name); + + infile.clear(); + infile.close(); + + string sfile = file_name + ".size.txt"; + CountFileLines(sfile, cPar.n_vc); + } + + cPar.n_vc = cPar.n_vc - 1; + + gsl_matrix *S = gsl_matrix_alloc(2 * cPar.n_vc, cPar.n_vc); + gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc); + // gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1, + // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) ); + // gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1, + // (n_lines*(n_lines+1))/2*(n_lines+1) ); + gsl_vector *q = gsl_vector_alloc(cPar.n_vc); + gsl_vector *s_study = gsl_vector_alloc(cPar.n_vc); + gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc); + gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1); + + gsl_matrix_set_zero(S); + gsl_matrix_view S_mat = + gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); + gsl_matrix_view Svar_mat = + gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); + + gsl_matrix_set_zero(Vq); + // gsl_matrix_set_zero(V); + // 
gsl_matrix_set_zero(Vslope); + gsl_vector_set_zero(q); + gsl_vector_set_zero(s_study); + gsl_vector_set_zero(s_ref); + + if (!cPar.file_study.empty()) { + ReadFile_study(cPar.file_study, Vq, q, s_study, cPar.ni_study); + } else { + ReadFile_mstudy(cPar.file_mstudy, Vq, q, s_study, cPar.ni_study); + } + + if (!cPar.file_ref.empty()) { + ReadFile_ref(cPar.file_ref, &S_mat.matrix, &Svar_mat.matrix, s_ref, + cPar.ni_ref); + } else { + ReadFile_mref(cPar.file_mref, &S_mat.matrix, &Svar_mat.matrix, s_ref, + cPar.ni_ref); + } + + cout << "## number of variance components = " << cPar.n_vc << endl; + cout << "## number of individuals in the sample = " << cPar.ni_study + << endl; + cout << "## number of individuals in the reference = " << cPar.ni_ref + << endl; + + CalcVCss(Vq, &S_mat.matrix, &Svar_mat.matrix, q, s_study, cPar.ni_study, + cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, + cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, + cPar.v_se_enrich); + + gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc); + gsl_vector_memcpy(&s_sub.vector, s_ref); + gsl_vector_set(s, cPar.n_vc, cPar.ni_ref); + + cPar.WriteMatrix(S, "S"); + cPar.WriteMatrix(Vq, "Vq"); + cPar.WriteVector(q, "q"); + cPar.WriteVector(s, "size"); + + gsl_matrix_free(S); + gsl_matrix_free(Vq); + // gsl_matrix_free (V); + // gsl_matrix_free (Vslope); + gsl_vector_free(q); + gsl_vector_free(s_study); + gsl_vector_free(s_ref); + gsl_vector_free(s); + } else { + gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph); + gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt); + gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1 * cPar.n_vc); + + // set covariates matrix W and phenotype matrix Y + // an intercept should be included in W, + cPar.CopyCvtPhen(W, Y, 0); + + // read kinship matrices + if (!(cPar.file_mk).empty()) { + ReadFile_mk(cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, + cPar.k_mode, cPar.error, G); + if (cPar.error == true) { + cout << "error! 
fail to read kinship/relatedness file. " << endl; + return; + } + + // center matrix G, and obtain v_traceG + double d = 0; + (cPar.v_traceG).clear(); + for (size_t i = 0; i < cPar.n_vc; i++) { + gsl_matrix_view G_sub = + gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1); + CenterMatrix(&G_sub.matrix); + d = 0; + for (size_t j = 0; j < G->size1; j++) { + d += gsl_matrix_get(&G_sub.matrix, j, j); + } + d /= (double)G->size1; + (cPar.v_traceG).push_back(d); + } + } else if (!(cPar.file_kin).empty()) { + ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, + cPar.k_mode, cPar.error, G); + if (cPar.error == true) { + cout << "error! fail to read kinship/relatedness file. " << endl; + return; + } + + // center matrix G + CenterMatrix(G); + + (cPar.v_traceG).clear(); + double d = 0; + for (size_t j = 0; j < G->size1; j++) { + d += gsl_matrix_get(G, j, j); + } + d /= (double)G->size1; + (cPar.v_traceG).push_back(d); + } + /* + //eigen-decomposition and calculate trace_G + cout<<"Start Eigen-Decomposition..."<<endl; + time_start=clock(); + + if (cPar.a_mode==31) { + cPar.trace_G=EigenDecomp (G, U, eval, 1); + } else { + cPar.trace_G=EigenDecomp (G, U, eval, 0); + } + + cPar.trace_G=0.0; + for (size_t i=0; i<eval->size; i++) { + if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} + cPar.trace_G+=gsl_vector_get (eval, i); + } + cPar.trace_G/=(double)eval->size; + + cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); +} else { + ReadFile_eigenU (cPar.file_ku, cPar.error, U); + if (cPar.error==true) {cout<<"error! fail to read the U file. "<<endl; +return;} + + ReadFile_eigenD (cPar.file_kd, cPar.error, eval); + if (cPar.error==true) {cout<<"error! fail to read the D file. 
"<<endl; +return;} + + cPar.trace_G=0.0; + for (size_t i=0; i<eval->size; i++) { + if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);} + cPar.trace_G+=gsl_vector_get(eval, i); + } + cPar.trace_G/=(double)eval->size; } +*/ + // fit multiple variance components + if (cPar.n_ph == 1) { + // if (cPar.n_vc==1) { + /* + //calculate UtW and Uty + CalcUtX (U, W, UtW); + CalcUtX (U, Y, UtY); + + gsl_vector_view beta=gsl_matrix_row (B, 0); + gsl_vector_view se_beta=gsl_matrix_row (se_B, 0); + gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); + + CalcLambda ('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, + cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); + CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_mle_null, + cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, &se_beta.vector); + + cPar.beta_mle_null.clear(); + cPar.se_beta_mle_null.clear(); + for (size_t i=0; i<B->size2; i++) { + cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i) ); + cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i) ); + } + + CalcLambda ('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, + cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); + CalcLmmVgVeBeta (eval, UtW, &UtY_col.vector, cPar.l_remle_null, + cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, &se_beta.vector); + cPar.beta_remle_null.clear(); + cPar.se_beta_remle_null.clear(); + for (size_t i=0; i<B->size2; i++) { + cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) ); + cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) ); + } + + CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, + cPar.pve_null, cPar.pve_se_null); + cPar.PrintSummary(); + + //calculate and output residuals + if (cPar.a_mode==5) { + gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1); + gsl_vector *Ute_hat=gsl_vector_alloc (Y->size1); + gsl_vector *u_hat=gsl_vector_alloc (Y->size1); + gsl_vector *e_hat=gsl_vector_alloc (Y->size1); + gsl_vector *y_hat=gsl_vector_alloc (Y->size1); + + 
//obtain Utu and Ute + gsl_vector_memcpy (y_hat, &UtY_col.vector); + gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); + + double d, u, e; + for (size_t i=0; i<eval->size; i++) { + d=gsl_vector_get (eval, i); + u=cPar.l_remle_null*d/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, + i); + e=1.0/(cPar.l_remle_null*d+1.0)*gsl_vector_get(y_hat, i); + gsl_vector_set (Utu_hat, i, u); + gsl_vector_set (Ute_hat, i, e); + } + + //obtain u and e + gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); + gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); + + //output residuals + cPar.WriteVector(u_hat, "residU"); + cPar.WriteVector(e_hat, "residE"); + + gsl_vector_free(u_hat); + gsl_vector_free(e_hat); + gsl_vector_free(y_hat); + } +*/ + // } else { + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); + VC cVc; + cVc.CopyFromParam(cPar); + if (cPar.a_mode == 61) { + cVc.CalcVChe(G, W, &Y_col.vector); + } else if (cPar.a_mode == 62) { + cVc.CalcVCreml(cPar.noconstrain, G, W, &Y_col.vector); + } else { + cVc.CalcVCacl(G, W, &Y_col.vector); + } + cVc.CopyToParam(cPar); + // obtain pve from sigma2 + // obtain se_pve from se_sigma2 + + //} + } + } + } + + // compute confidence intervals with additional summary statistics + // we do not check the sign of z-scores here, but they have to be matched with + // the genotypes + if (cPar.a_mode == 66 || cPar.a_mode == 67) { + // read reference file first + gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc); + gsl_matrix *Svar = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc); + gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc); + + gsl_matrix_set_zero(S); + gsl_matrix_set_zero(Svar); + gsl_vector_set_zero(s_ref); + + if (!cPar.file_ref.empty()) { + ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref); + } else { + ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref); + } + + // need to obtain a common set of SNPs between beta file and the genotype + // file; these are saved in mapRS2wA and mapRS2wK + // 
normalize the weight in mapRS2wK to have an average of one; each element + // of mapRS2wA is 1 + set<string> setSnps_beta; + ReadFile_snps_header(cPar.file_beta, setSnps_beta); + + // obtain the weights for wA, which contains the SNP weights for SNPs used + // in the model + map<string, double> mapRS2wK; + cPar.ObtainWeight(setSnps_beta, mapRS2wK); + + // set up matrices and vector + gsl_matrix *Xz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc); + gsl_matrix *XWz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc); + gsl_matrix *XtXWz = + gsl_matrix_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc); + gsl_vector *w = gsl_vector_alloc(mapRS2wK.size()); + gsl_vector *w1 = gsl_vector_alloc(mapRS2wK.size()); + gsl_vector *z = gsl_vector_alloc(mapRS2wK.size()); + gsl_vector *s_vec = gsl_vector_alloc(cPar.n_vc); + + vector<size_t> vec_cat, vec_size; + vector<double> vec_z; + + map<string, double> mapRS2z, mapRS2wA; + map<string, string> mapRS2A1; + string file_str; + + // update s_vec, the number of snps in each category + for (size_t i = 0; i < cPar.n_vc; i++) { + vec_size.push_back(0); + } + + for (map<string, double>::const_iterator it = mapRS2wK.begin(); + it != mapRS2wK.end(); ++it) { + vec_size[cPar.mapRS2cat[it->first]]++; + } + + for (size_t i = 0; i < cPar.n_vc; i++) { + gsl_vector_set(s_vec, i, vec_size[i]); + } + + // update mapRS2wA using v_pve and s_vec + if (cPar.a_mode == 66) { + for (map<string, double>::const_iterator it = mapRS2wK.begin(); + it != mapRS2wK.end(); ++it) { + mapRS2wA[it->first] = 1; + } + } else { + cPar.UpdateWeight(0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA); + } + + // read in z-scores based on allele 0, and save that into a vector + ReadFile_beta(cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z); + + // update snp indicator, save weights to w, save z-scores to vec_z, save + // category label to vec_cat + // sign of z is determined by matching alleles + cPar.UpdateSNPnZ(mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat); + + // compute an n by k matrix of X_iWz 
+ cout << "Calculating Xz ... " << endl; + + gsl_matrix_set_zero(Xz); + gsl_vector_set_all(w1, 1); + + if (!cPar.file_bfile.empty()) { + file_str = cPar.file_bfile + ".bed"; + PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, + vec_cat, w1, z, 0, Xz); + } else if (!cPar.file_geno.empty()) { + BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv, + cPar.indicator_snp, vec_cat, w1, z, 0, Xz); + } else if (!cPar.file_mbfile.empty()) { + MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, + cPar.mindicator_snp, vec_cat, w1, z, Xz); + } else if (!cPar.file_mgeno.empty()) { + MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, + cPar.mindicator_snp, vec_cat, w1, z, Xz); + } + /* + cout<<"Xz: "<<endl; + for (size_t i=0; i<5; i++) { + for (size_t j=0; j<cPar.n_vc; j++) { + cout<<gsl_matrix_get (Xz, i, j)<<" "; + } + cout<<endl; + } + */ + if (cPar.a_mode == 66) { + gsl_matrix_memcpy(XWz, Xz); + } else if (cPar.a_mode == 67) { + cout << "Calculating XWz ... " << endl; + + gsl_matrix_set_zero(XWz); + + if (!cPar.file_bfile.empty()) { + file_str = cPar.file_bfile + ".bed"; + PlinkXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, + vec_cat, w, z, 0, XWz); + } else if (!cPar.file_geno.empty()) { + BimbamXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv, + cPar.indicator_snp, vec_cat, w, z, 0, XWz); + } else if (!cPar.file_mbfile.empty()) { + MFILEXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, + cPar.mindicator_snp, vec_cat, w, z, XWz); + } else if (!cPar.file_mgeno.empty()) { + MFILEXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, + cPar.mindicator_snp, vec_cat, w, z, XWz); + } + } + /* + cout<<"XWz: "<<endl; + for (size_t i=0; i<5; i++) { + cout<<gsl_vector_get (w, i)<<endl; + for (size_t j=0; j<cPar.n_vc; j++) { + cout<<gsl_matrix_get (XWz, i, j)<<" "; + } + cout<<endl; + } + */ + // compute an p by k matrix of X_j^TWX_iWz + cout << "Calculating XtXWz ... 
" << endl; + gsl_matrix_set_zero(XtXWz); + + if (!cPar.file_bfile.empty()) { + file_str = cPar.file_bfile + ".bed"; + PlinkXtXwz(file_str, cPar.d_pace, cPar.indicator_idv, cPar.indicator_snp, + XWz, 0, XtXWz); + } else if (!cPar.file_geno.empty()) { + BimbamXtXwz(cPar.file_geno, cPar.d_pace, cPar.indicator_idv, + cPar.indicator_snp, XWz, 0, XtXWz); + } else if (!cPar.file_mbfile.empty()) { + MFILEXtXwz(1, cPar.file_mbfile, cPar.d_pace, cPar.indicator_idv, + cPar.mindicator_snp, XWz, XtXWz); + } else if (!cPar.file_mgeno.empty()) { + MFILEXtXwz(0, cPar.file_mgeno, cPar.d_pace, cPar.indicator_idv, + cPar.mindicator_snp, XWz, XtXWz); + } + /* + cout<<"XtXWz: "<<endl; + for (size_t i=0; i<5; i++) { + for (size_t j=0; j<cPar.n_vc; j++) { + cout<<gsl_matrix_get (XtXWz, i, j)<<" "; + } + cout<<endl; + } + */ + // compute confidence intervals + CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, + cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, + cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); + + // write files + // cPar.WriteMatrix (XWz, "XWz"); + // cPar.WriteMatrix (XtXWz, "XtXWz"); + // cPar.WriteVector (w, "w"); + + gsl_matrix_free(S); + gsl_matrix_free(Svar); + gsl_vector_free(s_ref); + + gsl_matrix_free(Xz); + gsl_matrix_free(XWz); + gsl_matrix_free(XtXWz); + gsl_vector_free(w); + gsl_vector_free(w1); + gsl_vector_free(z); + gsl_vector_free(s_vec); + } + + // LMM or mvLMM or Eigen-Decomposition + if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 || + cPar.a_mode == 4 || cPar.a_mode == 5 || + cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen + gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph); + gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt); + gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c + // matrix + gsl_matrix *se_B = gsl_matrix_alloc(Y->size2, W->size2); + gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1); + gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1); + 
gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2); + gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2); + gsl_vector *eval = gsl_vector_alloc(Y->size1); + gsl_vector *env = gsl_vector_alloc(Y->size1); + gsl_vector *weight = gsl_vector_alloc(Y->size1); + + // set covariates matrix W and phenotype matrix Y + // an intercept should be included in W, + cPar.CopyCvtPhen(W, Y, 0); + if (!cPar.file_gxe.empty()) { + cPar.CopyGxe(env); + } + + // read relatedness matrix G + if (!(cPar.file_kin).empty()) { + ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, + cPar.k_mode, cPar.error, G); + if (cPar.error == true) { + cout << "error! fail to read kinship/relatedness file. " << endl; + return; + } + + // center matrix G + CenterMatrix(G); + + // is residual weights are provided, then + if (!cPar.file_weight.empty()) { + cPar.CopyWeight(weight); + double d, wi, wj; + for (size_t i = 0; i < G->size1; i++) { + wi = gsl_vector_get(weight, i); + for (size_t j = i; j < G->size2; j++) { + wj = gsl_vector_get(weight, j); + d = gsl_matrix_get(G, i, j); + if (wi <= 0 || wj <= 0) { + d = 0; + } else { + d /= sqrt(wi * wj); + } + gsl_matrix_set(G, i, j, d); + if (j != i) { + gsl_matrix_set(G, j, i, d); + } + } + } + } + + // eigen-decomposition and calculate trace_G + cout << "Start Eigen-Decomposition..." 
<< endl; + time_start = clock(); + + if (cPar.a_mode == 31) { + cPar.trace_G = EigenDecomp(G, U, eval, 1); + } else { + cPar.trace_G = EigenDecomp(G, U, eval, 0); + } + + if (!cPar.file_weight.empty()) { + double wi; + for (size_t i = 0; i < U->size1; i++) { + wi = gsl_vector_get(weight, i); + if (wi <= 0) { + wi = 0; + } else { + wi = sqrt(wi); + } + gsl_vector_view Urow = gsl_matrix_row(U, i); + gsl_vector_scale(&Urow.vector, wi); + } + } + + cPar.trace_G = 0.0; + for (size_t i = 0; i < eval->size; i++) { + if (gsl_vector_get(eval, i) < 1e-10) { + gsl_vector_set(eval, i, 0); + } + cPar.trace_G += gsl_vector_get(eval, i); + } + cPar.trace_G /= (double)eval->size; + + cPar.time_eigen = + (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } else { + ReadFile_eigenU(cPar.file_ku, cPar.error, U); + if (cPar.error == true) { + cout << "error! fail to read the U file. " << endl; + return; + } + + ReadFile_eigenD(cPar.file_kd, cPar.error, eval); + if (cPar.error == true) { + cout << "error! fail to read the D file. 
" << endl; + return; + } + + cPar.trace_G = 0.0; + for (size_t i = 0; i < eval->size; i++) { + if (gsl_vector_get(eval, i) < 1e-10) { + gsl_vector_set(eval, i, 0); + } + cPar.trace_G += gsl_vector_get(eval, i); + } + cPar.trace_G /= (double)eval->size; + } + + if (cPar.a_mode == 31) { + cPar.WriteMatrix(U, "eigenU"); + cPar.WriteVector(eval, "eigenD"); + } else if (!cPar.file_gene.empty()) { + // calculate UtW and Uty + CalcUtX(U, W, UtW); + CalcUtX(U, Y, UtY); + + LMM cLmm; + cLmm.CopyFromParam(cPar); + + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + + cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector); // y is the predictor, not the phenotype + + cLmm.WriteFiles(); + cLmm.CopyToParam(cPar); + } else { + // calculate UtW and Uty + CalcUtX(U, W, UtW); + CalcUtX(U, Y, UtY); + + // calculate REMLE/MLE estimate and pve for univariate model + if (cPar.n_ph == 1) { + gsl_vector_view beta = gsl_matrix_row(B, 0); + gsl_vector_view se_beta = gsl_matrix_row(se_B, 0); + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + + CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, + cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); + CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null, + cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector, + &se_beta.vector); + + cPar.beta_mle_null.clear(); + cPar.se_beta_mle_null.clear(); + for (size_t i = 0; i < B->size2; i++) { + cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i)); + cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i)); + } + + CalcLambda('R', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max, + cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); + CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_remle_null, + cPar.vg_remle_null, cPar.ve_remle_null, &beta.vector, + &se_beta.vector); + cPar.beta_remle_null.clear(); + cPar.se_beta_remle_null.clear(); + for (size_t i = 0; i < B->size2; i++) { + 
cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i)); + cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i)); + } + + CalcPve(eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, + cPar.pve_null, cPar.pve_se_null); + cPar.PrintSummary(); + + // calculate and output residuals + if (cPar.a_mode == 5) { + gsl_vector *Utu_hat = gsl_vector_alloc(Y->size1); + gsl_vector *Ute_hat = gsl_vector_alloc(Y->size1); + gsl_vector *u_hat = gsl_vector_alloc(Y->size1); + gsl_vector *e_hat = gsl_vector_alloc(Y->size1); + gsl_vector *y_hat = gsl_vector_alloc(Y->size1); + + // obtain Utu and Ute + gsl_vector_memcpy(y_hat, &UtY_col.vector); + gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); + + double d, u, e; + for (size_t i = 0; i < eval->size; i++) { + d = gsl_vector_get(eval, i); + u = cPar.l_remle_null * d / (cPar.l_remle_null * d + 1.0) * + gsl_vector_get(y_hat, i); + e = 1.0 / (cPar.l_remle_null * d + 1.0) * gsl_vector_get(y_hat, i); + gsl_vector_set(Utu_hat, i, u); + gsl_vector_set(Ute_hat, i, e); + } + + // obtain u and e + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); + + // output residuals + cPar.WriteVector(u_hat, "residU"); + cPar.WriteVector(e_hat, "residE"); + + gsl_vector_free(u_hat); + gsl_vector_free(e_hat); + gsl_vector_free(y_hat); + } + } + + // Fit LMM or mvLMM + if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 || + cPar.a_mode == 4) { + if (cPar.n_ph == 1) { + LMM cLmm; + cLmm.CopyFromParam(cPar); + + gsl_vector_view Y_col = gsl_matrix_column(Y, 0); + gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); + + if (!cPar.file_bfile.empty()) { + if (cPar.file_gxe.empty()) { + cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector); + } else { + cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, env); + } + } + // WJA added + else if (!cPar.file_oxford.empty()) { + cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, 
W, &Y_col.vector); + } else { + if (cPar.file_gxe.empty()) { + cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector); + } else { + cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, env); + } + } + + cLmm.WriteFiles(); + cLmm.CopyToParam(cPar); + } else { + MVLMM cMvlmm; + cMvlmm.CopyFromParam(cPar); + + if (!cPar.file_bfile.empty()) { + if (cPar.file_gxe.empty()) { + cMvlmm.AnalyzePlink(U, eval, UtW, UtY); + } else { + cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env); + } + } else if (!cPar.file_oxford.empty()) { + cMvlmm.Analyzebgen(U, eval, UtW, UtY); + } else { + if (cPar.file_gxe.empty()) { + cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY); + } else { + cMvlmm.AnalyzeBimbamGXE(U, eval, UtW, UtY, env); + } + } + + cMvlmm.WriteFiles(); + cMvlmm.CopyToParam(cPar); + } + } + } + + // release all matrices and vectors + gsl_matrix_free(Y); + gsl_matrix_free(W); + gsl_matrix_free(B); + gsl_matrix_free(se_B); + gsl_matrix_free(G); + gsl_matrix_free(U); + gsl_matrix_free(UtW); + gsl_matrix_free(UtY); + gsl_vector_free(eval); + gsl_vector_free(env); + } + + // BSLMM + if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) { + gsl_vector *y = gsl_vector_alloc(cPar.ni_test); + gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt); + gsl_matrix *G = gsl_matrix_alloc(y->size, y->size); + gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test); + + // set covariates matrix W and phenotype vector y + // an intercept should be included in W, + cPar.CopyCvtPhen(W, y, 0); + + // center y, even for case/control data + cPar.pheno_mean = CenterVector(y); + + // run bvsr if rho==1 + if (cPar.rho_min == 1 && cPar.rho_max == 1) { + // read genotypes X (not UtX) + cPar.ReadGenotypes(UtX, G, false); + + // perform BSLMM analysis + BSLMM cBslmm; + cBslmm.CopyFromParam(cPar); + time_start = clock(); + cBslmm.MCMC(UtX, y); + cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + cBslmm.CopyToParam(cPar); + // else, if rho!=1 + } 
else { + gsl_matrix *U = gsl_matrix_alloc(y->size, y->size); + gsl_vector *eval = gsl_vector_alloc(y->size); + gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2); + gsl_vector *Uty = gsl_vector_alloc(y->size); + + // read relatedness matrix G + if (!(cPar.file_kin).empty()) { + cPar.ReadGenotypes(UtX, G, false); + + // read relatedness matrix G + ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, + cPar.k_mode, cPar.error, G); + if (cPar.error == true) { + cout << "error! fail to read kinship/relatedness file. " << endl; + return; + } + + // center matrix G + CenterMatrix(G); + } else { + cPar.ReadGenotypes(UtX, G, true); + } + + // eigen-decomposition and calculate trace_G + cout << "Start Eigen-Decomposition..." << endl; + time_start = clock(); + cPar.trace_G = EigenDecomp(G, U, eval, 0); + cPar.trace_G = 0.0; + for (size_t i = 0; i < eval->size; i++) { + if (gsl_vector_get(eval, i) < 1e-10) { + gsl_vector_set(eval, i, 0); + } + cPar.trace_G += gsl_vector_get(eval, i); + } + cPar.trace_G /= (double)eval->size; + cPar.time_eigen = + (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // calculate UtW and Uty + CalcUtX(U, W, UtW); + CalcUtX(U, y, Uty); + + // calculate REMLE/MLE estimate and pve + CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, + cPar.l_mle_null, cPar.logl_mle_H0); + CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, + cPar.l_remle_null, cPar.logl_remle_H0); + CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, + cPar.pve_se_null); + + cPar.PrintSummary(); + + // Creat and calcualte UtX, use a large memory + cout << "Calculating UtX..." 
<< endl; + time_start = clock(); + CalcUtX(U, UtX); + cPar.time_UtX = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // perform BSLMM or BSLMMDAP analysis + if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) { + BSLMM cBslmm; + cBslmm.CopyFromParam(cPar); + time_start = clock(); + if (cPar.a_mode == 12) { // ridge regression + cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null); + } else { // Run MCMC + cBslmm.MCMC(U, UtX, Uty, eval, y); + } + cPar.time_opt = + (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + cBslmm.CopyToParam(cPar); + } else { + } + + // release all matrices and vectors + gsl_matrix_free(G); + gsl_matrix_free(U); + gsl_matrix_free(UtW); + gsl_vector_free(eval); + gsl_vector_free(Uty); + } + gsl_matrix_free(W); + gsl_vector_free(y); + gsl_matrix_free(UtX); + } + + // BSLMM-DAP + if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) { + if (cPar.a_mode == 14) { + gsl_vector *y = gsl_vector_alloc(cPar.ni_test); + gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt); + gsl_matrix *G = gsl_matrix_alloc(y->size, y->size); + gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test); + + // set covariates matrix W and phenotype vector y + // an intercept should be included in W, + cPar.CopyCvtPhen(W, y, 0); + + // center y, even for case/control data + cPar.pheno_mean = CenterVector(y); + + // run bvsr if rho==1 + if (cPar.rho_min == 1 && cPar.rho_max == 1) { + // read genotypes X (not UtX) + cPar.ReadGenotypes(UtX, G, false); + + // perform BSLMM analysis + BSLMM cBslmm; + cBslmm.CopyFromParam(cPar); + time_start = clock(); + cBslmm.MCMC(UtX, y); + cPar.time_opt = + (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + cBslmm.CopyToParam(cPar); + // else, if rho!=1 + } else { + gsl_matrix *U = gsl_matrix_alloc(y->size, y->size); + gsl_vector *eval = gsl_vector_alloc(y->size); + gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2); + gsl_vector *Uty = gsl_vector_alloc(y->size); + + // read 
relatedness matrix G + if (!(cPar.file_kin).empty()) { + cPar.ReadGenotypes(UtX, G, false); + + // read relatedness matrix G + ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, + cPar.k_mode, cPar.error, G); + if (cPar.error == true) { + cout << "error! fail to read kinship/relatedness file. " << endl; + return; + } + + // center matrix G + CenterMatrix(G); + } else { + cPar.ReadGenotypes(UtX, G, true); + } + + // eigen-decomposition and calculate trace_G + cout << "Start Eigen-Decomposition..." << endl; + time_start = clock(); + cPar.trace_G = EigenDecomp(G, U, eval, 0); + cPar.trace_G = 0.0; + for (size_t i = 0; i < eval->size; i++) { + if (gsl_vector_get(eval, i) < 1e-10) { + gsl_vector_set(eval, i, 0); + } + cPar.trace_G += gsl_vector_get(eval, i); + } + cPar.trace_G /= (double)eval->size; + cPar.time_eigen = + (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // calculate UtW and Uty + CalcUtX(U, W, UtW); + CalcUtX(U, y, Uty); + + // calculate REMLE/MLE estimate and pve + CalcLambda('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, + cPar.l_mle_null, cPar.logl_mle_H0); + CalcLambda('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, + cPar.l_remle_null, cPar.logl_remle_H0); + CalcPve(eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, + cPar.pve_se_null); + + cPar.PrintSummary(); + + // Creat and calcualte UtX, use a large memory + cout << "Calculating UtX..." 
<< endl; + time_start = clock(); + CalcUtX(U, UtX); + cPar.time_UtX = + (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // perform analysis; assume X and y are already centered + BSLMMDAP cBslmmDap; + cBslmmDap.CopyFromParam(cPar); + time_start = clock(); + cBslmmDap.DAP_CalcBF(U, UtX, Uty, eval, y); + cPar.time_opt = + (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + cBslmmDap.CopyToParam(cPar); + + // release all matrices and vectors + gsl_matrix_free(G); + gsl_matrix_free(U); + gsl_matrix_free(UtW); + gsl_vector_free(eval); + gsl_vector_free(Uty); + } + + gsl_matrix_free(W); + gsl_vector_free(y); + gsl_matrix_free(UtX); + } else if (cPar.a_mode == 15) { + // perform EM algorithm and estimate parameters + vector<string> vec_rs; + vector<double> vec_sa2, vec_sb2, wab; + vector<vector<vector<double>>> BF; + + // read hyp and bf files (functions defined in BSLMMDAP) + ReadFile_hyb(cPar.file_hyp, vec_sa2, vec_sb2, wab); + ReadFile_bf(cPar.file_bf, vec_rs, BF); + + cPar.ns_test = vec_rs.size(); + if (wab.size() != BF[0][0].size()) { + cout << "error! 
hyp and bf files dimension do not match" << endl; + } + + // load annotations + gsl_matrix *Ac; + gsl_matrix_int *Ad; + gsl_vector_int *dlevel; + size_t kc, kd; + if (!cPar.file_cat.empty()) { + ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd); + } else { + kc = 0; + kd = 0; + } + + cout << "## number of blocks = " << BF.size() << endl; + cout << "## number of analyzed SNPs = " << vec_rs.size() << endl; + cout << "## grid size for hyperparameters = " << wab.size() << endl; + cout << "## number of continuous annotations = " << kc << endl; + cout << "## number of discrete annotations = " << kd << endl; + + // DAP_EstimateHyper (const size_t kc, const size_t kd, const + // vector<string> &vec_rs, const vector<double> &vec_sa2, const + // vector<double> &vec_sb2, const vector<double> &wab, const + // vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int + // *Ad, gsl_vector_int *dlevel); + + // perform analysis + BSLMMDAP cBslmmDap; + cBslmmDap.CopyFromParam(cPar); + time_start = clock(); + cBslmmDap.DAP_EstimateHyper(kc, kd, vec_rs, vec_sa2, vec_sb2, wab, BF, Ac, + Ad, dlevel); + cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + cBslmmDap.CopyToParam(cPar); + + gsl_matrix_free(Ac); + gsl_matrix_int_free(Ad); + gsl_vector_int_free(dlevel); + } else { + // + } + } + + /* + //LDR (change 14 to 16?) 
+ if (cPar.a_mode==14) { + gsl_vector *y=gsl_vector_alloc (cPar.ni_test); + gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); + gsl_matrix *G=gsl_matrix_alloc (1, 1); + vector<vector<unsigned char> > Xt; + + //set covariates matrix W and phenotype vector y + //an intercept is included in W + cPar.CopyCvtPhen (W, y, 0); + //read in genotype matrix X + cPar.ReadGenotypes (Xt, G, false); + LDR cLdr; + cLdr.CopyFromParam(cPar); + time_start=clock(); + cLdr.VB(Xt, W, y); -void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar) -{ - string file_str; - file_str=cPar.path_out+"/"+cPar.file_out; - file_str+=".log.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing log file: "<<file_str.c_str()<<endl; return;} - - outfile<<"##"<<endl; - outfile<<"## GEMMA Version = "<<version<<endl; - - outfile<<"##"<<endl; - outfile<<"## Command Line Input = "; - for(int i = 0; i < argc; i++) { - outfile<<argv[i]<<" "; - } - outfile<<endl; - - outfile<<"##"<<endl; - time_t rawtime; - time(&rawtime); - tm *ptm = localtime (&rawtime); - - outfile<<"## Date = "<<asctime(ptm); - //ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl; - - outfile<<"##"<<endl; - outfile<<"## Summary Statistics:"<<endl; - if (!cPar.file_cor.empty() || !cPar.file_study.empty() || !cPar.file_mstudy.empty() ) { - outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl; - outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl; - //outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl; - //outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_ref<<endl; - //outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; - //outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl; - outfile<<"## number of variance components = "<<cPar.n_vc<<endl; - - outfile<<"## pve estimates = "; - for (size_t i=0; i<cPar.v_pve.size(); i++) { - 
outfile<<" "<<cPar.v_pve[i]; - } - outfile<<endl; - - outfile<<"## se(pve) = "; - for (size_t i=0; i<cPar.v_se_pve.size(); i++) { - outfile<<" "<<cPar.v_se_pve[i]; - } - outfile<<endl; - - if (cPar.n_vc>1) { - outfile<<"## total pve = "<<cPar.pve_total<<endl; - outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl; - } - - outfile<<"## sigma2 per snp = "; - for (size_t i=0; i<cPar.v_sigma2.size(); i++) { - outfile<<" "<<cPar.v_sigma2[i]; - } - outfile<<endl; - - outfile<<"## se(sigma2 per snp) = "; - for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) { - outfile<<" "<<cPar.v_se_sigma2[i]; - } - outfile<<endl; - - outfile<<"## enrichment = "; - for (size_t i=0; i<cPar.v_enrich.size(); i++) { - outfile<<" "<<cPar.v_enrich[i]; - } - outfile<<endl; - - outfile<<"## se(enrichment) = "; - for (size_t i=0; i<cPar.v_se_enrich.size(); i++) { - outfile<<" "<<cPar.v_se_enrich[i]; - } - outfile<<endl; - } else if (!cPar.file_beta.empty() && (cPar.a_mode==61 || cPar.a_mode==62) ) { - outfile<<"## number of total individuals in the sample = "<<cPar.ni_study<<endl; - outfile<<"## number of total individuals in the reference = "<<cPar.ni_total<<endl; - outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl; - outfile<<"## number of total SNPs in the reference panel = "<<cPar.ns_total<<endl; - outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; - outfile<<"## number of variance components = "<<cPar.n_vc<<endl; - } else if (!cPar.file_beta.empty() && (cPar.a_mode==66 || cPar.a_mode==67) ) { - outfile<<"## number of total individuals in the sample = "<<cPar.ni_total<<endl; - outfile<<"## number of total individuals in the reference = "<<cPar.ni_ref<<endl; - outfile<<"## number of total SNPs in the sample = "<<cPar.ns_total<<endl; - outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; - outfile<<"## number of variance components = "<<cPar.n_vc<<endl; - - outfile<<"## pve estimates = "; - for (size_t i=0; i<cPar.v_pve.size(); i++) { - outfile<<" 
"<<cPar.v_pve[i]; - } - outfile<<endl; - - outfile<<"## se(pve) = "; - for (size_t i=0; i<cPar.v_se_pve.size(); i++) { - outfile<<" "<<cPar.v_se_pve[i]; - } - outfile<<endl; - - if (cPar.n_vc>1) { - outfile<<"## total pve = "<<cPar.pve_total<<endl; - outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl; - } - - outfile<<"## sigma2 per snp = "; - for (size_t i=0; i<cPar.v_sigma2.size(); i++) { - outfile<<" "<<cPar.v_sigma2[i]; - } - outfile<<endl; - - outfile<<"## se(sigma2 per snp) = "; - for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) { - outfile<<" "<<cPar.v_se_sigma2[i]; - } - outfile<<endl; - - outfile<<"## enrichment = "; - for (size_t i=0; i<cPar.v_enrich.size(); i++) { - outfile<<" "<<cPar.v_enrich[i]; - } - outfile<<endl; - - outfile<<"## se(enrichment) = "; - for (size_t i=0; i<cPar.v_se_enrich.size(); i++) { - outfile<<" "<<cPar.v_se_enrich[i]; - } - outfile<<endl; - } else { - outfile<<"## number of total individuals = "<<cPar.ni_total<<endl; - - if (cPar.a_mode==43) { - outfile<<"## number of analyzed individuals = "<<cPar.ni_cvt<<endl; - outfile<<"## number of individuals with full phenotypes = "<<cPar.ni_test<<endl; - } else if (cPar.a_mode!=27 && cPar.a_mode!=28) { - outfile<<"## number of analyzed individuals = "<<cPar.ni_test<<endl; - } - - outfile<<"## number of covariates = "<<cPar.n_cvt<<endl; - outfile<<"## number of phenotypes = "<<cPar.n_ph<<endl; - if (cPar.a_mode==43) { - outfile<<"## number of observed data = "<<cPar.np_obs<<endl; - outfile<<"## number of missing data = "<<cPar.np_miss<<endl; - } - if (cPar.a_mode==25 || cPar.a_mode==26 || cPar.a_mode==27 || cPar.a_mode==28 || cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63 || cPar.a_mode==66 || cPar.a_mode==67) { - outfile<<"## number of variance components = "<<cPar.n_vc<<endl; - } - - if (!(cPar.file_gene).empty()) { - outfile<<"## number of total genes = "<<cPar.ng_total<<endl; - outfile<<"## number of analyzed genes = "<<cPar.ng_test<<endl; - } else if 
(cPar.file_epm.empty()) { - outfile<<"## number of total SNPs = "<<cPar.ns_total<<endl; - outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; - } else { - outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; - } - - if (cPar.a_mode==13) { - outfile<<"## number of cases = "<<cPar.ni_case<<endl; - outfile<<"## number of controls = "<<cPar.ni_control<<endl; - } - } - - if ( (cPar.a_mode==61 || cPar.a_mode==62 || cPar.a_mode==63) && cPar.file_cor.empty() && cPar.file_study.empty() && cPar.file_mstudy.empty() ) { - // outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl; - if (cPar.n_ph==1) { - outfile<<"## pve estimates = "; - for (size_t i=0; i<cPar.v_pve.size(); i++) { - outfile<<" "<<cPar.v_pve[i]; - } - outfile<<endl; - - outfile<<"## se(pve) = "; - for (size_t i=0; i<cPar.v_se_pve.size(); i++) { - outfile<<" "<<cPar.v_se_pve[i]; - } - outfile<<endl; - - if (cPar.n_vc>1) { - outfile<<"## total pve = "<<cPar.pve_total<<endl; - outfile<<"## se(total pve) = "<<cPar.se_pve_total<<endl; - } - - outfile<<"## sigma2 estimates = "; - for (size_t i=0; i<cPar.v_sigma2.size(); i++) { - outfile<<" "<<cPar.v_sigma2[i]; - } - outfile<<endl; - - outfile<<"## se(sigma2) = "; - for (size_t i=0; i<cPar.v_se_sigma2.size(); i++) { - outfile<<" "<<cPar.v_se_sigma2[i]; - } - outfile<<endl; - - if (!cPar.file_beta.empty() ) { - outfile<<"## enrichment = "; - for (size_t i=0; i<cPar.v_enrich.size(); i++) { - outfile<<" "<<cPar.v_enrich[i]; - } - outfile<<endl; - - outfile<<"## se(enrichment) = "; - for (size_t i=0; i<cPar.v_se_enrich.size(); i++) { - outfile<<" "<<cPar.v_se_enrich[i]; - } - outfile<<endl; - } - /* - outfile<<"## beta estimate in the null model = "; - for (size_t i=0; i<cPar.beta_remle_null.size(); i++) { - outfile<<" "<<cPar.beta_remle_null[i]; - } - outfile<<endl; - outfile<<"## se(beta) = "; - for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) { - outfile<<" "<<cPar.se_beta_remle_null[i]; - } - outfile<<endl; - */ - } 
- } - - if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { - outfile<<"## REMLE log-likelihood in the null model = "<<cPar.logl_remle_H0<<endl; - outfile<<"## MLE log-likelihood in the null model = "<<cPar.logl_mle_H0<<endl; - if (cPar.n_ph==1) { - //outfile<<"## lambda REMLE estimate in the null (linear mixed) model = "<<cPar.l_remle_null<<endl; - //outfile<<"## lambda MLE estimate in the null (linear mixed) model = "<<cPar.l_mle_null<<endl; - outfile<<"## pve estimate in the null model = "<<cPar.pve_null<<endl; - outfile<<"## se(pve) in the null model = "<<cPar.pve_se_null<<endl; - outfile<<"## vg estimate in the null model = "<<cPar.vg_remle_null<<endl; - outfile<<"## ve estimate in the null model = "<<cPar.ve_remle_null<<endl; - outfile<<"## beta estimate in the null model = "; - for (size_t i=0; i<cPar.beta_remle_null.size(); i++) { - outfile<<" "<<cPar.beta_remle_null[i]; - } - outfile<<endl; - outfile<<"## se(beta) = "; - for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) { - outfile<<" "<<cPar.se_beta_remle_null[i]; - } - outfile<<endl; - - } else { - size_t c; - outfile<<"## REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<cPar.Vg_remle_null[c]<<"\t"; - } - outfile<<endl; - } - outfile<<"## se(Vg): "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<sqrt(cPar.VVg_remle_null[c])<<"\t"; - } - outfile<<endl; - } - outfile<<"## REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<cPar.Ve_remle_null[c]<<"\t"; - } - outfile<<endl; - } - outfile<<"## se(Ve): "<<endl; - for (size_t i=0; 
i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<sqrt(cPar.VVe_remle_null[c])<<"\t"; - } - outfile<<endl; - } - - outfile<<"## MLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<cPar.n_ph; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<cPar.Vg_mle_null[c]<<"\t"; - } - outfile<<endl; - } - outfile<<"## se(Vg): "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<sqrt(cPar.VVg_mle_null[c])<<"\t"; - } - outfile<<endl; - } - outfile<<"## MLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<cPar.n_ph; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<cPar.Ve_mle_null[c]<<"\t"; - } - outfile<<endl; - } - outfile<<"## se(Ve): "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<sqrt(cPar.VVe_mle_null[c])<<"\t"; - } - outfile<<endl; - } - outfile<<"## estimate for B (d by c) in the null model (columns correspond to the covariates provided in the file): "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<cPar.n_cvt; j++) { - c=i*cPar.n_cvt+j; - outfile<<cPar.beta_remle_null[c]<<"\t"; - } - outfile<<endl; - } - outfile<<"## se(B): "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<cPar.n_cvt; j++) { - c=i*cPar.n_cvt+j; - outfile<<cPar.se_beta_remle_null[c]<<"\t"; - } - outfile<<endl; - } - } - } - - /* - if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { - if (cPar.n_ph==1) { - outfile<<"## REMLE vg estimate in the null model = "<<cPar.vg_remle_null<<endl; - outfile<<"## REMLE ve estimate in the null model = 
"<<cPar.ve_remle_null<<endl; - } else { - size_t c; - outfile<<"## REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<cPar.Vg_remle_null[c]<<"\t"; - } - outfile<<endl; - } - outfile<<"## REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<cPar.n_ph; i++) { - for (size_t j=0; j<=i; j++) { - c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); - outfile<<cPar.Ve_remle_null[c]<<"\t"; - } - outfile<<endl; - } - } - } - */ - - - if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) { - outfile<<"## estimated mean = "<<cPar.pheno_mean<<endl; - } - - if (cPar.a_mode==11 || cPar.a_mode==13) { - outfile<<"##"<<endl; - outfile<<"## MCMC related:"<<endl; - outfile<<"## initial value of h = "<<cPar.cHyp_initial.h<<endl; - outfile<<"## initial value of rho = "<<cPar.cHyp_initial.rho<<endl; - outfile<<"## initial value of pi = "<<exp(cPar.cHyp_initial.logp)<<endl; - outfile<<"## initial value of |gamma| = "<<cPar.cHyp_initial.n_gamma<<endl; - outfile<<"## random seed = "<<cPar.randseed<<endl; - outfile<<"## acceptance ratio = "<<(double)cPar.n_accept/(double)((cPar.w_step+cPar.s_step)*cPar.n_mh)<<endl; - } - - outfile<<"##"<<endl; - outfile<<"## Computation Time:"<<endl; - outfile<<"## total computation time = "<<cPar.time_total<<" min "<<endl; - outfile<<"## computation time break down: "<<endl; - if (cPar.a_mode==21 || cPar.a_mode==22 || cPar.a_mode==11 || cPar.a_mode==13 || cPar.a_mode==14 || cPar.a_mode==16) { - outfile<<"## time on calculating relatedness matrix = "<<cPar.time_G<<" min "<<endl; - } - if (cPar.a_mode==31) { - outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; - } - if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || cPar.a_mode==5 || cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13 || cPar.a_mode==14 
|| cPar.a_mode==16) { - outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; - outfile<<"## time on calculating UtX = "<<cPar.time_UtX<<" min "<<endl; - } - if ((cPar.a_mode>=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) { - outfile<<"## time on optimization = "<<cPar.time_opt<<" min "<<endl; - } - if (cPar.a_mode==11 || cPar.a_mode==13) { - outfile<<"## time on proposal = "<<cPar.time_Proposal<<" min "<<endl; - outfile<<"## time on mcmc = "<<cPar.time_opt<<" min "<<endl; - outfile<<"## time on Omega = "<<cPar.time_Omega<<" min "<<endl; - } - if (cPar.a_mode==41 || cPar.a_mode==42) { - outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; - } - if (cPar.a_mode==43) { - outfile<<"## time on eigen-decomposition = "<<cPar.time_eigen<<" min "<<endl; - outfile<<"## time on predicting phenotypes = "<<cPar.time_opt<<" min "<<endl; - } - outfile<<"##"<<endl; - - outfile.close(); - outfile.clear(); - return; + cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + cLdr.CopyToParam(cPar); + + gsl_vector_free (y); + gsl_matrix_free (W); + gsl_matrix_free (G); + } + */ + + cPar.time_total = (clock() - time_begin) / (double(CLOCKS_PER_SEC) * 60.0); + + return; } +void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) { + string file_str; + file_str = cPar.path_out + "/" + cPar.file_out; + file_str += ".log.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing log file: " << file_str.c_str() << endl; + return; + } + + outfile << "##" << endl; + outfile << "## GEMMA Version = " << version << endl; + outfile << "##" << endl; + outfile << "## Command Line Input = "; + for (int i = 0; i < argc; i++) { + outfile << argv[i] << " "; + } + outfile << endl; + + outfile << "##" << endl; + time_t rawtime; + time(&rawtime); + tm *ptm = localtime(&rawtime); + + outfile << "## Date = " << asctime(ptm); + // 
ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl; + + outfile << "##" << endl; + outfile << "## Summary Statistics:" << endl; + if (!cPar.file_cor.empty() || !cPar.file_study.empty() || + !cPar.file_mstudy.empty()) { + outfile << "## number of total individuals in the sample = " + << cPar.ni_study << endl; + outfile << "## number of total individuals in the reference = " + << cPar.ni_ref << endl; + // outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl; + // outfile<<"## number of total SNPs in the reference panel = + // "<<cPar.ns_ref<<endl; + // outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl; + // outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl; + outfile << "## number of variance components = " << cPar.n_vc << endl; + + outfile << "## pve estimates = "; + for (size_t i = 0; i < cPar.v_pve.size(); i++) { + outfile << " " << cPar.v_pve[i]; + } + outfile << endl; + + outfile << "## se(pve) = "; + for (size_t i = 0; i < cPar.v_se_pve.size(); i++) { + outfile << " " << cPar.v_se_pve[i]; + } + outfile << endl; + + if (cPar.n_vc > 1) { + outfile << "## total pve = " << cPar.pve_total << endl; + outfile << "## se(total pve) = " << cPar.se_pve_total << endl; + } + + outfile << "## sigma2 per snp = "; + for (size_t i = 0; i < cPar.v_sigma2.size(); i++) { + outfile << " " << cPar.v_sigma2[i]; + } + outfile << endl; + + outfile << "## se(sigma2 per snp) = "; + for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) { + outfile << " " << cPar.v_se_sigma2[i]; + } + outfile << endl; + + outfile << "## enrichment = "; + for (size_t i = 0; i < cPar.v_enrich.size(); i++) { + outfile << " " << cPar.v_enrich[i]; + } + outfile << endl; + + outfile << "## se(enrichment) = "; + for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) { + outfile << " " << cPar.v_se_enrich[i]; + } + outfile << endl; + } else if (!cPar.file_beta.empty() && + (cPar.a_mode == 61 || cPar.a_mode == 62)) { + outfile << 
"## number of total individuals in the sample = " + << cPar.ni_study << endl; + outfile << "## number of total individuals in the reference = " + << cPar.ni_total << endl; + outfile << "## number of total SNPs in the sample = " << cPar.ns_study + << endl; + outfile << "## number of total SNPs in the reference panel = " + << cPar.ns_total << endl; + outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl; + outfile << "## number of variance components = " << cPar.n_vc << endl; + } else if (!cPar.file_beta.empty() && + (cPar.a_mode == 66 || cPar.a_mode == 67)) { + outfile << "## number of total individuals in the sample = " + << cPar.ni_total << endl; + outfile << "## number of total individuals in the reference = " + << cPar.ni_ref << endl; + outfile << "## number of total SNPs in the sample = " << cPar.ns_total + << endl; + outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl; + outfile << "## number of variance components = " << cPar.n_vc << endl; + + outfile << "## pve estimates = "; + for (size_t i = 0; i < cPar.v_pve.size(); i++) { + outfile << " " << cPar.v_pve[i]; + } + outfile << endl; + + outfile << "## se(pve) = "; + for (size_t i = 0; i < cPar.v_se_pve.size(); i++) { + outfile << " " << cPar.v_se_pve[i]; + } + outfile << endl; + + if (cPar.n_vc > 1) { + outfile << "## total pve = " << cPar.pve_total << endl; + outfile << "## se(total pve) = " << cPar.se_pve_total << endl; + } + + outfile << "## sigma2 per snp = "; + for (size_t i = 0; i < cPar.v_sigma2.size(); i++) { + outfile << " " << cPar.v_sigma2[i]; + } + outfile << endl; + + outfile << "## se(sigma2 per snp) = "; + for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) { + outfile << " " << cPar.v_se_sigma2[i]; + } + outfile << endl; + + outfile << "## enrichment = "; + for (size_t i = 0; i < cPar.v_enrich.size(); i++) { + outfile << " " << cPar.v_enrich[i]; + } + outfile << endl; + + outfile << "## se(enrichment) = "; + for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) { + 
outfile << " " << cPar.v_se_enrich[i]; + } + outfile << endl; + } else { + outfile << "## number of total individuals = " << cPar.ni_total << endl; + + if (cPar.a_mode == 43) { + outfile << "## number of analyzed individuals = " << cPar.ni_cvt << endl; + outfile << "## number of individuals with full phenotypes = " + << cPar.ni_test << endl; + } else if (cPar.a_mode != 27 && cPar.a_mode != 28) { + outfile << "## number of analyzed individuals = " << cPar.ni_test << endl; + } + + outfile << "## number of covariates = " << cPar.n_cvt << endl; + outfile << "## number of phenotypes = " << cPar.n_ph << endl; + if (cPar.a_mode == 43) { + outfile << "## number of observed data = " << cPar.np_obs << endl; + outfile << "## number of missing data = " << cPar.np_miss << endl; + } + if (cPar.a_mode == 25 || cPar.a_mode == 26 || cPar.a_mode == 27 || + cPar.a_mode == 28 || cPar.a_mode == 61 || cPar.a_mode == 62 || + cPar.a_mode == 63 || cPar.a_mode == 66 || cPar.a_mode == 67) { + outfile << "## number of variance components = " << cPar.n_vc << endl; + } + + if (!(cPar.file_gene).empty()) { + outfile << "## number of total genes = " << cPar.ng_total << endl; + outfile << "## number of analyzed genes = " << cPar.ng_test << endl; + } else if (cPar.file_epm.empty()) { + outfile << "## number of total SNPs = " << cPar.ns_total << endl; + outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl; + } else { + outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl; + } + + if (cPar.a_mode == 13) { + outfile << "## number of cases = " << cPar.ni_case << endl; + outfile << "## number of controls = " << cPar.ni_control << endl; + } + } + + if ((cPar.a_mode == 61 || cPar.a_mode == 62 || cPar.a_mode == 63) && + cPar.file_cor.empty() && cPar.file_study.empty() && + cPar.file_mstudy.empty()) { + // outfile<<"## REMLE log-likelihood in the null model = + //"<<cPar.logl_remle_H0<<endl; + if (cPar.n_ph == 1) { + outfile << "## pve estimates = "; + for (size_t i = 0; i < 
cPar.v_pve.size(); i++) { + outfile << " " << cPar.v_pve[i]; + } + outfile << endl; + + outfile << "## se(pve) = "; + for (size_t i = 0; i < cPar.v_se_pve.size(); i++) { + outfile << " " << cPar.v_se_pve[i]; + } + outfile << endl; + + if (cPar.n_vc > 1) { + outfile << "## total pve = " << cPar.pve_total << endl; + outfile << "## se(total pve) = " << cPar.se_pve_total << endl; + } + + outfile << "## sigma2 estimates = "; + for (size_t i = 0; i < cPar.v_sigma2.size(); i++) { + outfile << " " << cPar.v_sigma2[i]; + } + outfile << endl; + + outfile << "## se(sigma2) = "; + for (size_t i = 0; i < cPar.v_se_sigma2.size(); i++) { + outfile << " " << cPar.v_se_sigma2[i]; + } + outfile << endl; + + if (!cPar.file_beta.empty()) { + outfile << "## enrichment = "; + for (size_t i = 0; i < cPar.v_enrich.size(); i++) { + outfile << " " << cPar.v_enrich[i]; + } + outfile << endl; + + outfile << "## se(enrichment) = "; + for (size_t i = 0; i < cPar.v_se_enrich.size(); i++) { + outfile << " " << cPar.v_se_enrich[i]; + } + outfile << endl; + } + /* + outfile<<"## beta estimate in the null model = "; + for (size_t i=0; i<cPar.beta_remle_null.size(); i++) { + outfile<<" "<<cPar.beta_remle_null[i]; + } + outfile<<endl; + outfile<<"## se(beta) = "; + for (size_t i=0; i<cPar.se_beta_remle_null.size(); i++) { + outfile<<" "<<cPar.se_beta_remle_null[i]; + } + outfile<<endl; + */ + } + } + + if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 || + cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 || + cPar.a_mode == 12 || cPar.a_mode == 13) { + outfile << "## REMLE log-likelihood in the null model = " + << cPar.logl_remle_H0 << endl; + outfile << "## MLE log-likelihood in the null model = " << cPar.logl_mle_H0 + << endl; + if (cPar.n_ph == 1) { + // outfile<<"## lambda REMLE estimate in the null (linear mixed) model = + // "<<cPar.l_remle_null<<endl; + // outfile<<"## lambda MLE estimate in the null (linear mixed) model = + // "<<cPar.l_mle_null<<endl; + outfile << "## pve 
estimate in the null model = " << cPar.pve_null + << endl; + outfile << "## se(pve) in the null model = " << cPar.pve_se_null << endl; + outfile << "## vg estimate in the null model = " << cPar.vg_remle_null + << endl; + outfile << "## ve estimate in the null model = " << cPar.ve_remle_null + << endl; + outfile << "## beta estimate in the null model = "; + for (size_t i = 0; i < cPar.beta_remle_null.size(); i++) { + outfile << " " << cPar.beta_remle_null[i]; + } + outfile << endl; + outfile << "## se(beta) = "; + for (size_t i = 0; i < cPar.se_beta_remle_null.size(); i++) { + outfile << " " << cPar.se_beta_remle_null[i]; + } + outfile << endl; + + } else { + size_t c; + outfile << "## REMLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j <= i; j++) { + c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << cPar.Vg_remle_null[c] << "\t"; + } + outfile << endl; + } + outfile << "## se(Vg): " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j <= i; j++) { + c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << sqrt(cPar.VVg_remle_null[c]) << "\t"; + } + outfile << endl; + } + outfile << "## REMLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j <= i; j++) { + c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << cPar.Ve_remle_null[c] << "\t"; + } + outfile << endl; + } + outfile << "## se(Ve): " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j <= i; j++) { + c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << sqrt(cPar.VVe_remle_null[c]) << "\t"; + } + outfile << endl; + } + + outfile << "## MLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j < cPar.n_ph; j++) { + c = (2 * 
cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << cPar.Vg_mle_null[c] << "\t"; + } + outfile << endl; + } + outfile << "## se(Vg): " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j <= i; j++) { + c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << sqrt(cPar.VVg_mle_null[c]) << "\t"; + } + outfile << endl; + } + outfile << "## MLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j < cPar.n_ph; j++) { + c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << cPar.Ve_mle_null[c] << "\t"; + } + outfile << endl; + } + outfile << "## se(Ve): " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j <= i; j++) { + c = (2 * cPar.n_ph - min(i, j) + 1) * min(i, j) / 2 + max(i, j) - + min(i, j); + outfile << sqrt(cPar.VVe_mle_null[c]) << "\t"; + } + outfile << endl; + } + outfile << "## estimate for B (d by c) in the null model (columns " + "correspond to the covariates provided in the file): " + << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j < cPar.n_cvt; j++) { + c = i * cPar.n_cvt + j; + outfile << cPar.beta_remle_null[c] << "\t"; + } + outfile << endl; + } + outfile << "## se(B): " << endl; + for (size_t i = 0; i < cPar.n_ph; i++) { + for (size_t j = 0; j < cPar.n_cvt; j++) { + c = i * cPar.n_cvt + j; + outfile << cPar.se_beta_remle_null[c] << "\t"; + } + outfile << endl; + } + } + } + + /* + if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4 || + cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { + if (cPar.n_ph==1) { + outfile<<"## REMLE vg estimate in the null model = + "<<cPar.vg_remle_null<<endl; + outfile<<"## REMLE ve estimate in the null model = + "<<cPar.ve_remle_null<<endl; + } else { + size_t c; + outfile<<"## REMLE estimate for Vg in the null model: "<<endl; + for (size_t i=0; i<cPar.n_ph; 
i++) { + for (size_t j=0; j<=i; j++) { + c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); + outfile<<cPar.Vg_remle_null[c]<<"\t"; + } + outfile<<endl; + } + outfile<<"## REMLE estimate for Ve in the null model: "<<endl; + for (size_t i=0; i<cPar.n_ph; i++) { + for (size_t j=0; j<=i; j++) { + c=(2*cPar.n_ph-min(i,j)+1)*min(i,j)/2+max(i,j)-min(i,j); + outfile<<cPar.Ve_remle_null[c]<<"\t"; + } + outfile<<endl; + } + } + } + */ + + if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13 || + cPar.a_mode == 14 || cPar.a_mode == 16) { + outfile << "## estimated mean = " << cPar.pheno_mean << endl; + } + + if (cPar.a_mode == 11 || cPar.a_mode == 13) { + outfile << "##" << endl; + outfile << "## MCMC related:" << endl; + outfile << "## initial value of h = " << cPar.cHyp_initial.h << endl; + outfile << "## initial value of rho = " << cPar.cHyp_initial.rho << endl; + outfile << "## initial value of pi = " << exp(cPar.cHyp_initial.logp) + << endl; + outfile << "## initial value of |gamma| = " << cPar.cHyp_initial.n_gamma + << endl; + outfile << "## random seed = " << cPar.randseed << endl; + outfile << "## acceptance ratio = " + << (double)cPar.n_accept / + (double)((cPar.w_step + cPar.s_step) * cPar.n_mh) + << endl; + } + + outfile << "##" << endl; + outfile << "## Computation Time:" << endl; + outfile << "## total computation time = " << cPar.time_total << " min " + << endl; + outfile << "## computation time break down: " << endl; + if (cPar.a_mode == 21 || cPar.a_mode == 22 || cPar.a_mode == 11 || + cPar.a_mode == 13 || cPar.a_mode == 14 || cPar.a_mode == 16) { + outfile << "## time on calculating relatedness matrix = " + << cPar.time_G << " min " << endl; + } + if (cPar.a_mode == 31) { + outfile << "## time on eigen-decomposition = " << cPar.time_eigen + << " min " << endl; + } + if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 || + cPar.a_mode == 4 || cPar.a_mode == 5 || cPar.a_mode == 11 || + cPar.a_mode == 12 || cPar.a_mode == 13 || 
cPar.a_mode == 14 || + cPar.a_mode == 16) { + outfile << "## time on eigen-decomposition = " << cPar.time_eigen + << " min " << endl; + outfile << "## time on calculating UtX = " << cPar.time_UtX << " min " + << endl; + } + if ((cPar.a_mode >= 1 && cPar.a_mode <= 4) || + (cPar.a_mode >= 51 && cPar.a_mode <= 54)) { + outfile << "## time on optimization = " << cPar.time_opt << " min " + << endl; + } + if (cPar.a_mode == 11 || cPar.a_mode == 13) { + outfile << "## time on proposal = " << cPar.time_Proposal << " min " + << endl; + outfile << "## time on mcmc = " << cPar.time_opt << " min " << endl; + outfile << "## time on Omega = " << cPar.time_Omega << " min " << endl; + } + if (cPar.a_mode == 41 || cPar.a_mode == 42) { + outfile << "## time on eigen-decomposition = " << cPar.time_eigen + << " min " << endl; + } + if (cPar.a_mode == 43) { + outfile << "## time on eigen-decomposition = " << cPar.time_eigen + << " min " << endl; + outfile << "## time on predicting phenotypes = " << cPar.time_opt + << " min " << endl; + } + outfile << "##" << endl; + + outfile.close(); + outfile.clear(); + return; +} diff --git a/src/gemma.h b/src/gemma.h index 78828ef..cd1683a 100644 --- a/src/gemma.h +++ b/src/gemma.h @@ -26,22 +26,21 @@ using namespace std; class GEMMA { public: - // Parameters. - string version; - string date; - string year; - - // Constructor. - GEMMA(void); - - // Functions. - void PrintHeader (void); - void PrintHelp (size_t option); - void PrintLicense (void); - void Assign (int argc, char **argv, PARAM &cPar); - void BatchRun (PARAM &cPar); - void WriteLog (int argc, char **argv, PARAM &cPar); + // Parameters. + string version; + string date; + string year; + + // Constructor. + GEMMA(void); + + // Functions. 
+ void PrintHeader(void); + void PrintHelp(size_t option); + void PrintLicense(void); + void Assign(int argc, char **argv, PARAM &cPar); + void BatchRun(PARAM &cPar); + void WriteLog(int argc, char **argv, PARAM &cPar); }; #endif - diff --git a/src/gzstream.cpp b/src/gzstream.cpp index 688b625..a7014d6 100644 --- a/src/gzstream.cpp +++ b/src/gzstream.cpp @@ -28,7 +28,7 @@ #include "gzstream.h" #include <iostream> -#include <string.h> // for memcpy +#include <string.h> // for memcpy #ifdef GZSTREAM_NAMESPACE namespace GZSTREAM_NAMESPACE { @@ -42,119 +42,117 @@ namespace GZSTREAM_NAMESPACE { // class gzstreambuf: // -------------------------------------- -gzstreambuf* gzstreambuf::open( const char* name, int open_mode) { - if ( is_open()) - return (gzstreambuf*)0; - mode = open_mode; - // no append nor read/write mode - if ((mode & std::ios::ate) || (mode & std::ios::app) - || ((mode & std::ios::in) && (mode & std::ios::out))) - return (gzstreambuf*)0; - char fmode[10]; - char* fmodeptr = fmode; - if ( mode & std::ios::in) - *fmodeptr++ = 'r'; - else if ( mode & std::ios::out) - *fmodeptr++ = 'w'; - *fmodeptr++ = 'b'; - *fmodeptr = '\0'; - file = gzopen( name, fmode); - if (file == 0) - return (gzstreambuf*)0; - opened = 1; - return this; +gzstreambuf *gzstreambuf::open(const char *name, int open_mode) { + if (is_open()) + return (gzstreambuf *)0; + mode = open_mode; + // no append nor read/write mode + if ((mode & std::ios::ate) || (mode & std::ios::app) || + ((mode & std::ios::in) && (mode & std::ios::out))) + return (gzstreambuf *)0; + char fmode[10]; + char *fmodeptr = fmode; + if (mode & std::ios::in) + *fmodeptr++ = 'r'; + else if (mode & std::ios::out) + *fmodeptr++ = 'w'; + *fmodeptr++ = 'b'; + *fmodeptr = '\0'; + file = gzopen(name, fmode); + if (file == 0) + return (gzstreambuf *)0; + opened = 1; + return this; } -gzstreambuf * gzstreambuf::close() { - if ( is_open()) { - sync(); - opened = 0; - if ( gzclose( file) == Z_OK) - return this; - } - return 
(gzstreambuf*)0; +gzstreambuf *gzstreambuf::close() { + if (is_open()) { + sync(); + opened = 0; + if (gzclose(file) == Z_OK) + return this; + } + return (gzstreambuf *)0; } int gzstreambuf::underflow() { // used for input buffer only - if ( gptr() && ( gptr() < egptr())) - return * reinterpret_cast<unsigned char *>( gptr()); - - if ( ! (mode & std::ios::in) || ! opened) - return EOF; - // Josuttis' implementation of inbuf - int n_putback = gptr() - eback(); - if ( n_putback > 4) - n_putback = 4; - memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback); - - int num = gzread( file, buffer+4, bufferSize-4); - if (num <= 0) // ERROR or EOF - return EOF; - - // reset buffer pointers - setg( buffer + (4 - n_putback), // beginning of putback area - buffer + 4, // read position - buffer + 4 + num); // end of buffer - - // return next character - return * reinterpret_cast<unsigned char *>( gptr()); + if (gptr() && (gptr() < egptr())) + return *reinterpret_cast<unsigned char *>(gptr()); + + if (!(mode & std::ios::in) || !opened) + return EOF; + // Josuttis' implementation of inbuf + int n_putback = gptr() - eback(); + if (n_putback > 4) + n_putback = 4; + memcpy(buffer + (4 - n_putback), gptr() - n_putback, n_putback); + + int num = gzread(file, buffer + 4, bufferSize - 4); + if (num <= 0) // ERROR or EOF + return EOF; + + // reset buffer pointers + setg(buffer + (4 - n_putback), // beginning of putback area + buffer + 4, // read position + buffer + 4 + num); // end of buffer + + // return next character + return *reinterpret_cast<unsigned char *>(gptr()); } int gzstreambuf::flush_buffer() { - // Separate the writing of the buffer from overflow() and - // sync() operation. - int w = pptr() - pbase(); - if ( gzwrite( file, pbase(), w) != w) - return EOF; - pbump( -w); - return w; + // Separate the writing of the buffer from overflow() and + // sync() operation. 
+ int w = pptr() - pbase(); + if (gzwrite(file, pbase(), w) != w) + return EOF; + pbump(-w); + return w; } -int gzstreambuf::overflow( int c) { // used for output buffer only - if ( ! ( mode & std::ios::out) || ! opened) - return EOF; - if (c != EOF) { - *pptr() = c; - pbump(1); - } - if ( flush_buffer() == EOF) - return EOF; - return c; +int gzstreambuf::overflow(int c) { // used for output buffer only + if (!(mode & std::ios::out) || !opened) + return EOF; + if (c != EOF) { + *pptr() = c; + pbump(1); + } + if (flush_buffer() == EOF) + return EOF; + return c; } int gzstreambuf::sync() { - // Changed to use flush_buffer() instead of overflow( EOF) - // which caused improper behavior with std::endl and flush(), - // bug reported by Vincent Ricard. - if ( pptr() && pptr() > pbase()) { - if ( flush_buffer() == EOF) - return -1; - } - return 0; + // Changed to use flush_buffer() instead of overflow( EOF) + // which caused improper behavior with std::endl and flush(), + // bug reported by Vincent Ricard. + if (pptr() && pptr() > pbase()) { + if (flush_buffer() == EOF) + return -1; + } + return 0; } // -------------------------------------- // class gzstreambase: // -------------------------------------- -gzstreambase::gzstreambase( const char* name, int mode) { - init( &buf); - open( name, mode); +gzstreambase::gzstreambase(const char *name, int mode) { + init(&buf); + open(name, mode); } -gzstreambase::~gzstreambase() { - buf.close(); -} +gzstreambase::~gzstreambase() { buf.close(); } -void gzstreambase::open( const char* name, int open_mode) { - if ( ! buf.open( name, open_mode)) - clear( rdstate() | std::ios::badbit); +void gzstreambase::open(const char *name, int open_mode) { + if (!buf.open(name, open_mode)) + clear(rdstate() | std::ios::badbit); } void gzstreambase::close() { - if ( buf.is_open()) - if ( ! 
buf.close()) - clear( rdstate() | std::ios::badbit); + if (buf.is_open()) + if (!buf.close()) + clear(rdstate() | std::ios::badbit); } #ifdef GZSTREAM_NAMESPACE diff --git a/src/gzstream.h b/src/gzstream.h index 241ff76..f760138 100644 --- a/src/gzstream.h +++ b/src/gzstream.h @@ -30,8 +30,8 @@ #define GZSTREAM_H 1 // Standard C++ with new header file names and std::namespace. -#include <iostream> #include <fstream> +#include <iostream> #include <zlib.h> #ifdef GZSTREAM_NAMESPACE @@ -44,43 +44,45 @@ namespace GZSTREAM_NAMESPACE { class gzstreambuf : public std::streambuf { private: - static const int bufferSize = 47+256; // size of data buff - // totals 512 bytes under g++ for igzstream at the end. + static const int bufferSize = 47 + 256; // size of data buff + // totals 512 bytes under g++ for igzstream at the end. + + gzFile file; // file handle for compressed file + char buffer[bufferSize]; // data buffer + char opened; // open/close state of stream + int mode; // I/O mode - gzFile file; // file handle for compressed file - char buffer[bufferSize]; // data buffer - char opened; // open/close state of stream - int mode; // I/O mode + int flush_buffer(); - int flush_buffer(); public: - gzstreambuf() : opened(0) { - setp( buffer, buffer + (bufferSize-1)); - setg( buffer + 4, // beginning of putback area - buffer + 4, // read position - buffer + 4); // end position - // ASSERT: both input & output capabilities will not be used together - } - int is_open() { return opened; } - gzstreambuf* open( const char* name, int open_mode); - gzstreambuf* close(); - ~gzstreambuf() { close(); } - - virtual int overflow( int c = EOF); - virtual int underflow(); - virtual int sync(); + gzstreambuf() : opened(0) { + setp(buffer, buffer + (bufferSize - 1)); + setg(buffer + 4, // beginning of putback area + buffer + 4, // read position + buffer + 4); // end position + // ASSERT: both input & output capabilities will not be used together + } + int is_open() { return opened; } + 
gzstreambuf *open(const char *name, int open_mode); + gzstreambuf *close(); + ~gzstreambuf() { close(); } + + virtual int overflow(int c = EOF); + virtual int underflow(); + virtual int sync(); }; class gzstreambase : virtual public std::ios { protected: - gzstreambuf buf; + gzstreambuf buf; + public: - gzstreambase() { init(&buf); } - gzstreambase( const char* name, int open_mode); - ~gzstreambase(); - void open( const char* name, int open_mode); - void close(); - gzstreambuf* rdbuf() { return &buf; } + gzstreambase() { init(&buf); } + gzstreambase(const char *name, int open_mode); + ~gzstreambase(); + void open(const char *name, int open_mode); + void close(); + gzstreambuf *rdbuf() { return &buf; } }; // ---------------------------------------------------------------------------- @@ -91,24 +93,24 @@ public: class igzstream : public gzstreambase, public std::istream { public: - igzstream() : std::istream( &buf) {} - igzstream( const char* name, int open_mode = std::ios::in) - : gzstreambase( name, open_mode), std::istream( &buf) {} - gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } - void open( const char* name, int open_mode = std::ios::in) { - gzstreambase::open( name, open_mode); - } + igzstream() : std::istream(&buf) {} + igzstream(const char *name, int open_mode = std::ios::in) + : gzstreambase(name, open_mode), std::istream(&buf) {} + gzstreambuf *rdbuf() { return gzstreambase::rdbuf(); } + void open(const char *name, int open_mode = std::ios::in) { + gzstreambase::open(name, open_mode); + } }; class ogzstream : public gzstreambase, public std::ostream { public: - ogzstream() : std::ostream( &buf) {} - ogzstream( const char* name, int mode = std::ios::out) - : gzstreambase( name, mode), std::ostream( &buf) {} - gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } - void open( const char* name, int open_mode = std::ios::out) { - gzstreambase::open( name, open_mode); - } + ogzstream() : std::ostream(&buf) {} + ogzstream(const char *name, int mode = 
std::ios::out) + : gzstreambase(name, mode), std::ostream(&buf) {} + gzstreambuf *rdbuf() { return gzstreambase::rdbuf(); } + void open(const char *name, int open_mode = std::ios::out) { + gzstreambase::open(name, open_mode); + } }; #ifdef GZSTREAM_NAMESPACE @@ -16,3007 +16,3191 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> +#include <assert.h> +#include <bitset> +#include <cmath> +#include <cstdint> +#include <cstring> #include <fstream> -#include <sstream> -#include <string> #include <iomanip> -#include <bitset> -#include <vector> +#include <iostream> #include <map> #include <set> -#include <cstring> -#include <cmath> -#include <cstdint> +#include <sstream> #include <stdio.h> #include <stdlib.h> -#include <assert.h> +#include <string> +#include <vector> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" -#include "lapack.h" -#include "gzstream.h" -#include "mathfunc.h" #include "eigenlib.h" +#include "gzstream.h" #include "io.h" +#include "lapack.h" +#include "mathfunc.h" using namespace std; // Print progress bar. 
-void ProgressBar (string str, double p, double total) { - double progress = (100.0 * p / total); - int barsize = (int) (progress / 2.0); - char bar[51]; - - cout<<str; - for (int i = 0; i <50; i++) { - if (i<barsize) {bar[i] = '=';} - else {bar[i]=' ';} - cout<<bar[i]; - } - cout<<setprecision(2)<<fixed<<progress<<"%\r"<<flush; - - return; -} +void ProgressBar(string str, double p, double total) { + double progress = (100.0 * p / total); + int barsize = (int)(progress / 2.0); + char bar[51]; + + cout << str; + for (int i = 0; i < 50; i++) { + if (i < barsize) { + bar[i] = '='; + } else { + bar[i] = ' '; + } + cout << bar[i]; + } + cout << setprecision(2) << fixed << progress << "%\r" << flush; -// Print progress bar with acceptance ratio. -void ProgressBar (string str, double p, double total, double ratio) { - double progress = (100.0 * p / total); - int barsize = (int) (progress / 2.0); - char bar[51]; - - cout<<str; - for (int i = 0; i <50; i++) { - if (i<barsize) {bar[i] = '=';} - else {bar[i]=' ';} - cout<<bar[i]; - } - cout<<setprecision(2)<<fixed<<progress<<"% "<<ratio<<"\r"<<flush; - return; + return; } -bool isBlankLine(char const* line) { - for ( char const* cp = line; *cp; ++cp ) { - if ( !isspace(*cp) ) - return false; +// Print progress bar with acceptance ratio. 
+void ProgressBar(string str, double p, double total, double ratio) { + double progress = (100.0 * p / total); + int barsize = (int)(progress / 2.0); + char bar[51]; + + cout << str; + for (int i = 0; i < 50; i++) { + if (i < barsize) { + bar[i] = '='; + } else { + bar[i] = ' '; } - return true; + cout << bar[i]; + } + cout << setprecision(2) << fixed << progress << "% " << ratio << "\r" + << flush; + return; } -bool isBlankLine(std::string const& line) { - return isBlankLine(line.c_str()); +bool isBlankLine(char const *line) { + for (char const *cp = line; *cp; ++cp) { + if (!isspace(*cp)) + return false; + } + return true; } +bool isBlankLine(std::string const &line) { return isBlankLine(line.c_str()); } + // In case files are ended with "\r" or "\r\n". -std::istream& safeGetline(std::istream& is, std::string& t) { - t.clear(); - - // The characters in the stream are read one-by-one using a - // std::streambuf. That is faster than reading them one-by-one - // using the std::istream. Code that uses streambuf this way must - // be guarded by a sentry object. The sentry object performs - // various tasks, such as thread synchronization and updating the - // stream state. - std::istream::sentry se(is, true); - std::streambuf* sb = is.rdbuf(); - - for(;;) { - int c = sb->sbumpc(); - switch (c) { - case '\n': - return is; - case '\r': - if(sb->sgetc() == '\n') - sb->sbumpc(); - return is; - case EOF: - - // Also handle the case when the last line has no line - // ending. - if(t.empty()) - is.setstate(std::ios::eofbit); - return is; - default: - t += (char)c; - } +std::istream &safeGetline(std::istream &is, std::string &t) { + t.clear(); + + // The characters in the stream are read one-by-one using a + // std::streambuf. That is faster than reading them one-by-one + // using the std::istream. Code that uses streambuf this way must + // be guarded by a sentry object. 
The sentry object performs + // various tasks, such as thread synchronization and updating the + // stream state. + std::istream::sentry se(is, true); + std::streambuf *sb = is.rdbuf(); + + for (;;) { + int c = sb->sbumpc(); + switch (c) { + case '\n': + return is; + case '\r': + if (sb->sgetc() == '\n') + sb->sbumpc(); + return is; + case EOF: + + // Also handle the case when the last line has no line + // ending. + if (t.empty()) + is.setstate(std::ios::eofbit); + return is; + default: + t += (char)c; } + } } // Read SNP file. -bool ReadFile_snps (const string &file_snps, set<string> &setSnps) { - setSnps.clear(); +bool ReadFile_snps(const string &file_snps, set<string> &setSnps) { + setSnps.clear(); - igzstream infile (file_snps.c_str(), igzstream::in); - if (!infile) { - cout << "error! fail to open snps file: " << file_snps << endl; - return false; - } + igzstream infile(file_snps.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open snps file: " << file_snps << endl; + return false; + } - string line; - char *ch_ptr; + string line; + char *ch_ptr; - while (getline(infile, line)) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - setSnps.insert(ch_ptr); - } + while (getline(infile, line)) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + setSnps.insert(ch_ptr); + } - infile.close(); - infile.clear(); + infile.close(); + infile.clear(); - return true; + return true; } -bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps) { - setSnps.clear(); +bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { + setSnps.clear(); - igzstream infile (file_snps.c_str(), igzstream::in); - if (!infile) { - cout << "error! fail to open snps file: " << file_snps << endl; - return false; - } + igzstream infile(file_snps.c_str(), igzstream::in); + if (!infile) { + cout << "error! 
fail to open snps file: " << file_snps << endl; + return false; + } - string line, rs, chr, pos; - char *ch_ptr; + string line, rs, chr, pos; + char *ch_ptr; - // Read header. - HEADER header; - !safeGetline(infile, line).eof(); - ReadHeader_io (line, header); + // Read header. + HEADER header; + !safeGetline(infile, line).eof(); + ReadHeader_io(line, header); - if (header.rs_col==0 && (header.chr_col==0 || header.pos_col==0) ) { - cout<<"missing rs id in the hearder"<<endl; - } + if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) { + cout << "missing rs id in the hearder" << endl; + } - while (!safeGetline(infile, line).eof()) { - if (isBlankLine(line)) {continue;} - ch_ptr=strtok ((char *)line.c_str(), " , \t"); + while (!safeGetline(infile, line).eof()) { + if (isBlankLine(line)) { + continue; + } + ch_ptr = strtok((char *)line.c_str(), " , \t"); - for (size_t i=0; i<header.coln; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;} - if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;} - if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;} + for (size_t i = 0; i < header.coln; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } + if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } + if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + } - ch_ptr=strtok (NULL, " , \t"); - } + ch_ptr = strtok(NULL, " , \t"); + } - if (header.rs_col==0) { - rs=chr+":"+pos; - } + if (header.rs_col == 0) { + rs = chr + ":" + pos; + } - setSnps.insert(rs); - } + setSnps.insert(rs); + } - infile.close(); - infile.clear(); + infile.close(); + infile.clear(); - return true; + return true; } // Read log file. -bool ReadFile_log (const string &file_log, double &pheno_mean) { - ifstream infile (file_log.c_str(), ifstream::in); - if (!infile) { - cout << "error! 
fail to open log file: " << file_log << endl; - return false; - } - - string line; - char *ch_ptr; - size_t flag=0; - - while (getline(infile, line)) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - if (ch_ptr!=NULL && strcmp(ch_ptr, "estimated")==0) { - ch_ptr=strtok (NULL, " , \t"); - if (ch_ptr!=NULL && strcmp(ch_ptr, "mean")==0) { - ch_ptr=strtok (NULL, " , \t"); - if (ch_ptr!=NULL && strcmp(ch_ptr, "=")==0) { - ch_ptr=strtok (NULL, " , \t"); - pheno_mean=atof(ch_ptr); - flag=1; - } - } - } - - if (flag==1) {break;} - } - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_log(const string &file_log, double &pheno_mean) { + ifstream infile(file_log.c_str(), ifstream::in); + if (!infile) { + cout << "error! fail to open log file: " << file_log << endl; + return false; + } + + string line; + char *ch_ptr; + size_t flag = 0; + + while (getline(infile, line)) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + if (ch_ptr != NULL && strcmp(ch_ptr, "estimated") == 0) { + ch_ptr = strtok(NULL, " , \t"); + if (ch_ptr != NULL && strcmp(ch_ptr, "mean") == 0) { + ch_ptr = strtok(NULL, " , \t"); + if (ch_ptr != NULL && strcmp(ch_ptr, "=") == 0) { + ch_ptr = strtok(NULL, " , \t"); + pheno_mean = atof(ch_ptr); + flag = 1; + } + } + } + + if (flag == 1) { + break; + } + } + + infile.close(); + infile.clear(); + + return true; } // Read bimbam annotation file. 
-bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, - map<string, long int> &mapRS2bp, - map<string, double> &mapRS2cM) { - mapRS2chr.clear(); - mapRS2bp.clear(); - - ifstream infile (file_anno.c_str(), ifstream::in); - if (!infile) { - cout << "error opening annotation file: " << file_anno << endl; - return false; - } - - string line; - char *ch_ptr; - - string rs; - long int b_pos; - string chr; - double cM; - - while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - if (strcmp(ch_ptr, "NA")==0) { - b_pos=-9; - } else { - b_pos=atol(ch_ptr); - } - ch_ptr=strtok (NULL, " , \t"); - if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) { - chr="-9"; - } else { - chr=ch_ptr; - } - ch_ptr=strtok (NULL, " , \t"); - if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) { - cM=-9; - } else { - cM=atof(ch_ptr); - } - - mapRS2chr[rs]=chr; - mapRS2bp[rs]=b_pos; - mapRS2cM[rs]=cM; - } - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr, + map<string, long int> &mapRS2bp, + map<string, double> &mapRS2cM) { + mapRS2chr.clear(); + mapRS2bp.clear(); + + ifstream infile(file_anno.c_str(), ifstream::in); + if (!infile) { + cout << "error opening annotation file: " << file_anno << endl; + return false; + } + + string line; + char *ch_ptr; + + string rs; + long int b_pos; + string chr; + double cM; + + while (!safeGetline(infile, line).eof()) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + if (strcmp(ch_ptr, "NA") == 0) { + b_pos = -9; + } else { + b_pos = atol(ch_ptr); + } + ch_ptr = strtok(NULL, " , \t"); + if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) { + chr = "-9"; + } else { + chr = ch_ptr; + } + ch_ptr = strtok(NULL, " , \t"); + if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) { + cM = -9; + } else { + cM = atof(ch_ptr); + } + + mapRS2chr[rs] = chr; + 
mapRS2bp[rs] = b_pos; + mapRS2cM[rs] = cM; + } + + infile.close(); + infile.clear(); + + return true; } // Read 1 column of phenotype. -bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, - vector<double> &pheno, const int &p_column) { - indicator_idv.clear(); - pheno.clear(); - - igzstream infile (file_pheno.c_str(), igzstream::in); - if (!infile) { - cout << "error! fail to open phenotype file: " << file_pheno << endl; - return false; - } - - string line; - char *ch_ptr; - - string id; - double p; - while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - for (int i=0; i<(p_column-1); ++i) { - ch_ptr=strtok (NULL, " , \t"); - } - if (strcmp(ch_ptr, "NA")==0) { - indicator_idv.push_back(0); - pheno.push_back(-9); - } - else { - - // Pheno is different from pimass2. - p=atof(ch_ptr); - indicator_idv.push_back(1); - pheno.push_back(p); - } - } - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv, + vector<double> &pheno, const int &p_column) { + indicator_idv.clear(); + pheno.clear(); + + igzstream infile(file_pheno.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open phenotype file: " << file_pheno << endl; + return false; + } + + string line; + char *ch_ptr; + + string id; + double p; + while (!safeGetline(infile, line).eof()) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + for (int i = 0; i < (p_column - 1); ++i) { + ch_ptr = strtok(NULL, " , \t"); + } + if (strcmp(ch_ptr, "NA") == 0) { + indicator_idv.push_back(0); + pheno.push_back(-9); + } else { + + // Pheno is different from pimass2. + p = atof(ch_ptr); + indicator_idv.push_back(1); + pheno.push_back(p); + } + } + + infile.close(); + infile.clear(); + + return true; } // Read bimbam phenotype file, p_column=1, 2,... 
-bool ReadFile_pheno (const string &file_pheno, - vector<vector<int> > &indicator_pheno, - vector<vector<double> > &pheno, - const vector<size_t> &p_column) { - indicator_pheno.clear(); - pheno.clear(); - - igzstream infile (file_pheno.c_str(), igzstream::in); - if (!infile) { - cout << "error! fail to open phenotype file: " << file_pheno << endl; - return false; - } - - string line; - char *ch_ptr; - - string id; - double p; - - vector<double> pheno_row; - vector<int> ind_pheno_row; - - size_t p_max=*max_element(p_column.begin(), p_column.end() ); - map<size_t, size_t> mapP2c; - for (size_t i=0; i<p_column.size(); i++) { - mapP2c[p_column[i]]=i; - pheno_row.push_back(-9); - ind_pheno_row.push_back(0); - } - - while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - - size_t i=0; - while (i<p_max ) { - if (mapP2c.count(i+1)!=0) { - if (strcmp(ch_ptr, "NA")==0) { - ind_pheno_row[mapP2c[i+1]]=0; - pheno_row[mapP2c[i+1]]=-9; - } - else { - p=atof(ch_ptr); - ind_pheno_row[mapP2c[i+1]]=1; - pheno_row[mapP2c[i+1]]=p; - } - } - i++; - ch_ptr=strtok (NULL, " , \t"); - } - - indicator_pheno.push_back(ind_pheno_row); - pheno.push_back(pheno_row); - } - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_pheno(const string &file_pheno, + vector<vector<int>> &indicator_pheno, + vector<vector<double>> &pheno, + const vector<size_t> &p_column) { + indicator_pheno.clear(); + pheno.clear(); + + igzstream infile(file_pheno.c_str(), igzstream::in); + if (!infile) { + cout << "error! 
fail to open phenotype file: " << file_pheno << endl; + return false; + } + + string line; + char *ch_ptr; + + string id; + double p; + + vector<double> pheno_row; + vector<int> ind_pheno_row; + + size_t p_max = *max_element(p_column.begin(), p_column.end()); + map<size_t, size_t> mapP2c; + for (size_t i = 0; i < p_column.size(); i++) { + mapP2c[p_column[i]] = i; + pheno_row.push_back(-9); + ind_pheno_row.push_back(0); + } + + while (!safeGetline(infile, line).eof()) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + + size_t i = 0; + while (i < p_max) { + if (mapP2c.count(i + 1) != 0) { + if (strcmp(ch_ptr, "NA") == 0) { + ind_pheno_row[mapP2c[i + 1]] = 0; + pheno_row[mapP2c[i + 1]] = -9; + } else { + p = atof(ch_ptr); + ind_pheno_row[mapP2c[i + 1]] = 1; + pheno_row[mapP2c[i + 1]] = p; + } + } + i++; + ch_ptr = strtok(NULL, " , \t"); + } + + indicator_pheno.push_back(ind_pheno_row); + pheno.push_back(pheno_row); + } + + infile.close(); + infile.clear(); + + return true; } -bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, - vector<vector<double> > &cvt, size_t &n_cvt) { - indicator_cvt.clear(); - - ifstream infile (file_cvt.c_str(), ifstream::in); - if (!infile) { - cout << "error! 
fail to open covariates file: " << file_cvt << endl; - return false; - } - - string line; - char *ch_ptr; - double d; - - int flag_na=0; - - while (!safeGetline(infile, line).eof()) { - vector<double> v_d; flag_na=0; - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - while (ch_ptr!=NULL) { - if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;} - else {d=atof(ch_ptr);} - - v_d.push_back(d); - ch_ptr=strtok (NULL, " , \t"); - } - if (flag_na==0) { - indicator_cvt.push_back(1); - } else { - indicator_cvt.push_back(0); - } - cvt.push_back(v_d); - } - - if (indicator_cvt.empty()) {n_cvt=0;} - else { - flag_na=0; - for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) { - if (indicator_cvt[i]==0) { - continue; - } - - if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();} - if (flag_na!=0 && n_cvt!=cvt[i].size()) { - cout << "error! number of covariates in row " << - i << " do not match other rows." << endl; - return false; - } - } - } - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt, + vector<vector<double>> &cvt, size_t &n_cvt) { + indicator_cvt.clear(); + + ifstream infile(file_cvt.c_str(), ifstream::in); + if (!infile) { + cout << "error! 
fail to open covariates file: " << file_cvt << endl; + return false; + } + + string line; + char *ch_ptr; + double d; + + int flag_na = 0; + + while (!safeGetline(infile, line).eof()) { + vector<double> v_d; + flag_na = 0; + ch_ptr = strtok((char *)line.c_str(), " , \t"); + while (ch_ptr != NULL) { + if (strcmp(ch_ptr, "NA") == 0) { + flag_na = 1; + d = -9; + } else { + d = atof(ch_ptr); + } + + v_d.push_back(d); + ch_ptr = strtok(NULL, " , \t"); + } + if (flag_na == 0) { + indicator_cvt.push_back(1); + } else { + indicator_cvt.push_back(0); + } + cvt.push_back(v_d); + } + + if (indicator_cvt.empty()) { + n_cvt = 0; + } else { + flag_na = 0; + for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) { + if (indicator_cvt[i] == 0) { + continue; + } + + if (flag_na == 0) { + flag_na = 1; + n_cvt = cvt[i].size(); + } + if (flag_na != 0 && n_cvt != cvt[i].size()) { + cout << "error! number of covariates in row " << i + << " do not match other rows." << endl; + return false; + } + } + } + + infile.close(); + infile.clear(); + + return true; } // Read .bim file. 
-bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) { - snpInfo.clear(); - - ifstream infile (file_bim.c_str(), ifstream::in); - if (!infile) { - cout << "error opening .bim file: " << file_bim << endl; - return false; - } - - string line; - char *ch_ptr; - - string rs; - long int b_pos; - string chr; - double cM; - string major; - string minor; - - while (getline(infile, line)) { - ch_ptr=strtok ((char *)line.c_str(), " \t"); - chr=ch_ptr; - ch_ptr=strtok (NULL, " \t"); - rs=ch_ptr; - ch_ptr=strtok (NULL, " \t"); - cM=atof(ch_ptr); - ch_ptr=strtok (NULL, " \t"); - b_pos=atol(ch_ptr); - ch_ptr=strtok (NULL, " \t"); - minor=ch_ptr; - ch_ptr=strtok (NULL, " \t"); - major=ch_ptr; - - SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, - 0, -9, -9, 0, 0, 0}; - snpInfo.push_back(sInfo); - } - - infile.close(); - infile.clear(); - return true; +bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) { + snpInfo.clear(); + + ifstream infile(file_bim.c_str(), ifstream::in); + if (!infile) { + cout << "error opening .bim file: " << file_bim << endl; + return false; + } + + string line; + char *ch_ptr; + + string rs; + long int b_pos; + string chr; + double cM; + string major; + string minor; + + while (getline(infile, line)) { + ch_ptr = strtok((char *)line.c_str(), " \t"); + chr = ch_ptr; + ch_ptr = strtok(NULL, " \t"); + rs = ch_ptr; + ch_ptr = strtok(NULL, " \t"); + cM = atof(ch_ptr); + ch_ptr = strtok(NULL, " \t"); + b_pos = atol(ch_ptr); + ch_ptr = strtok(NULL, " \t"); + minor = ch_ptr; + ch_ptr = strtok(NULL, " \t"); + major = ch_ptr; + + SNPINFO sInfo = {chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0}; + snpInfo.push_back(sInfo); + } + + infile.close(); + infile.clear(); + return true; } // Read .fam file. 
-bool ReadFile_fam (const string &file_fam, - vector<vector<int> > &indicator_pheno, - vector<vector<double> > &pheno, - map<string, int> &mapID2num, - const vector<size_t> &p_column) { - indicator_pheno.clear(); - pheno.clear(); - mapID2num.clear(); - - igzstream infile (file_fam.c_str(), igzstream::in); - if (!infile) { - cout<<"error opening .fam file: "<<file_fam<<endl; return false;} - - string line; - char *ch_ptr; - - string id; - int c=0; - double p; - - vector<double> pheno_row; - vector<int> ind_pheno_row; - - size_t p_max=*max_element(p_column.begin(), p_column.end() ); - map<size_t, size_t> mapP2c; - for (size_t i=0; i<p_column.size(); i++) { - mapP2c[p_column[i]]=i; - pheno_row.push_back(-9); - ind_pheno_row.push_back(0); - } - - while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " \t"); - ch_ptr=strtok (NULL, " \t"); - id=ch_ptr; - ch_ptr=strtok (NULL, " \t"); - ch_ptr=strtok (NULL, " \t"); - ch_ptr=strtok (NULL, " \t"); - ch_ptr=strtok (NULL, " \t"); - - size_t i=0; - while (i<p_max ) { - if (mapP2c.count(i+1)!=0 ) { - if (strcmp(ch_ptr, "NA")==0) { - ind_pheno_row[mapP2c[i+1]]=0; - pheno_row[mapP2c[i+1]]=-9; - } else { - p=atof(ch_ptr); - - if (p==-9) { - ind_pheno_row[mapP2c[i+1]]=0; - pheno_row[mapP2c[i+1]]=-9; - } - else { - ind_pheno_row[mapP2c[i+1]]=1; - pheno_row[mapP2c[i+1]]=p; - } - } - } - i++; - ch_ptr=strtok (NULL, " , \t"); - } - - indicator_pheno.push_back(ind_pheno_row); - pheno.push_back(pheno_row); - - mapID2num[id]=c; c++; - } - - infile.close(); - infile.clear(); - return true; +bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, + vector<vector<double>> &pheno, map<string, int> &mapID2num, + const vector<size_t> &p_column) { + indicator_pheno.clear(); + pheno.clear(); + mapID2num.clear(); + + igzstream infile(file_fam.c_str(), igzstream::in); + if (!infile) { + cout << "error opening .fam file: " << file_fam << endl; + return false; + } + + string line; + char *ch_ptr; + + 
string id; + int c = 0; + double p; + + vector<double> pheno_row; + vector<int> ind_pheno_row; + + size_t p_max = *max_element(p_column.begin(), p_column.end()); + map<size_t, size_t> mapP2c; + for (size_t i = 0; i < p_column.size(); i++) { + mapP2c[p_column[i]] = i; + pheno_row.push_back(-9); + ind_pheno_row.push_back(0); + } + + while (!safeGetline(infile, line).eof()) { + ch_ptr = strtok((char *)line.c_str(), " \t"); + ch_ptr = strtok(NULL, " \t"); + id = ch_ptr; + ch_ptr = strtok(NULL, " \t"); + ch_ptr = strtok(NULL, " \t"); + ch_ptr = strtok(NULL, " \t"); + ch_ptr = strtok(NULL, " \t"); + + size_t i = 0; + while (i < p_max) { + if (mapP2c.count(i + 1) != 0) { + if (strcmp(ch_ptr, "NA") == 0) { + ind_pheno_row[mapP2c[i + 1]] = 0; + pheno_row[mapP2c[i + 1]] = -9; + } else { + p = atof(ch_ptr); + + if (p == -9) { + ind_pheno_row[mapP2c[i + 1]] = 0; + pheno_row[mapP2c[i + 1]] = -9; + } else { + ind_pheno_row[mapP2c[i + 1]] = 1; + pheno_row[mapP2c[i + 1]] = p; + } + } + } + i++; + ch_ptr = strtok(NULL, " , \t"); + } + + indicator_pheno.push_back(ind_pheno_row); + pheno.push_back(pheno_row); + + mapID2num[id] = c; + c++; + } + + infile.close(); + infile.clear(); + return true; } // Read bimbam mean genotype file, the first time, to obtain #SNPs for // analysis (ns_test) and total #SNP (ns_total). 
-bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, const double &maf_level, - const double &miss_level, const double &hwe_level, - const double &r2_level, - map<string, string> &mapRS2chr, - map<string, long int> &mapRS2bp, - map<string, double> &mapRS2cM, - vector<SNPINFO> &snpInfo, - size_t &ns_test) { - indicator_snp.clear(); - snpInfo.clear(); - - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return false; - } - - gsl_vector *genotype=gsl_vector_alloc (W->size1); - gsl_vector *genotype_miss=gsl_vector_alloc (W->size1); - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - double v_x, v_w; - int c_idv=0; - - string line; - char *ch_ptr; - - string rs; - long int b_pos; - string chr; - string major; - string minor; - double cM; - size_t file_pos; - - double maf, geno, geno_old; - size_t n_miss; - size_t n_0, n_1, n_2; - int flag_poly; - - int ni_total=indicator_idv.size(); - int ni_test=0; - for (int i=0; i<ni_total; ++i) { - ni_test+=indicator_idv[i]; - } - ns_test=0; - - file_pos=0; - while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - minor=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - major=ch_ptr; - - if (setSnps.size()!=0 && setSnps.count(rs)==0) { - SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9, - 0, 0, file_pos}; - snpInfo.push_back(sInfo); - indicator_snp.push_back(0); - - file_pos++; - continue; - } - - if 
(mapRS2bp.count(rs)==0) {chr="-9"; b_pos=-9;cM=-9;} - else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];} - - maf=0; n_miss=0; flag_poly=0; geno_old=-9; - n_0=0; n_1=0; n_2=0; - c_idv=0; gsl_vector_set_zero (genotype_miss); - for (int i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set (genotype_miss, c_idv, 1); - n_miss++; - c_idv++; - continue; - } - - geno=atof(ch_ptr); - if (geno>=0 && geno<=0.5) {n_0++;} - if (geno>0.5 && geno<1.5) {n_1++;} - if (geno>=1.5 && geno<=2.0) {n_2++;} - - gsl_vector_set (genotype, c_idv, geno); - - if (flag_poly==0) {geno_old=geno; flag_poly=2;} - if (flag_poly==2 && geno!=geno_old) {flag_poly=1;} - - maf+=geno; - - c_idv++; - } - maf/=2.0*(double)(ni_test-n_miss); - - SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss, - (double)n_miss/(double)ni_test, maf, - ni_test-n_miss, 0, file_pos}; - snpInfo.push_back(sInfo); - file_pos++; - - if ( (double)n_miss/(double)ni_test > miss_level) { - indicator_snp.push_back(0); - continue; - } - - if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) { - indicator_snp.push_back(0); - continue; - } - - if (flag_poly!=1) {indicator_snp.push_back(0); continue;} - - if (hwe_level!=0 && maf_level!=-1) { - if (CalcHWE(n_0, n_2, n_1)<hwe_level) { - indicator_snp.push_back(0); - continue; - } - } - - // Filter SNP if it is correlated with W unless W has - // only one column, of 1s. 
- for (size_t i=0; i<genotype->size; ++i) { - if (gsl_vector_get (genotype_miss, i)==1) { - geno=maf*2.0; - gsl_vector_set (genotype, i, geno); - } - } - - gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - gsl_blas_ddot (genotype, genotype, &v_x); - gsl_blas_ddot (Wtx, WtWiWtx, &v_w); - - if (W->size2!=1 && v_w/v_x >= r2_level) { - indicator_snp.push_back(0); - continue; - } - - indicator_snp.push_back(1); - ns_test++; - } - - gsl_vector_free (genotype); - gsl_vector_free (genotype_miss); - gsl_matrix_free (WtW); - gsl_matrix_free (WtWi); - gsl_vector_free (Wtx); - gsl_vector_free (WtWiWtx); - gsl_permutation_free (pmt); - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, + const gsl_matrix *W, vector<int> &indicator_idv, + vector<int> &indicator_snp, const double &maf_level, + const double &miss_level, const double &hwe_level, + const double &r2_level, map<string, string> &mapRS2chr, + map<string, long int> &mapRS2bp, + map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, + size_t &ns_test) { + indicator_snp.clear(); + snpInfo.clear(); + + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + gsl_vector *genotype = gsl_vector_alloc(W->size1); + gsl_vector *genotype_miss = gsl_vector_alloc(W->size1); + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + double v_x, v_w; + int c_idv = 0; + + string line; + char *ch_ptr; + + string rs; + long int b_pos; + string 
chr; + string major; + string minor; + double cM; + size_t file_pos; + + double maf, geno, geno_old; + size_t n_miss; + size_t n_0, n_1, n_2; + int flag_poly; + + int ni_total = indicator_idv.size(); + int ni_test = 0; + for (int i = 0; i < ni_total; ++i) { + ni_test += indicator_idv[i]; + } + ns_test = 0; + + file_pos = 0; + while (!safeGetline(infile, line).eof()) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + minor = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + major = ch_ptr; + + if (setSnps.size() != 0 && setSnps.count(rs) == 0) { + SNPINFO sInfo = {"-9", rs, -9, -9, minor, major, + 0, -9, -9, 0, 0, file_pos}; + snpInfo.push_back(sInfo); + indicator_snp.push_back(0); + + file_pos++; + continue; + } + + if (mapRS2bp.count(rs) == 0) { + chr = "-9"; + b_pos = -9; + cM = -9; + } else { + b_pos = mapRS2bp[rs]; + chr = mapRS2chr[rs]; + cM = mapRS2cM[rs]; + } + + maf = 0; + n_miss = 0; + flag_poly = 0; + geno_old = -9; + n_0 = 0; + n_1 = 0; + n_2 = 0; + c_idv = 0; + gsl_vector_set_zero(genotype_miss); + for (int i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(genotype_miss, c_idv, 1); + n_miss++; + c_idv++; + continue; + } + + geno = atof(ch_ptr); + if (geno >= 0 && geno <= 0.5) { + n_0++; + } + if (geno > 0.5 && geno < 1.5) { + n_1++; + } + if (geno >= 1.5 && geno <= 2.0) { + n_2++; + } + + gsl_vector_set(genotype, c_idv, geno); + + if (flag_poly == 0) { + geno_old = geno; + flag_poly = 2; + } + if (flag_poly == 2 && geno != geno_old) { + flag_poly = 1; + } + + maf += geno; + + c_idv++; + } + maf /= 2.0 * (double)(ni_test - n_miss); + + SNPINFO sInfo = {chr, rs, + cM, b_pos, + minor, major, + n_miss, (double)n_miss / (double)ni_test, + maf, ni_test - n_miss, + 0, file_pos}; + snpInfo.push_back(sInfo); + file_pos++; + + if ((double)n_miss / (double)ni_test > miss_level) { + 
indicator_snp.push_back(0); + continue; + } + + if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) { + indicator_snp.push_back(0); + continue; + } + + if (flag_poly != 1) { + indicator_snp.push_back(0); + continue; + } + + if (hwe_level != 0 && maf_level != -1) { + if (CalcHWE(n_0, n_2, n_1) < hwe_level) { + indicator_snp.push_back(0); + continue; + } + } + + // Filter SNP if it is correlated with W unless W has + // only one column, of 1s. + for (size_t i = 0; i < genotype->size; ++i) { + if (gsl_vector_get(genotype_miss, i) == 1) { + geno = maf * 2.0; + gsl_vector_set(genotype, i, geno); + } + } + + gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx); + gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); + gsl_blas_ddot(genotype, genotype, &v_x); + gsl_blas_ddot(Wtx, WtWiWtx, &v_w); + + if (W->size2 != 1 && v_w / v_x >= r2_level) { + indicator_snp.push_back(0); + continue; + } + + indicator_snp.push_back(1); + ns_test++; + } + + gsl_vector_free(genotype); + gsl_vector_free(genotype_miss); + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(Wtx); + gsl_vector_free(WtWiWtx); + gsl_permutation_free(pmt); + + infile.close(); + infile.clear(); + + return true; } // Read bed file, the first time. 
-bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, - const double &maf_level, const double &miss_level, - const double &hwe_level, const double &r2_level, - size_t &ns_test) { - indicator_snp.clear(); - size_t ns_total=snpInfo.size(); - - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return false; - } - - gsl_vector *genotype=gsl_vector_alloc (W->size1); - gsl_vector *genotype_miss=gsl_vector_alloc (W->size1); - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - double v_x, v_w, geno; - size_t c_idv=0; - - char ch[1]; - bitset<8> b; - - size_t ni_total=indicator_idv.size(); - size_t ni_test=0; - for (size_t i=0; i<ni_total; ++i) { - ni_test+=indicator_idv[i]; - } - ns_test=0; - - // Calculate n_bit and c, the number of bit for each snp. - size_t n_bit; - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1;} - - // Ignore the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - double maf; - size_t n_miss; - size_t n_0, n_1, n_2, c; - - // Start reading snps and doing association test. - for (size_t t=0; t<ns_total; ++t) { - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - if (setSnps.size()!=0 && - setSnps.count(snpInfo[t].rs_number) == 0) { - snpInfo[t].n_miss=-9; - snpInfo[t].missingness=-9; - snpInfo[t].maf=-9; - snpInfo[t].file_position=t; - indicator_snp.push_back(0); - continue; - } - - // Read genotypes. 
- c=0; maf=0.0; n_miss=0; n_0=0; n_1=0; n_2=0; - c_idv=0; gsl_vector_set_zero (genotype_miss); - for (size_t i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0; - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && c==ni_total) {break;} - if (indicator_idv[c]==0) {c++; continue;} - c++; - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(genotype, c_idv, 2.0); - maf+=2.0; - n_2++; - } - else { - gsl_vector_set(genotype, c_idv, 1.0); - maf+=1.0; - n_1++; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(genotype, c_idv, 0.0); - maf+=0.0; - n_0++; - } - else { - gsl_vector_set(genotype_miss, c_idv, 1); - n_miss++; - } - } - c_idv++; - } - } - maf/=2.0*(double)(ni_test-n_miss); - - snpInfo[t].n_miss=n_miss; - snpInfo[t].missingness=(double)n_miss/(double)ni_test; - snpInfo[t].maf=maf; - snpInfo[t].n_idv=ni_test-n_miss; - snpInfo[t].n_nb=0; - snpInfo[t].file_position=t; - - if ( (double)n_miss/(double)ni_test > miss_level) { - indicator_snp.push_back(0); - continue; - } - - if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) { - indicator_snp.push_back(0); - continue; - } - - if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) { - indicator_snp.push_back(0); - continue; - } - - if (hwe_level!=0 && maf_level!=-1) { - if (CalcHWE(n_0, n_2, n_1)<hwe_level) { - indicator_snp.push_back(0); - continue; - } - } - - // Filter SNP if it is correlated with W unless W has - // only one column, of 1s. 
- for (size_t i=0; i<genotype->size; ++i) { - if (gsl_vector_get (genotype_miss, i)==1) { - geno=maf*2.0; - gsl_vector_set (genotype, i, geno); - } - } - - gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - gsl_blas_ddot (genotype, genotype, &v_x); - gsl_blas_ddot (Wtx, WtWiWtx, &v_w); - - if (W->size2!=1 && v_w/v_x > r2_level) { - indicator_snp.push_back(0); - continue; - } - - indicator_snp.push_back(1); - ns_test++; - } - - gsl_vector_free (genotype); - gsl_vector_free (genotype_miss); - gsl_matrix_free (WtW); - gsl_matrix_free (WtWi); - gsl_vector_free (Wtx); - gsl_vector_free (WtWiWtx); - gsl_permutation_free (pmt); - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_bed(const string &file_bed, const set<string> &setSnps, + const gsl_matrix *W, vector<int> &indicator_idv, + vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, + const double &maf_level, const double &miss_level, + const double &hwe_level, const double &r2_level, + size_t &ns_test) { + indicator_snp.clear(); + size_t ns_total = snpInfo.size(); + + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + gsl_vector *genotype = gsl_vector_alloc(W->size1); + gsl_vector *genotype_miss = gsl_vector_alloc(W->size1); + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + double v_x, v_w, geno; + size_t c_idv = 0; + + char ch[1]; + bitset<8> b; + + size_t ni_total = indicator_idv.size(); + size_t ni_test = 0; + for (size_t i = 0; i < ni_total; ++i) { + ni_test += 
indicator_idv[i]; + } + ns_test = 0; + + // Calculate n_bit and c, the number of bit for each snp. + size_t n_bit; + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Ignore the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + double maf; + size_t n_miss; + size_t n_0, n_1, n_2, c; + + // Start reading snps and doing association test. + for (size_t t = 0; t < ns_total; ++t) { + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + if (setSnps.size() != 0 && setSnps.count(snpInfo[t].rs_number) == 0) { + snpInfo[t].n_miss = -9; + snpInfo[t].missingness = -9; + snpInfo[t].maf = -9; + snpInfo[t].file_position = t; + indicator_snp.push_back(0); + continue; + } + + // Read genotypes. + c = 0; + maf = 0.0; + n_miss = 0; + n_0 = 0; + n_1 = 0; + n_2 = 0; + c_idv = 0; + gsl_vector_set_zero(genotype_miss); + for (size_t i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0; + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && c == ni_total) { + break; + } + if (indicator_idv[c] == 0) { + c++; + continue; + } + c++; + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(genotype, c_idv, 2.0); + maf += 2.0; + n_2++; + } else { + gsl_vector_set(genotype, c_idv, 1.0); + maf += 1.0; + n_1++; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(genotype, c_idv, 0.0); + maf += 0.0; + n_0++; + } else { + gsl_vector_set(genotype_miss, c_idv, 1); + n_miss++; + } + } + c_idv++; + } + } + maf /= 2.0 * (double)(ni_test - n_miss); + + snpInfo[t].n_miss = n_miss; + snpInfo[t].missingness = (double)n_miss / (double)ni_test; + snpInfo[t].maf = maf; + snpInfo[t].n_idv = ni_test - n_miss; + snpInfo[t].n_nb = 0; + snpInfo[t].file_position = t; + + if ((double)n_miss / (double)ni_test > miss_level) { + indicator_snp.push_back(0); + continue; + } + + if ((maf < maf_level || maf > (1.0 
- maf_level)) && maf_level != -1) { + indicator_snp.push_back(0); + continue; + } + + if ((n_0 + n_1) == 0 || (n_1 + n_2) == 0 || (n_2 + n_0) == 0) { + indicator_snp.push_back(0); + continue; + } + + if (hwe_level != 0 && maf_level != -1) { + if (CalcHWE(n_0, n_2, n_1) < hwe_level) { + indicator_snp.push_back(0); + continue; + } + } + + // Filter SNP if it is correlated with W unless W has + // only one column, of 1s. + for (size_t i = 0; i < genotype->size; ++i) { + if (gsl_vector_get(genotype_miss, i) == 1) { + geno = maf * 2.0; + gsl_vector_set(genotype, i, geno); + } + } + + gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx); + gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); + gsl_blas_ddot(genotype, genotype, &v_x); + gsl_blas_ddot(Wtx, WtWiWtx, &v_w); + + if (W->size2 != 1 && v_w / v_x > r2_level) { + indicator_snp.push_back(0); + continue; + } + + indicator_snp.push_back(1); + ns_test++; + } + + gsl_vector_free(genotype); + gsl_vector_free(genotype_miss); + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(Wtx); + gsl_vector_free(WtWiWtx); + gsl_permutation_free(pmt); + + infile.close(); + infile.clear(); + + return true; } // Read the genotype for one SNP; remember to read empty lines. // Geno stores original genotypes without centering. // Missing values are replaced by mean. 
-bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, - igzstream &infile, gsl_vector *geno, - double &geno_mean) { - size_t ni_total=indicator_idv.size(); +bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv, + igzstream &infile, gsl_vector *geno, double &geno_mean) { + size_t ni_total = indicator_idv.size(); string line; char *ch_ptr; - bool flag=false; + bool flag = false; - for (size_t i=0; i<inc; i++) { + for (size_t i = 0; i < inc; i++) { !safeGetline(infile, line).eof(); } if (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); - geno_mean=0.0; + geno_mean = 0.0; double d; - size_t c_idv=0; + size_t c_idv = 0; vector<size_t> geno_miss; - for (size_t i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } - if (strcmp(ch_ptr, "NA")==0) { - geno_miss.push_back(c_idv); + if (strcmp(ch_ptr, "NA") == 0) { + geno_miss.push_back(c_idv); } else { - d=atof(ch_ptr); - gsl_vector_set (geno, c_idv, d); - geno_mean+=d; + d = atof(ch_ptr); + gsl_vector_set(geno, c_idv, d); + geno_mean += d; } c_idv++; } - geno_mean/=(double)(c_idv-geno_miss.size() ); + geno_mean /= (double)(c_idv - geno_miss.size()); - for (size_t i=0; i<geno_miss.size(); ++i) { + for (size_t i = 0; i < geno_miss.size(); ++i) { gsl_vector_set(geno, geno_miss[i], geno_mean); } - flag=true; + flag = true; } return flag; } // For PLINK, store SNPs as double too. 
-void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, - ifstream &infile, gsl_vector *geno, double &geno_mean) { - size_t ni_total=indicator_idv.size(), n_bit; - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1;} +void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv, + ifstream &infile, gsl_vector *geno, double &geno_mean) { + size_t ni_total = indicator_idv.size(), n_bit; + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } // n_bit, and 3 is the number of magic numbers. - infile.seekg(pos*n_bit+3); + infile.seekg(pos * n_bit + 3); // Read genotypes. char ch[1]; bitset<8> b; - geno_mean=0.0; - size_t c=0, c_idv=0; + geno_mean = 0.0; + size_t c = 0, c_idv = 0; vector<size_t> geno_miss; - for (size_t i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; + for (size_t i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && c==ni_total) {break;} - if (indicator_idv[c]==0) {c++; continue;} + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && c == ni_total) { + break; + } + if (indicator_idv[c] == 0) { + c++; + continue; + } c++; - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set (geno, c_idv, 2); - geno_mean+=2.0; - } else { - gsl_vector_set (geno, c_idv, 1); - geno_mean+=1.0; - } + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(geno, c_idv, 2); + geno_mean += 2.0; + } else { + gsl_vector_set(geno, c_idv, 1); + geno_mean += 1.0; + } } else { - if (b[2*j+1]==1) { - gsl_vector_set (geno, c_idv, 0); - geno_mean+=0.0; - } else { - geno_miss.push_back(c_idv); - } + if (b[2 * j + 1] == 1) { + gsl_vector_set(geno, c_idv, 0); + geno_mean += 0.0; + } else { + geno_miss.push_back(c_idv); + } } c_idv++; } } - geno_mean/=(double)(c_idv-geno_miss.size()); + geno_mean /= (double)(c_idv - geno_miss.size()); - for (size_t i=0; i<geno_miss.size(); ++i) { + 
for (size_t i = 0; i < geno_miss.size(); ++i) { gsl_vector_set(geno, geno_miss[i], geno_mean); } return; } -void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, - map<string, int> &mapID2num, const size_t k_mode, - bool &error, gsl_matrix *G) { - igzstream infile (file_kin.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open kinship file: "<<file_kin<<endl; - error=true; return; - } - - size_t ni_total=indicator_idv.size(); - - gsl_matrix_set_zero (G); - - string line; - char *ch_ptr; - double d; - - if (k_mode==1) { - size_t i_test=0, i_total=0, j_test=0, j_total=0; - while (getline(infile, line)) { - if (i_total==ni_total) { - cout<<"error! number of rows in the kinship "<< - "file is larger than the number of phentypes."<< - endl; - error=true; - } - - if (indicator_idv[i_total]==0) {i_total++; continue;} - - j_total=0; j_test=0; - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - while (ch_ptr!=NULL) { - if (j_total==ni_total) { - cout<<"error! number of columns in the "<< - "kinship file is larger than the number"<< - " of phentypes for row = "<<i_total<<endl; - error=true; - } - - d=atof(ch_ptr); - if (indicator_idv[j_total]==1) { - gsl_matrix_set (G, i_test, j_test, d); - j_test++; - } - j_total++; - - ch_ptr=strtok (NULL, " , \t"); - } - if (j_total!=ni_total) { - cout<<"error! number of columns in the kinship "<< - "file do not match the number of phentypes for "<< - "row = "<<i_total<<endl; - error=true; - } - i_total++; i_test++; - } - if (i_total!=ni_total) { - cout<<"error! 
number of rows in the kinship file do "<< - "not match the number of phentypes."<<endl; - error=true; - } - } - else { - map<size_t, size_t> mapID2ID; - size_t c=0; - for (size_t i=0; i<indicator_idv.size(); i++) { - if (indicator_idv[i]==1) {mapID2ID[i]=c; c++;} - } - - string id1, id2; - double Cov_d; - size_t n_id1, n_id2; - - while (getline(infile, line)) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - id1=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - id2=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - d=atof(ch_ptr); - if (mapID2num.count(id1)==0 || - mapID2num.count(id2)==0) { - continue; - } - if (indicator_idv[mapID2num[id1]]==0 || - indicator_idv[mapID2num[id2]]==0) { - continue; - } - - n_id1=mapID2ID[mapID2num[id1]]; - n_id2=mapID2ID[mapID2num[id2]]; - - Cov_d=gsl_matrix_get(G, n_id1, n_id2); - if (Cov_d!=0 && Cov_d!=d) { - cout<<"error! redundant and unequal terms in the "<< - "kinship file, for id1 = "<<id1<<" and id2 = "<< - id2<<endl; - } - else { - gsl_matrix_set(G, n_id1, n_id2, d); - gsl_matrix_set(G, n_id2, n_id1, d); - } - } - } - - infile.close(); - infile.clear(); - - return; +void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, + map<string, int> &mapID2num, const size_t k_mode, bool &error, + gsl_matrix *G) { + igzstream infile(file_kin.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open kinship file: " << file_kin << endl; + error = true; + return; + } + + size_t ni_total = indicator_idv.size(); + + gsl_matrix_set_zero(G); + + string line; + char *ch_ptr; + double d; + + if (k_mode == 1) { + size_t i_test = 0, i_total = 0, j_test = 0, j_total = 0; + while (getline(infile, line)) { + if (i_total == ni_total) { + cout << "error! number of rows in the kinship " + << "file is larger than the number of phentypes." 
<< endl; + error = true; + } + + if (indicator_idv[i_total] == 0) { + i_total++; + continue; + } + + j_total = 0; + j_test = 0; + ch_ptr = strtok((char *)line.c_str(), " , \t"); + while (ch_ptr != NULL) { + if (j_total == ni_total) { + cout << "error! number of columns in the " + << "kinship file is larger than the number" + << " of phentypes for row = " << i_total << endl; + error = true; + } + + d = atof(ch_ptr); + if (indicator_idv[j_total] == 1) { + gsl_matrix_set(G, i_test, j_test, d); + j_test++; + } + j_total++; + + ch_ptr = strtok(NULL, " , \t"); + } + if (j_total != ni_total) { + cout << "error! number of columns in the kinship " + << "file do not match the number of phentypes for " + << "row = " << i_total << endl; + error = true; + } + i_total++; + i_test++; + } + if (i_total != ni_total) { + cout << "error! number of rows in the kinship file do " + << "not match the number of phentypes." << endl; + error = true; + } + } else { + map<size_t, size_t> mapID2ID; + size_t c = 0; + for (size_t i = 0; i < indicator_idv.size(); i++) { + if (indicator_idv[i] == 1) { + mapID2ID[i] = c; + c++; + } + } + + string id1, id2; + double Cov_d; + size_t n_id1, n_id2; + + while (getline(infile, line)) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + id1 = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + id2 = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + d = atof(ch_ptr); + if (mapID2num.count(id1) == 0 || mapID2num.count(id2) == 0) { + continue; + } + if (indicator_idv[mapID2num[id1]] == 0 || + indicator_idv[mapID2num[id2]] == 0) { + continue; + } + + n_id1 = mapID2ID[mapID2num[id1]]; + n_id2 = mapID2ID[mapID2num[id2]]; + + Cov_d = gsl_matrix_get(G, n_id1, n_id2); + if (Cov_d != 0 && Cov_d != d) { + cout << "error! 
redundant and unequal terms in the " + << "kinship file, for id1 = " << id1 << " and id2 = " << id2 + << endl; + } else { + gsl_matrix_set(G, n_id1, n_id2, d); + gsl_matrix_set(G, n_id2, n_id1, d); + } + } + } + + infile.close(); + infile.clear(); + + return; } -void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, - map<string, int> &mapID2num, const size_t k_mode, - bool &error, gsl_matrix *G) { - igzstream infile (file_mk.c_str(), igzstream::in); - if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl; - error=true; - return; - } - - string file_kin, line; - - size_t i=0; - while (getline(infile, line)) { - file_kin=line.c_str(); - gsl_matrix_view G_sub=gsl_matrix_submatrix(G, 0, i*G->size1, - G->size1, G->size1); - ReadFile_kin (file_kin, indicator_idv, mapID2num, k_mode, - error, &G_sub.matrix); - i++; - } - - infile.close(); - infile.clear(); - return; +void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv, + map<string, int> &mapID2num, const size_t k_mode, bool &error, + gsl_matrix *G) { + igzstream infile(file_mk.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open file: " << file_mk << endl; + error = true; + return; + } + + string file_kin, line; + + size_t i = 0; + while (getline(infile, line)) { + file_kin = line.c_str(); + gsl_matrix_view G_sub = + gsl_matrix_submatrix(G, 0, i * G->size1, G->size1, G->size1); + ReadFile_kin(file_kin, indicator_idv, mapID2num, k_mode, error, + &G_sub.matrix); + i++; + } + + infile.close(); + infile.clear(); + return; } -void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U) { - igzstream infile (file_ku.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open the U file: "<<file_ku<<endl; - error=true; - return; - } - - size_t n_row=U->size1, n_col=U->size2, i_row=0, i_col=0; - - gsl_matrix_set_zero (U); - - string line; - char *ch_ptr; - double d; - - while (getline(infile, line)) { - if (i_row==n_row) { - cout<<"error! 
number of rows in the U file is larger "<< - "than expected."<<endl; - error=true; - } - - i_col=0; - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - while (ch_ptr!=NULL) { - if (i_col==n_col) { - cout<<"error! number of columns in the U file "<< - "is larger than expected, for row = "<< - i_row<<endl; - error=true; - } - - d=atof(ch_ptr); - gsl_matrix_set (U, i_row, i_col, d); - i_col++; - - ch_ptr=strtok (NULL, " , \t"); - } - - i_row++; - } - - infile.close(); - infile.clear(); - - return; +void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) { + igzstream infile(file_ku.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open the U file: " << file_ku << endl; + error = true; + return; + } + + size_t n_row = U->size1, n_col = U->size2, i_row = 0, i_col = 0; + + gsl_matrix_set_zero(U); + + string line; + char *ch_ptr; + double d; + + while (getline(infile, line)) { + if (i_row == n_row) { + cout << "error! number of rows in the U file is larger " + << "than expected." << endl; + error = true; + } + + i_col = 0; + ch_ptr = strtok((char *)line.c_str(), " , \t"); + while (ch_ptr != NULL) { + if (i_col == n_col) { + cout << "error! number of columns in the U file " + << "is larger than expected, for row = " << i_row << endl; + error = true; + } + + d = atof(ch_ptr); + gsl_matrix_set(U, i_row, i_col, d); + i_col++; + + ch_ptr = strtok(NULL, " , \t"); + } + + i_row++; + } + + infile.close(); + infile.clear(); + + return; } -void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval) { - igzstream infile (file_kd.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open the D file: "<<file_kd<<endl; - error=true; - return; - } +void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { + igzstream infile(file_kd.c_str(), igzstream::in); + if (!infile) { + cout << "error! 
fail to open the D file: " << file_kd << endl; + error = true; + return; + } - size_t n_row=eval->size, i_row=0; + size_t n_row = eval->size, i_row = 0; - gsl_vector_set_zero (eval); + gsl_vector_set_zero(eval); - string line; - char *ch_ptr; - double d; + string line; + char *ch_ptr; + double d; - while (getline(infile, line)) { - if (i_row==n_row) { - cout<<"error! number of rows in the D file is larger "<< - "than expected."<<endl; - error=true; - } + while (getline(infile, line)) { + if (i_row == n_row) { + cout << "error! number of rows in the D file is larger " + << "than expected." << endl; + error = true; + } - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - d=atof(ch_ptr); + ch_ptr = strtok((char *)line.c_str(), " , \t"); + d = atof(ch_ptr); - ch_ptr=strtok (NULL, " , \t"); - if (ch_ptr!=NULL) { - cout<<"error! number of columns in the D file is larger "<< - "than expected, for row = "<<i_row<<endl; - error=true; - } + ch_ptr = strtok(NULL, " , \t"); + if (ch_ptr != NULL) { + cout << "error! number of columns in the D file is larger " + << "than expected, for row = " << i_row << endl; + error = true; + } - gsl_vector_set (eval, i_row, d); + gsl_vector_set(eval, i_row, d); - i_row++; - } + i_row++; + } - infile.close(); - infile.clear(); + infile.close(); + infile.clear(); - return; + return; } // Read bimbam mean genotype file and calculate kinship matrix. -bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return false; - } - - string line; - char *ch_ptr; - - size_t n_miss; - double d, geno_mean, geno_var; - - size_t ni_total=matrix_kin->size1; - gsl_vector *geno=gsl_vector_alloc (ni_total); - gsl_vector *geno_miss=gsl_vector_alloc (ni_total); - - // Create a large matrix. 
- size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize); - gsl_matrix_set_zero(Xlarge); - - size_t ns_test=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - geno_mean=0.0; n_miss=0; geno_var=0.0; - gsl_vector_set_all(geno_miss, 0); - for (size_t i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(geno_miss, i, 0); n_miss++; - } else { - d=atof(ch_ptr); - gsl_vector_set (geno, i, d); - gsl_vector_set (geno_miss, i, 1); - geno_mean+=d; - geno_var+=d*d; - } - } - - geno_mean/=(double)(ni_total-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_total; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_total; ++i) { - if (gsl_vector_get (geno_miss, i)==0) { - gsl_vector_set(geno, i, geno_mean); - } - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - if (k_mode==2 && geno_var!=0) { - gsl_vector_scale (geno, 1.0/sqrt(geno_var)); - } - gsl_vector_view Xlarge_col= - gsl_matrix_column (Xlarge, ns_test%msize); - gsl_vector_memcpy (&Xlarge_col.vector, geno); - - ns_test++; - - if (ns_test%msize==0) { - eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, - matrix_kin); - gsl_matrix_set_zero(Xlarge); - } - } - - if (ns_test%msize!=0) { - eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); - } - cout<<endl; - - gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test); - - for (size_t i=0; i<ni_total; ++i) { - for (size_t j=0; j<i; ++j) { - d=gsl_matrix_get (matrix_kin, j, i); - gsl_matrix_set (matrix_kin, i, j, d); - } - } - - gsl_vector_free (geno); - gsl_vector_free (geno_miss); - gsl_matrix_free (Xlarge); - - infile.close(); - 
infile.clear(); - - return true; +bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, + const int k_mode, const int display_pace, + gsl_matrix *matrix_kin) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + string line; + char *ch_ptr; + + size_t n_miss; + double d, geno_mean, geno_var; + + size_t ni_total = matrix_kin->size1; + gsl_vector *geno = gsl_vector_alloc(ni_total); + gsl_vector *geno_miss = gsl_vector_alloc(ni_total); + + // Create a large matrix. + size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize); + gsl_matrix_set_zero(Xlarge); + + size_t ns_test = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + geno_mean = 0.0; + n_miss = 0; + geno_var = 0.0; + gsl_vector_set_all(geno_miss, 0); + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(geno_miss, i, 0); + n_miss++; + } else { + d = atof(ch_ptr); + gsl_vector_set(geno, i, d); + gsl_vector_set(geno_miss, i, 1); + geno_mean += d; + geno_var += d * d; + } + } + + geno_mean /= (double)(ni_total - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_total; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_total; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + if (k_mode == 2 && geno_var != 0) { + gsl_vector_scale(geno, 1.0 / sqrt(geno_var)); + } + gsl_vector_view Xlarge_col = 
gsl_matrix_column(Xlarge, ns_test % msize); + gsl_vector_memcpy(&Xlarge_col.vector, geno); + + ns_test++; + + if (ns_test % msize == 0) { + eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + gsl_matrix_set_zero(Xlarge); + } + } + + if (ns_test % msize != 0) { + eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + } + cout << endl; + + gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test); + + for (size_t i = 0; i < ni_total; ++i) { + for (size_t j = 0; j < i; ++j) { + d = gsl_matrix_get(matrix_kin, j, i); + gsl_matrix_set(matrix_kin, i, j, d); + } + } + + gsl_vector_free(geno); + gsl_vector_free(geno_miss); + gsl_matrix_free(Xlarge); + + infile.close(); + infile.clear(); + + return true; } -bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return false; - } - - char ch[1]; - bitset<8> b; - - size_t n_miss, ci_total; - double d, geno_mean, geno_var; - - size_t ni_total=matrix_kin->size1; - gsl_vector *geno=gsl_vector_alloc (ni_total); - - size_t ns_test=0; - int n_bit; - - // Create a large matrix. - size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (ni_total, msize); - gsl_matrix_set_zero(Xlarge); - - // Calculate n_bit and c, the number of bit for each snp. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - //print the first three magic numbers - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. 
- geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==ni_total) { - break; - } - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(geno, ci_total, 2.0); - geno_mean+=2.0; - geno_var+=4.0; - } - else { - gsl_vector_set(geno, ci_total, 1.0); - geno_mean+=1.0; - geno_var+=1.0; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(geno,ci_total,0.0); - } - else { - gsl_vector_set(geno,ci_total,-9.0); - n_miss++; - } - } - - ci_total++; - } - } - - geno_mean/=(double)(ni_total-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_total; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_total; ++i) { - d=gsl_vector_get(geno,i); - if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);} - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - if (k_mode==2 && geno_var!=0) { - gsl_vector_scale (geno, 1.0/sqrt(geno_var)); - } - gsl_vector_view Xlarge_col= - gsl_matrix_column (Xlarge, ns_test%msize); - gsl_vector_memcpy (&Xlarge_col.vector, geno); - - ns_test++; - - if (ns_test%msize==0) { - eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin); - gsl_matrix_set_zero(Xlarge); - } - } - - if (ns_test%msize!=0) { - eigenlib_dgemm ("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); - } - - cout<<endl; - - gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test); - - for (size_t i=0; i<ni_total; ++i) { - for (size_t j=0; j<i; ++j) { - d=gsl_matrix_get (matrix_kin, j, i); - gsl_matrix_set (matrix_kin, i, j, d); - } - } - - gsl_vector_free (geno); - gsl_matrix_free (Xlarge); - - infile.close(); - infile.clear(); - - return true; +bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, + const int k_mode, const int display_pace, + gsl_matrix *matrix_kin) { + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << 
file_bed << endl; + return false; + } + + char ch[1]; + bitset<8> b; + + size_t n_miss, ci_total; + double d, geno_mean, geno_var; + + size_t ni_total = matrix_kin->size1; + gsl_vector *geno = gsl_vector_alloc(ni_total); + + size_t ns_test = 0; + int n_bit; + + // Create a large matrix. + size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize); + gsl_matrix_set_zero(Xlarge); + + // Calculate n_bit and c, the number of bit for each snp. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // print the first three magic numbers + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + geno_mean = 0.0; + n_miss = 0; + ci_total = 0; + geno_var = 0.0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. 
+ for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == ni_total) { + break; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(geno, ci_total, 2.0); + geno_mean += 2.0; + geno_var += 4.0; + } else { + gsl_vector_set(geno, ci_total, 1.0); + geno_mean += 1.0; + geno_var += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(geno, ci_total, 0.0); + } else { + gsl_vector_set(geno, ci_total, -9.0); + n_miss++; + } + } + + ci_total++; + } + } + + geno_mean /= (double)(ni_total - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_total; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_total; ++i) { + d = gsl_vector_get(geno, i); + if (d == -9.0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + if (k_mode == 2 && geno_var != 0) { + gsl_vector_scale(geno, 1.0 / sqrt(geno_var)); + } + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % msize); + gsl_vector_memcpy(&Xlarge_col.vector, geno); + + ns_test++; + + if (ns_test % msize == 0) { + eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + gsl_matrix_set_zero(Xlarge); + } + } + + if (ns_test % msize != 0) { + eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + } + + cout << endl; + + gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test); + + for (size_t i = 0; i < ni_total; ++i) { + for (size_t j = 0; j < i; ++j) { + d = gsl_matrix_get(matrix_kin, j, i); + gsl_matrix_set(matrix_kin, i, j, d); + } + } + + gsl_vector_free(geno); + gsl_matrix_free(Xlarge); + + infile.close(); + infile.clear(); + + return true; } // Read bimbam mean genotype file, the second time, recode "mean" // genotype and calculate K. 
-bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, gsl_matrix *UtX, - gsl_matrix *K, const bool calc_K) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return false; - } - - string line; - char *ch_ptr; - - if (calc_K==true) {gsl_matrix_set_zero (K);} - - gsl_vector *genotype=gsl_vector_alloc (UtX->size1); - gsl_vector *genotype_miss=gsl_vector_alloc (UtX->size1); - double geno, geno_mean; - size_t n_miss; - - int ni_total=(int)indicator_idv.size(); - int ns_total=(int)indicator_snp.size(); - int ni_test=UtX->size1; - int ns_test=UtX->size2; - - int c_idv=0, c_snp=0; - - for (int i=0; i<ns_total; ++i) { - !safeGetline(infile, line).eof(); - if (indicator_snp[i]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - c_idv=0; geno_mean=0; n_miss=0; - gsl_vector_set_zero (genotype_miss); - for (int j=0; j<ni_total; ++j) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[j]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set (genotype_miss, c_idv, 1); - n_miss++; - } else { - geno=atof(ch_ptr); - gsl_vector_set (genotype, c_idv, geno); - geno_mean+=geno; - } - c_idv++; - } - - geno_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<genotype->size; ++i) { - if (gsl_vector_get (genotype_miss, i)==1) { - geno=0; - } - else { - geno=gsl_vector_get (genotype, i); - geno-=geno_mean; - } - - gsl_vector_set (genotype, i, geno); - gsl_matrix_set (UtX, i, c_snp, geno); - } - - if (calc_K==true) { - gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); - } - - c_snp++; - } - - if (calc_K==true) { - gsl_matrix_scale (K, 1.0/(double)ns_test); - - for (size_t i=0; i<genotype->size; ++i) { - for (size_t j=0; j<i; ++j) { - geno=gsl_matrix_get (K, j, i); - gsl_matrix_set (K, i, j, geno); - } - } - } - - gsl_vector_free (genotype); - gsl_vector_free 
(genotype_miss); - - infile.clear(); - infile.close(); - - return true; +bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, + vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, + const bool calc_K) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + string line; + char *ch_ptr; + + if (calc_K == true) { + gsl_matrix_set_zero(K); + } + + gsl_vector *genotype = gsl_vector_alloc(UtX->size1); + gsl_vector *genotype_miss = gsl_vector_alloc(UtX->size1); + double geno, geno_mean; + size_t n_miss; + + int ni_total = (int)indicator_idv.size(); + int ns_total = (int)indicator_snp.size(); + int ni_test = UtX->size1; + int ns_test = UtX->size2; + + int c_idv = 0, c_snp = 0; + + for (int i = 0; i < ns_total; ++i) { + !safeGetline(infile, line).eof(); + if (indicator_snp[i] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + c_idv = 0; + geno_mean = 0; + n_miss = 0; + gsl_vector_set_zero(genotype_miss); + for (int j = 0; j < ni_total; ++j) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[j] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(genotype_miss, c_idv, 1); + n_miss++; + } else { + geno = atof(ch_ptr); + gsl_vector_set(genotype, c_idv, geno); + geno_mean += geno; + } + c_idv++; + } + + geno_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < genotype->size; ++i) { + if (gsl_vector_get(genotype_miss, i) == 1) { + geno = 0; + } else { + geno = gsl_vector_get(genotype, i); + geno -= geno_mean; + } + + gsl_vector_set(genotype, i, geno); + gsl_matrix_set(UtX, i, c_snp, geno); + } + + if (calc_K == true) { + gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); + } + + c_snp++; + } + + if (calc_K == true) { + gsl_matrix_scale(K, 1.0 / (double)ns_test); + + for (size_t i = 0; i < genotype->size; ++i) { + for 
(size_t j = 0; j < i; ++j) { + geno = gsl_matrix_get(K, j, i); + gsl_matrix_set(K, i, j, geno); + } + } + } + + gsl_vector_free(genotype); + gsl_vector_free(genotype_miss); + + infile.clear(); + infile.close(); + + return true; } // Compact version of the above function, using uchar instead of // gsl_matrix. -bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char> > &Xt, - gsl_matrix *K, const bool calc_K, const size_t ni_test, - const size_t ns_test) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return false; - } - - Xt.clear(); - vector<unsigned char> Xt_row; - for (size_t i=0; i<ni_test; i++) { - Xt_row.push_back(0); - } - - string line; - char *ch_ptr; - - if (calc_K==true) {gsl_matrix_set_zero (K);} - - gsl_vector *genotype=gsl_vector_alloc (ni_test); - gsl_vector *genotype_miss=gsl_vector_alloc (ni_test); - double geno, geno_mean; - size_t n_miss; - - size_t ni_total= indicator_idv.size(); - size_t ns_total= indicator_snp.size(); - - size_t c_idv=0, c_snp=0; - - for (size_t i=0; i<ns_total; ++i) { - !safeGetline(infile, line).eof(); - if (indicator_snp[i]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - c_idv=0; geno_mean=0; n_miss=0; - gsl_vector_set_zero (genotype_miss); - for (uint j=0; j<ni_total; ++j) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[j]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set (genotype_miss, c_idv, 1); - n_miss++; - } else { - geno=atof(ch_ptr); - gsl_vector_set (genotype, c_idv, geno); - geno_mean+=geno; - } - c_idv++; - } - - geno_mean/=(double)(ni_test-n_miss); - - for (size_t j=0; j<genotype->size; ++j) { - if (gsl_vector_get (genotype_miss, j)==1) { - geno=geno_mean; - } else { - geno=gsl_vector_get (genotype, j); - } - - 
Xt_row[j]=Double02ToUchar(geno); - gsl_vector_set (genotype, j, (geno-geno_mean)); - } - Xt.push_back(Xt_row); - - if (calc_K==true) { - gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); - } - - c_snp++; - } - - if (calc_K==true) { - gsl_matrix_scale (K, 1.0/(double)ns_test); - - for (size_t i=0; i<genotype->size; ++i) { - for (size_t j=0; j<i; ++j) { - geno=gsl_matrix_get (K, j, i); - gsl_matrix_set (K, i, j, geno); - } - } - } - - gsl_vector_free (genotype); - gsl_vector_free (genotype_miss); - - infile.clear(); - infile.close(); - - return true; +bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, + vector<int> &indicator_snp, + vector<vector<unsigned char>> &Xt, gsl_matrix *K, + const bool calc_K, const size_t ni_test, + const size_t ns_test) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + Xt.clear(); + vector<unsigned char> Xt_row; + for (size_t i = 0; i < ni_test; i++) { + Xt_row.push_back(0); + } + + string line; + char *ch_ptr; + + if (calc_K == true) { + gsl_matrix_set_zero(K); + } + + gsl_vector *genotype = gsl_vector_alloc(ni_test); + gsl_vector *genotype_miss = gsl_vector_alloc(ni_test); + double geno, geno_mean; + size_t n_miss; + + size_t ni_total = indicator_idv.size(); + size_t ns_total = indicator_snp.size(); + + size_t c_idv = 0, c_snp = 0; + + for (size_t i = 0; i < ns_total; ++i) { + !safeGetline(infile, line).eof(); + if (indicator_snp[i] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + c_idv = 0; + geno_mean = 0; + n_miss = 0; + gsl_vector_set_zero(genotype_miss); + for (uint j = 0; j < ni_total; ++j) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[j] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(genotype_miss, c_idv, 1); + n_miss++; + } else { + geno = atof(ch_ptr); + 
gsl_vector_set(genotype, c_idv, geno); + geno_mean += geno; + } + c_idv++; + } + + geno_mean /= (double)(ni_test - n_miss); + + for (size_t j = 0; j < genotype->size; ++j) { + if (gsl_vector_get(genotype_miss, j) == 1) { + geno = geno_mean; + } else { + geno = gsl_vector_get(genotype, j); + } + + Xt_row[j] = Double02ToUchar(geno); + gsl_vector_set(genotype, j, (geno - geno_mean)); + } + Xt.push_back(Xt_row); + + if (calc_K == true) { + gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); + } + + c_snp++; + } + + if (calc_K == true) { + gsl_matrix_scale(K, 1.0 / (double)ns_test); + + for (size_t i = 0; i < genotype->size; ++i) { + for (size_t j = 0; j < i; ++j) { + geno = gsl_matrix_get(K, j, i); + gsl_matrix_set(K, i, j, geno); + } + } + } + + gsl_vector_free(genotype); + gsl_vector_free(genotype_miss); + + infile.clear(); + infile.close(); + + return true; } // Read bimbam mean genotype file, the second time, recode "mean" // genotype and calculate K. -bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, - vector<int> &indicator_snp, gsl_matrix *UtX, - gsl_matrix *K, const bool calc_K) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return false; - } - - char ch[1]; - bitset<8> b; - - size_t ni_total=indicator_idv.size(); - size_t ns_total=indicator_snp.size(); - size_t ni_test=UtX->size1; - size_t ns_test=UtX->size2; - int n_bit; - - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1;} - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - if (calc_K==true) {gsl_matrix_set_zero (K);} - - gsl_vector *genotype=gsl_vector_alloc (UtX->size1); - - double geno, geno_mean; - size_t n_miss; - size_t c_idv=0, c_snp=0, c=0; - - // Start reading snps and doing association test. - for (size_t t=0; t<ns_total; ++t) { - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. 
- infile.seekg(t*n_bit+3); - - // Read genotypes. - c_idv=0; geno_mean=0.0; n_miss=0; c=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && c==ni_total) {break;} - if (indicator_idv[c]==0) {c++; continue;} - c++; - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(genotype, c_idv, 2.0); - geno_mean+=2.0; - } - else { - gsl_vector_set(genotype, c_idv, 1.0); - geno_mean+=1.0; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(genotype, c_idv, 0.0); - geno_mean+=0.0; - } - else { - gsl_vector_set(genotype, c_idv, -9.0); - n_miss++; - } - } - c_idv++; - } - } - - geno_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<genotype->size; ++i) { - geno=gsl_vector_get (genotype, i); - if (geno==-9) {geno=0;} - else {geno-=geno_mean;} - - gsl_vector_set (genotype, i, geno); - gsl_matrix_set (UtX, i, c_snp, geno); - } - - if (calc_K==true) { - gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); - } - - c_snp++; - } - - if (calc_K==true) { - gsl_matrix_scale (K, 1.0/(double)ns_test); - - for (size_t i=0; i<genotype->size; ++i) { - for (size_t j=0; j<i; ++j) { - geno=gsl_matrix_get (K, j, i); - gsl_matrix_set (K, i, j, geno); - } - } - } - - gsl_vector_free (genotype); - infile.clear(); - infile.close(); - - return true; +bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, + vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, + const bool calc_K) { + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + char ch[1]; + bitset<8> b; + + size_t ni_total = indicator_idv.size(); + size_t ns_total = indicator_snp.size(); + size_t ni_test = UtX->size1; + size_t ns_test = UtX->size2; + int n_bit; + + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. 
+ for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + if (calc_K == true) { + gsl_matrix_set_zero(K); + } + + gsl_vector *genotype = gsl_vector_alloc(UtX->size1); + + double geno, geno_mean; + size_t n_miss; + size_t c_idv = 0, c_snp = 0, c = 0; + + // Start reading snps and doing association test. + for (size_t t = 0; t < ns_total; ++t) { + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + c_idv = 0; + geno_mean = 0.0; + n_miss = 0; + c = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && c == ni_total) { + break; + } + if (indicator_idv[c] == 0) { + c++; + continue; + } + c++; + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(genotype, c_idv, 2.0); + geno_mean += 2.0; + } else { + gsl_vector_set(genotype, c_idv, 1.0); + geno_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(genotype, c_idv, 0.0); + geno_mean += 0.0; + } else { + gsl_vector_set(genotype, c_idv, -9.0); + n_miss++; + } + } + c_idv++; + } + } + + geno_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < genotype->size; ++i) { + geno = gsl_vector_get(genotype, i); + if (geno == -9) { + geno = 0; + } else { + geno -= geno_mean; + } + + gsl_vector_set(genotype, i, geno); + gsl_matrix_set(UtX, i, c_snp, geno); + } + + if (calc_K == true) { + gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); + } + + c_snp++; + } + + if (calc_K == true) { + gsl_matrix_scale(K, 1.0 / (double)ns_test); + + for (size_t i = 0; i < genotype->size; ++i) { + for (size_t j = 0; j < i; ++j) { + geno = gsl_matrix_get(K, j, i); + gsl_matrix_set(K, i, j, geno); + } + } + } + + gsl_vector_free(genotype); + infile.clear(); + infile.close(); + + return true; } // Compact version of the above function, using uchar instead of gsl_matrix. 
-bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char> > &Xt, gsl_matrix *K, - const bool calc_K, const size_t ni_test, - const size_t ns_test) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return false; - } - - Xt.clear(); - vector<unsigned char> Xt_row; - for (size_t i=0; i<ni_test; i++) { - Xt_row.push_back(0); - } - - char ch[1]; - bitset<8> b; - - size_t ni_total=indicator_idv.size(); - size_t ns_total=indicator_snp.size(); - int n_bit; - - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1;} - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - if (calc_K==true) {gsl_matrix_set_zero (K);} - - gsl_vector *genotype=gsl_vector_alloc (ni_test); - - double geno, geno_mean; - size_t n_miss; - size_t c_idv=0, c_snp=0, c=0; - - // Start reading SNPs and doing association test. - for (size_t t=0; t<ns_total; ++t) { - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. - c_idv=0; geno_mean=0.0; n_miss=0; c=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. 
- for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && c==ni_total) {break;} - if (indicator_idv[c]==0) {c++; continue;} - c++; - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(genotype, c_idv, 2.0); - geno_mean+=2.0; - } - else { - gsl_vector_set(genotype, c_idv, 1.0); - geno_mean+=1.0; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(genotype, c_idv, 0.0); - geno_mean+=0.0; - } - else { - gsl_vector_set(genotype, c_idv, -9.0); - n_miss++; - } - } - c_idv++; - } - } - - geno_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<genotype->size; ++i) { - geno=gsl_vector_get (genotype, i); - if (geno==-9) {geno=geno_mean;} - - Xt_row[i]=Double02ToUchar(geno); - - geno-=geno_mean; - - gsl_vector_set (genotype, i, geno); - } - Xt.push_back(Xt_row); - - if (calc_K==true) { - gsl_blas_dsyr (CblasUpper, 1.0, genotype, K); - } - - c_snp++; - } - - if (calc_K==true) { - gsl_matrix_scale (K, 1.0/(double)ns_test); - - for (size_t i=0; i<genotype->size; ++i) { - for (size_t j=0; j<i; ++j) { - geno=gsl_matrix_get (K, j, i); - gsl_matrix_set (K, i, j, geno); - } - } - } - - gsl_vector_free (genotype); - infile.clear(); - infile.close(); - - return true; +bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, + vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, + gsl_matrix *K, const bool calc_K, const size_t ni_test, + const size_t ns_test) { + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + Xt.clear(); + vector<unsigned char> Xt_row; + for (size_t i = 0; i < ni_test; i++) { + Xt_row.push_back(0); + } + + char ch[1]; + bitset<8> b; + + size_t ni_total = indicator_idv.size(); + size_t ns_total = indicator_snp.size(); + int n_bit; + + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. 
+ for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + if (calc_K == true) { + gsl_matrix_set_zero(K); + } + + gsl_vector *genotype = gsl_vector_alloc(ni_test); + + double geno, geno_mean; + size_t n_miss; + size_t c_idv = 0, c_snp = 0, c = 0; + + // Start reading SNPs and doing association test. + for (size_t t = 0; t < ns_total; ++t) { + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + c_idv = 0; + geno_mean = 0.0; + n_miss = 0; + c = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && c == ni_total) { + break; + } + if (indicator_idv[c] == 0) { + c++; + continue; + } + c++; + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(genotype, c_idv, 2.0); + geno_mean += 2.0; + } else { + gsl_vector_set(genotype, c_idv, 1.0); + geno_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(genotype, c_idv, 0.0); + geno_mean += 0.0; + } else { + gsl_vector_set(genotype, c_idv, -9.0); + n_miss++; + } + } + c_idv++; + } + } + + geno_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < genotype->size; ++i) { + geno = gsl_vector_get(genotype, i); + if (geno == -9) { + geno = geno_mean; + } + + Xt_row[i] = Double02ToUchar(geno); + + geno -= geno_mean; + + gsl_vector_set(genotype, i, geno); + } + Xt.push_back(Xt_row); + + if (calc_K == true) { + gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); + } + + c_snp++; + } + + if (calc_K == true) { + gsl_matrix_scale(K, 1.0 / (double)ns_test); + + for (size_t i = 0; i < genotype->size; ++i) { + for (size_t j = 0; j < i; ++j) { + geno = gsl_matrix_get(K, j, i); + gsl_matrix_set(K, i, j, geno); + } + } + } + + gsl_vector_free(genotype); + infile.clear(); + infile.close(); + + return true; } -bool ReadFile_est (const string &file_est, const 
vector<size_t> &est_column, - map<string, double> &mapRS2est) { - mapRS2est.clear(); - - ifstream infile (file_est.c_str(), ifstream::in); - if (!infile) { - cout<<"error opening estimated parameter file: "<<file_est<<endl; - return false; - } - - string line; - char *ch_ptr; - - string rs; - double alpha, beta, gamma, d; - - // Header. - getline(infile, line); - - size_t n=*max_element(est_column.begin(), est_column.end()); - - while (getline(infile, line)) { - ch_ptr=strtok ((char *)line.c_str(), " \t"); - - alpha=0.0; beta=0.0; gamma=1.0; - for (size_t i=0; i<n+1; ++i) { - if (i==est_column[0]-1) {rs=ch_ptr;} - if (i==est_column[1]-1) {alpha=atof(ch_ptr);} - if (i==est_column[2]-1) {beta=atof(ch_ptr);} - if (i==est_column[3]-1) {gamma=atof(ch_ptr);} - if (i<n) {ch_ptr=strtok (NULL, " \t");} - } - - d=alpha+beta*gamma; - - if (mapRS2est.count(rs)==0) { - mapRS2est[rs]=d; - } - else { - cout << "the same SNP occurs more than once in estimated "<< - "parameter file: "<<rs<<endl; - return false; - } - } - - infile.clear(); - infile.close(); - return true; +bool ReadFile_est(const string &file_est, const vector<size_t> &est_column, + map<string, double> &mapRS2est) { + mapRS2est.clear(); + + ifstream infile(file_est.c_str(), ifstream::in); + if (!infile) { + cout << "error opening estimated parameter file: " << file_est << endl; + return false; + } + + string line; + char *ch_ptr; + + string rs; + double alpha, beta, gamma, d; + + // Header. 
+ getline(infile, line); + + size_t n = *max_element(est_column.begin(), est_column.end()); + + while (getline(infile, line)) { + ch_ptr = strtok((char *)line.c_str(), " \t"); + + alpha = 0.0; + beta = 0.0; + gamma = 1.0; + for (size_t i = 0; i < n + 1; ++i) { + if (i == est_column[0] - 1) { + rs = ch_ptr; + } + if (i == est_column[1] - 1) { + alpha = atof(ch_ptr); + } + if (i == est_column[2] - 1) { + beta = atof(ch_ptr); + } + if (i == est_column[3] - 1) { + gamma = atof(ch_ptr); + } + if (i < n) { + ch_ptr = strtok(NULL, " \t"); + } + } + + d = alpha + beta * gamma; + + if (mapRS2est.count(rs) == 0) { + mapRS2est[rs] = d; + } else { + cout << "the same SNP occurs more than once in estimated " + << "parameter file: " << rs << endl; + return false; + } + } + + infile.clear(); + infile.close(); + return true; } -bool CountFileLines (const string &file_input, size_t &n_lines) { - igzstream infile (file_input.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open file: "<<file_input<<endl; - return false; - } +bool CountFileLines(const string &file_input, size_t &n_lines) { + igzstream infile(file_input.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open file: " << file_input << endl; + return false; + } - n_lines=count(istreambuf_iterator<char>(infile), istreambuf_iterator<char>(), '\n'); - infile.seekg (0, ios::beg); + n_lines = count(istreambuf_iterator<char>(infile), + istreambuf_iterator<char>(), '\n'); + infile.seekg(0, ios::beg); - return true; + return true; } // Read gene expression file. -bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, - vector<SNPINFO> &snpInfo, size_t &ng_total) { - vec_read.clear(); - ng_total=0; - - igzstream infile (file_gene.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open gene expression file: "<<file_gene<<endl; - return false; - } - - string line; - char *ch_ptr; - string rs; - - size_t n_idv=0, t=0; - - // Header. 
- getline(infile, line); - - while (getline(infile, line)) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; - - ch_ptr=strtok (NULL, " , \t"); - - t=0; - while (ch_ptr!=NULL) { - if (ng_total==0) { - vec_read.push_back(0); - t++; - n_idv++; - } else { - vec_read[t]+=atof(ch_ptr); - t++; - } - - ch_ptr=strtok (NULL, " , \t"); - } - - if (t!=n_idv) { - cout<<"error! number of columns doesn't match in row: "<< - ng_total<<endl; - return false; - } - - SNPINFO sInfo={"-9",rs,-9,-9,"-9","-9",0,-9,-9,0,0,0}; - snpInfo.push_back(sInfo); - - ng_total++; - } - - infile.close(); - infile.clear(); - - return true; +bool ReadFile_gene(const string &file_gene, vector<double> &vec_read, + vector<SNPINFO> &snpInfo, size_t &ng_total) { + vec_read.clear(); + ng_total = 0; + + igzstream infile(file_gene.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open gene expression file: " << file_gene << endl; + return false; + } + + string line; + char *ch_ptr; + string rs; + + size_t n_idv = 0, t = 0; + + // Header. + getline(infile, line); + + while (getline(infile, line)) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; + + ch_ptr = strtok(NULL, " , \t"); + + t = 0; + while (ch_ptr != NULL) { + if (ng_total == 0) { + vec_read.push_back(0); + t++; + n_idv++; + } else { + vec_read[t] += atof(ch_ptr); + t++; + } + + ch_ptr = strtok(NULL, " , \t"); + } + + if (t != n_idv) { + cout << "error! number of columns doesn't match in row: " << ng_total + << endl; + return false; + } + + SNPINFO sInfo = {"-9", rs, -9, -9, "-9", "-9", 0, -9, -9, 0, 0, 0}; + snpInfo.push_back(sInfo); + + ng_total++; + } + + infile.close(); + infile.clear(); + + return true; } // WJA Added // Read Oxford sample file. 
-bool ReadFile_sample (const string &file_sample, - vector<vector<int> > &indicator_pheno, - vector<vector<double> > &pheno, - const vector<size_t> &p_column, - vector<int> &indicator_cvt, - vector<vector<double> > &cvt, size_t &n_cvt) { - indicator_pheno.clear(); - pheno.clear(); - indicator_cvt.clear(); - - igzstream infile (file_sample.c_str(), igzstream::in); - - if (!infile) { - cout<<"error! fail to open sample file: "<<file_sample<<endl; - return false; - } - - string line; - char *ch_ptr; - - string id; - double p,d; - - vector<double> pheno_row; - vector<int> ind_pheno_row; - int flag_na=0; - - size_t num_cols=0; - size_t num_p_in_file=0; - size_t num_cvt_in_file=0; - - map<size_t, size_t> mapP2c; - for (size_t i=0; i<p_column.size(); i++) { - mapP2c[p_column[i]]=i; - pheno_row.push_back(-9); - ind_pheno_row.push_back(0); - } - - // Read header line1. - if(!safeGetline(infile, line).eof()) { - ch_ptr=strtok((char *)line.c_str(), " \t"); - if(strcmp(ch_ptr, "ID_1")!=0) {return false;} - ch_ptr=strtok(NULL, " \t"); - if(strcmp(ch_ptr, "ID_2")!=0) {return false;} - ch_ptr=strtok(NULL, " \t"); - if(strcmp(ch_ptr, "missing")!=0) {return false;} - while (ch_ptr!=NULL) { - num_cols++; - ch_ptr=strtok (NULL, " \t"); - - } - num_cols--; - } - - vector<map<uint32_t, size_t> > cvt_factor_levels; - - char col_type[num_cols]; - - // Read header line2. 
- if(!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " \t"); - if(strcmp(ch_ptr, "0")!=0) {return false;} - ch_ptr=strtok(NULL, " \t"); - if(strcmp(ch_ptr, "0")!=0) {return false;} - ch_ptr=strtok(NULL, " \t"); - if(strcmp(ch_ptr, "0")!=0) {return false;} - size_t it=0; - ch_ptr=strtok (NULL, " \t"); - if(ch_ptr!=NULL) - while(ch_ptr!=NULL){ - col_type[it++]=ch_ptr[0]; - if(ch_ptr[0]=='D') { - cvt_factor_levels.push_back(map<uint32_t,size_t>()); - num_cvt_in_file++; - } - if(ch_ptr[0]=='C') {num_cvt_in_file++;} - if((ch_ptr[0]=='P')||(ch_ptr[0]=='B')) { - num_p_in_file++;} - ch_ptr=strtok(NULL, " \t"); - } - - } - - while (!safeGetline(infile, line).eof()) { - - ch_ptr=strtok ((char *)line.c_str(), " \t"); - - for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");} - - size_t i=0; - size_t p_i=0; - size_t fac_cvt_i=0; - - while (i<num_cols) { - - if((col_type[i]=='P')||(col_type[i]=='B')) - { - if (mapP2c.count(p_i+1)!=0) { - if (strcmp(ch_ptr, "NA")==0) { - ind_pheno_row[mapP2c[p_i+1]]=0; - pheno_row[mapP2c[p_i+1]]=-9; - } - else { - p=atof(ch_ptr); - ind_pheno_row[mapP2c[p_i+1]]=1; - pheno_row[mapP2c[p_i+1]]=p; - } - } - p_i++; - } - if(col_type[i]=='D') - { - - // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL - // IS INTEGRAL i.e for atoi error. - if (strcmp(ch_ptr, "NA")!=0) { - uint32_t level=atoi(ch_ptr); - if (cvt_factor_levels[fac_cvt_i].count(level)==0) { - cvt_factor_levels[fac_cvt_i][level]= - cvt_factor_levels[fac_cvt_i].size(); - } - } - fac_cvt_i++; - } - - ch_ptr=strtok (NULL, " \t"); - i++; - } - - indicator_pheno.push_back(ind_pheno_row); - pheno.push_back(pheno_row); - - } - - // Close and reopen the file. - infile.close(); - infile.clear(); - - if(num_cvt_in_file>0) { - igzstream infile2 (file_sample.c_str(), igzstream::in); - - if (!infile2) { - cout<<"error! fail to open sample file: "<< - file_sample<<endl; - return false; - } - - // Skip header. 
- safeGetline(infile2, line); - safeGetline(infile2, line); - - // Pull in the covariates now we now the number of - // factor levels. - while (!safeGetline(infile2, line).eof()) { - - vector<double> v_d; flag_na=0; - ch_ptr=strtok ((char *)line.c_str(), " \t"); - - for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " \t");} - - size_t i=0; - size_t fac_cvt_i=0; - size_t num_fac_levels; - while (i<num_cols) { - - if(col_type[i]=='C') { - if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;} - else {d=atof(ch_ptr);} - - v_d.push_back(d); - } - - if(col_type[i]=='D') { - - // NOTE THIS DOES NOT CHECK TO BE SURE - // LEVEL IS INTEGRAL i.e for atoi error. - num_fac_levels=cvt_factor_levels[fac_cvt_i].size(); - if(num_fac_levels>1) { - if (strcmp(ch_ptr, "NA")==0) { - flag_na=1; - for(size_t it=0;it<num_fac_levels-1; it++) { - v_d.push_back(-9); - } - } - else { - uint32_t level=atoi(ch_ptr); - for(size_t it=0;it<num_fac_levels-1;it++) { - cvt_factor_levels[fac_cvt_i][level]==it+1 ? - v_d.push_back(1.0) : - v_d.push_back(0.0); - } - } - } - fac_cvt_i++; - } - - ch_ptr=strtok (NULL, " \t"); - i++; - } - - if (flag_na==0) { - indicator_cvt.push_back(1); - } else { - indicator_cvt.push_back(0); - } - cvt.push_back(v_d); - - - } - - if (indicator_cvt.empty()) {n_cvt=0;} - else { - flag_na=0; - for (vector<int>::size_type i=0; - i<indicator_cvt.size(); - ++i) { - if (indicator_cvt[i]==0) {continue;} - - if (flag_na==0) { - flag_na=1; - n_cvt=cvt[i].size(); - } - if (flag_na!=0 && n_cvt!=cvt[i].size()) { - cout<<"error! 
number of covariates in row "<< - i<<" do not match other rows."<<endl; - return false; - } - } - } - - infile2.close(); - infile2.clear(); - } - return true; +bool ReadFile_sample(const string &file_sample, + vector<vector<int>> &indicator_pheno, + vector<vector<double>> &pheno, + const vector<size_t> &p_column, vector<int> &indicator_cvt, + vector<vector<double>> &cvt, size_t &n_cvt) { + indicator_pheno.clear(); + pheno.clear(); + indicator_cvt.clear(); + + igzstream infile(file_sample.c_str(), igzstream::in); + + if (!infile) { + cout << "error! fail to open sample file: " << file_sample << endl; + return false; + } + + string line; + char *ch_ptr; + + string id; + double p, d; + + vector<double> pheno_row; + vector<int> ind_pheno_row; + int flag_na = 0; + + size_t num_cols = 0; + size_t num_p_in_file = 0; + size_t num_cvt_in_file = 0; + + map<size_t, size_t> mapP2c; + for (size_t i = 0; i < p_column.size(); i++) { + mapP2c[p_column[i]] = i; + pheno_row.push_back(-9); + ind_pheno_row.push_back(0); + } + + // Read header line1. + if (!safeGetline(infile, line).eof()) { + ch_ptr = strtok((char *)line.c_str(), " \t"); + if (strcmp(ch_ptr, "ID_1") != 0) { + return false; + } + ch_ptr = strtok(NULL, " \t"); + if (strcmp(ch_ptr, "ID_2") != 0) { + return false; + } + ch_ptr = strtok(NULL, " \t"); + if (strcmp(ch_ptr, "missing") != 0) { + return false; + } + while (ch_ptr != NULL) { + num_cols++; + ch_ptr = strtok(NULL, " \t"); + } + num_cols--; + } + + vector<map<uint32_t, size_t>> cvt_factor_levels; + + char col_type[num_cols]; + + // Read header line2. 
+ if (!safeGetline(infile, line).eof()) { + ch_ptr = strtok((char *)line.c_str(), " \t"); + if (strcmp(ch_ptr, "0") != 0) { + return false; + } + ch_ptr = strtok(NULL, " \t"); + if (strcmp(ch_ptr, "0") != 0) { + return false; + } + ch_ptr = strtok(NULL, " \t"); + if (strcmp(ch_ptr, "0") != 0) { + return false; + } + size_t it = 0; + ch_ptr = strtok(NULL, " \t"); + if (ch_ptr != NULL) + while (ch_ptr != NULL) { + col_type[it++] = ch_ptr[0]; + if (ch_ptr[0] == 'D') { + cvt_factor_levels.push_back(map<uint32_t, size_t>()); + num_cvt_in_file++; + } + if (ch_ptr[0] == 'C') { + num_cvt_in_file++; + } + if ((ch_ptr[0] == 'P') || (ch_ptr[0] == 'B')) { + num_p_in_file++; + } + ch_ptr = strtok(NULL, " \t"); + } + } + + while (!safeGetline(infile, line).eof()) { + + ch_ptr = strtok((char *)line.c_str(), " \t"); + + for (int it = 0; it < 3; it++) { + ch_ptr = strtok(NULL, " \t"); + } + + size_t i = 0; + size_t p_i = 0; + size_t fac_cvt_i = 0; + + while (i < num_cols) { + + if ((col_type[i] == 'P') || (col_type[i] == 'B')) { + if (mapP2c.count(p_i + 1) != 0) { + if (strcmp(ch_ptr, "NA") == 0) { + ind_pheno_row[mapP2c[p_i + 1]] = 0; + pheno_row[mapP2c[p_i + 1]] = -9; + } else { + p = atof(ch_ptr); + ind_pheno_row[mapP2c[p_i + 1]] = 1; + pheno_row[mapP2c[p_i + 1]] = p; + } + } + p_i++; + } + if (col_type[i] == 'D') { + + // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL + // IS INTEGRAL i.e for atoi error. + if (strcmp(ch_ptr, "NA") != 0) { + uint32_t level = atoi(ch_ptr); + if (cvt_factor_levels[fac_cvt_i].count(level) == 0) { + cvt_factor_levels[fac_cvt_i][level] = + cvt_factor_levels[fac_cvt_i].size(); + } + } + fac_cvt_i++; + } + + ch_ptr = strtok(NULL, " \t"); + i++; + } + + indicator_pheno.push_back(ind_pheno_row); + pheno.push_back(pheno_row); + } + + // Close and reopen the file. + infile.close(); + infile.clear(); + + if (num_cvt_in_file > 0) { + igzstream infile2(file_sample.c_str(), igzstream::in); + + if (!infile2) { + cout << "error! 
fail to open sample file: " << file_sample << endl; + return false; + } + + // Skip header. + safeGetline(infile2, line); + safeGetline(infile2, line); + + // Pull in the covariates now we now the number of + // factor levels. + while (!safeGetline(infile2, line).eof()) { + + vector<double> v_d; + flag_na = 0; + ch_ptr = strtok((char *)line.c_str(), " \t"); + + for (int it = 0; it < 3; it++) { + ch_ptr = strtok(NULL, " \t"); + } + + size_t i = 0; + size_t fac_cvt_i = 0; + size_t num_fac_levels; + while (i < num_cols) { + + if (col_type[i] == 'C') { + if (strcmp(ch_ptr, "NA") == 0) { + flag_na = 1; + d = -9; + } else { + d = atof(ch_ptr); + } + + v_d.push_back(d); + } + + if (col_type[i] == 'D') { + + // NOTE THIS DOES NOT CHECK TO BE SURE + // LEVEL IS INTEGRAL i.e for atoi error. + num_fac_levels = cvt_factor_levels[fac_cvt_i].size(); + if (num_fac_levels > 1) { + if (strcmp(ch_ptr, "NA") == 0) { + flag_na = 1; + for (size_t it = 0; it < num_fac_levels - 1; it++) { + v_d.push_back(-9); + } + } else { + uint32_t level = atoi(ch_ptr); + for (size_t it = 0; it < num_fac_levels - 1; it++) { + cvt_factor_levels[fac_cvt_i][level] == it + 1 + ? v_d.push_back(1.0) + : v_d.push_back(0.0); + } + } + } + fac_cvt_i++; + } + + ch_ptr = strtok(NULL, " \t"); + i++; + } + + if (flag_na == 0) { + indicator_cvt.push_back(1); + } else { + indicator_cvt.push_back(0); + } + cvt.push_back(v_d); + } + + if (indicator_cvt.empty()) { + n_cvt = 0; + } else { + flag_na = 0; + for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) { + if (indicator_cvt[i] == 0) { + continue; + } + + if (flag_na == 0) { + flag_na = 1; + n_cvt = cvt[i].size(); + } + if (flag_na != 0 && n_cvt != cvt[i].size()) { + cout << "error! number of covariates in row " << i + << " do not match other rows." << endl; + return false; + } + } + } + + infile2.close(); + infile2.clear(); + } + return true; } // WJA Added. // Read bgen file, the first time. 
bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, - const double &maf_level, const double &miss_level, - const double &hwe_level, const double &r2_level, - size_t &ns_test) { - - indicator_snp.clear(); - - ifstream infile (file_bgen.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bgen file:"<<file_bgen<<endl; - return false; - } - - gsl_vector *genotype=gsl_vector_alloc (W->size1); - gsl_vector *genotype_miss=gsl_vector_alloc (W->size1); - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - // Read in header. 
- uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4); - infile.read(reinterpret_cast<char*>(&bgen_header_length),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsnps),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsamples),4); - bgen_snp_block_offset-=4; - infile.ignore(4+bgen_header_length-20); - bgen_snp_block_offset-=4+bgen_header_length-20; - infile.read(reinterpret_cast<char*>(&bgen_flags),4); - bgen_snp_block_offset-=4; - bool CompressedSNPBlocks=bgen_flags&0x1; - bool LongIds=bgen_flags&0x4; - - if(!LongIds) {return false;} - - infile.ignore(bgen_snp_block_offset); - - ns_test=0; - - size_t ns_total=static_cast<size_t>(bgen_nsnps); - - snpInfo.clear(); - string rs; - long int b_pos; - string chr; - string major; - string minor; - string id; - - double v_x, v_w; - int c_idv=0; - - double maf, geno, geno_old; - size_t n_miss; - size_t n_0, n_1, n_2; - int flag_poly; - - double bgen_geno_prob_AA, bgen_geno_prob_AB; - double bgen_geno_prob_BB, bgen_geno_prob_non_miss; - - // Total number of samples in phenotype file. - size_t ni_total=indicator_idv.size(); - - // Number of samples to use in test. 
- size_t ni_test=0; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - - for (size_t i=0; i<ni_total; ++i) { - ni_test+=indicator_idv[i]; - } - - for (size_t t=0; t<ns_total; ++t) { - - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char*>(&bgen_N),4); - infile.read(reinterpret_cast<char*>(&bgen_LS),2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char*>(&bgen_LR),2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char*>(&bgen_LC),2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4); - - infile.read(reinterpret_cast<char*>(&bgen_LA),4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - - infile.read(reinterpret_cast<char*>(&bgen_LB),4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - // Should we switch according to MAF? 
- minor=bgen_B_allele; - major=bgen_A_allele; - b_pos=static_cast<long int>(bgen_SNP_pos); - - uint16_t unzipped_data[3*bgen_N]; - - if (setSnps.size()!=0 && setSnps.count(rs)==0) { - SNPINFO sInfo={"-9", rs, -9, -9, minor, major, - static_cast<size_t>(-9), -9, (long int) -9}; - - snpInfo.push_back(sInfo); - indicator_snp.push_back(0); - if(CompressedSNPBlocks) - infile.read(reinterpret_cast<char*>(&bgen_P),4); - else - bgen_P=6*bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if(CompressedSNPBlocks) - { - infile.read(reinterpret_cast<char*>(&bgen_P),4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size=6*bgen_N; - - infile.read(reinterpret_cast<char*>(zipped_data), - bgen_P); - int result= - uncompress(reinterpret_cast<Bytef*>(unzipped_data), - reinterpret_cast<uLongf*>(&unzipped_data_size), - reinterpret_cast<Bytef*>(zipped_data), - static_cast<uLong> (bgen_P)); - assert(result == Z_OK); - - } - else - { - bgen_P=6*bgen_N; - infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P); - - } - - maf=0; n_miss=0; flag_poly=0; geno_old=-9; - n_0=0; n_1=0; n_2=0; - c_idv=0; - gsl_vector_set_zero (genotype_miss); - for (size_t i=0; i<bgen_N; ++i) { - - // CHECK this set correctly! - if (indicator_idv[i]==0) {continue;} - - bgen_geno_prob_AA= - static_cast<double>(unzipped_data[i*3])/32768.0; - bgen_geno_prob_AB= - static_cast<double>(unzipped_data[i*3+1])/32768.0; - bgen_geno_prob_BB= - static_cast<double>(unzipped_data[i*3+2])/32768.0; - bgen_geno_prob_non_miss= - bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB; - - //CHECK 0.1 OK. 
- if (bgen_geno_prob_non_miss<0.9) { - gsl_vector_set (genotype_miss, c_idv, 1); - n_miss++; - c_idv++; - continue; - } - - bgen_geno_prob_AA/=bgen_geno_prob_non_miss; - bgen_geno_prob_AB/=bgen_geno_prob_non_miss; - bgen_geno_prob_BB/=bgen_geno_prob_non_miss; - - geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; - if (geno>=0 && geno<=0.5) {n_0++;} - if (geno>0.5 && geno<1.5) {n_1++;} - if (geno>=1.5 && geno<=2.0) {n_2++;} - - gsl_vector_set (genotype, c_idv, geno); - - // CHECK WHAT THIS DOES. - if (flag_poly==0) {geno_old=geno; flag_poly=2;} - if (flag_poly==2 && geno!=geno_old) {flag_poly=1;} - - maf+=geno; - - c_idv++; - } - - maf/=2.0*static_cast<double>(ni_test-n_miss); - - SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss, - (double)n_miss/(double)ni_test, maf}; - snpInfo.push_back(sInfo); - - if ( (double)n_miss/(double)ni_test > miss_level) { - indicator_snp.push_back(0); - continue; - } - - if ((maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1) { - indicator_snp.push_back(0); - continue; - } - - if (flag_poly!=1) { - indicator_snp.push_back(0); - continue; - } - - if (hwe_level!=0 && maf_level!=-1) { - if (CalcHWE(n_0, n_2, n_1)<hwe_level) { - indicator_snp.push_back(0); - continue; - } - } - - // Filter SNP if it is correlated with W - // unless W has only one column, of 1s. 
- for (size_t i=0; i<genotype->size; ++i) { - if (gsl_vector_get (genotype_miss, i)==1) { - geno=maf*2.0; - gsl_vector_set (genotype, i, geno); - } - } - - gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - gsl_blas_ddot (genotype, genotype, &v_x); - gsl_blas_ddot (Wtx, WtWiWtx, &v_w); - - if (W->size2!=1 && v_w/v_x >= r2_level) { - indicator_snp.push_back(0); continue;} - - indicator_snp.push_back(1); - ns_test++; - - } - - return true; + const gsl_matrix *W, vector<int> &indicator_idv, + vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, + const double &maf_level, const double &miss_level, + const double &hwe_level, const double &r2_level, + size_t &ns_test) { + + indicator_snp.clear(); + + ifstream infile(file_bgen.c_str(), ios::binary); + if (!infile) { + cout << "error reading bgen file:" << file_bgen << endl; + return false; + } + + gsl_vector *genotype = gsl_vector_alloc(W->size1); + gsl_vector *genotype_miss = gsl_vector_alloc(W->size1); + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + // Read in header. 
+ uint32_t bgen_snp_block_offset; + uint32_t bgen_header_length; + uint32_t bgen_nsamples; + uint32_t bgen_nsnps; + uint32_t bgen_flags; + infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); + infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); + bgen_snp_block_offset -= 4; + infile.ignore(4 + bgen_header_length - 20); + bgen_snp_block_offset -= 4 + bgen_header_length - 20; + infile.read(reinterpret_cast<char *>(&bgen_flags), 4); + bgen_snp_block_offset -= 4; + bool CompressedSNPBlocks = bgen_flags & 0x1; + bool LongIds = bgen_flags & 0x4; + + if (!LongIds) { + return false; + } + + infile.ignore(bgen_snp_block_offset); + + ns_test = 0; + + size_t ns_total = static_cast<size_t>(bgen_nsnps); + + snpInfo.clear(); + string rs; + long int b_pos; + string chr; + string major; + string minor; + string id; + + double v_x, v_w; + int c_idv = 0; + + double maf, geno, geno_old; + size_t n_miss; + size_t n_0, n_1, n_2; + int flag_poly; + + double bgen_geno_prob_AA, bgen_geno_prob_AB; + double bgen_geno_prob_BB, bgen_geno_prob_non_miss; + + // Total number of samples in phenotype file. + size_t ni_total = indicator_idv.size(); + + // Number of samples to use in test. 
+ size_t ni_test = 0; + + uint32_t bgen_N; + uint16_t bgen_LS; + uint16_t bgen_LR; + uint16_t bgen_LC; + uint32_t bgen_SNP_pos; + uint32_t bgen_LA; + std::string bgen_A_allele; + uint32_t bgen_LB; + std::string bgen_B_allele; + uint32_t bgen_P; + size_t unzipped_data_size; + + for (size_t i = 0; i < ni_total; ++i) { + ni_test += indicator_idv[i]; + } + + for (size_t t = 0; t < ns_total; ++t) { + + id.clear(); + rs.clear(); + chr.clear(); + bgen_A_allele.clear(); + bgen_B_allele.clear(); + + infile.read(reinterpret_cast<char *>(&bgen_N), 4); + infile.read(reinterpret_cast<char *>(&bgen_LS), 2); + + id.resize(bgen_LS); + infile.read(&id[0], bgen_LS); + + infile.read(reinterpret_cast<char *>(&bgen_LR), 2); + rs.resize(bgen_LR); + infile.read(&rs[0], bgen_LR); + + infile.read(reinterpret_cast<char *>(&bgen_LC), 2); + chr.resize(bgen_LC); + infile.read(&chr[0], bgen_LC); + + infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); + + infile.read(reinterpret_cast<char *>(&bgen_LA), 4); + bgen_A_allele.resize(bgen_LA); + infile.read(&bgen_A_allele[0], bgen_LA); + + infile.read(reinterpret_cast<char *>(&bgen_LB), 4); + bgen_B_allele.resize(bgen_LB); + infile.read(&bgen_B_allele[0], bgen_LB); + + // Should we switch according to MAF? 
+ minor = bgen_B_allele; + major = bgen_A_allele; + b_pos = static_cast<long int>(bgen_SNP_pos); + + uint16_t unzipped_data[3 * bgen_N]; + + if (setSnps.size() != 0 && setSnps.count(rs) == 0) { + SNPINFO sInfo = { + "-9", rs, -9, -9, minor, major, static_cast<size_t>(-9), + -9, (long int)-9}; + + snpInfo.push_back(sInfo); + indicator_snp.push_back(0); + if (CompressedSNPBlocks) + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + else + bgen_P = 6 * bgen_N; + + infile.ignore(static_cast<size_t>(bgen_P)); + + continue; + } + + if (CompressedSNPBlocks) { + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + uint8_t zipped_data[bgen_P]; + + unzipped_data_size = 6 * bgen_N; + + infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); + int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), + reinterpret_cast<uLongf *>(&unzipped_data_size), + reinterpret_cast<Bytef *>(zipped_data), + static_cast<uLong>(bgen_P)); + assert(result == Z_OK); + + } else { + bgen_P = 6 * bgen_N; + infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); + } + + maf = 0; + n_miss = 0; + flag_poly = 0; + geno_old = -9; + n_0 = 0; + n_1 = 0; + n_2 = 0; + c_idv = 0; + gsl_vector_set_zero(genotype_miss); + for (size_t i = 0; i < bgen_N; ++i) { + + // CHECK this set correctly! + if (indicator_idv[i] == 0) { + continue; + } + + bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; + bgen_geno_prob_AB = + static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; + bgen_geno_prob_BB = + static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; + bgen_geno_prob_non_miss = + bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; + + // CHECK 0.1 OK. 
+ if (bgen_geno_prob_non_miss < 0.9) { + gsl_vector_set(genotype_miss, c_idv, 1); + n_miss++; + c_idv++; + continue; + } + + bgen_geno_prob_AA /= bgen_geno_prob_non_miss; + bgen_geno_prob_AB /= bgen_geno_prob_non_miss; + bgen_geno_prob_BB /= bgen_geno_prob_non_miss; + + geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; + if (geno >= 0 && geno <= 0.5) { + n_0++; + } + if (geno > 0.5 && geno < 1.5) { + n_1++; + } + if (geno >= 1.5 && geno <= 2.0) { + n_2++; + } + + gsl_vector_set(genotype, c_idv, geno); + + // CHECK WHAT THIS DOES. + if (flag_poly == 0) { + geno_old = geno; + flag_poly = 2; + } + if (flag_poly == 2 && geno != geno_old) { + flag_poly = 1; + } + + maf += geno; + + c_idv++; + } + + maf /= 2.0 * static_cast<double>(ni_test - n_miss); + + SNPINFO sInfo = {chr, rs, -9, b_pos, + minor, major, n_miss, (double)n_miss / (double)ni_test, + maf}; + snpInfo.push_back(sInfo); + + if ((double)n_miss / (double)ni_test > miss_level) { + indicator_snp.push_back(0); + continue; + } + + if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) { + indicator_snp.push_back(0); + continue; + } + + if (flag_poly != 1) { + indicator_snp.push_back(0); + continue; + } + + if (hwe_level != 0 && maf_level != -1) { + if (CalcHWE(n_0, n_2, n_1) < hwe_level) { + indicator_snp.push_back(0); + continue; + } + } + + // Filter SNP if it is correlated with W + // unless W has only one column, of 1s. + for (size_t i = 0; i < genotype->size; ++i) { + if (gsl_vector_get(genotype_miss, i) == 1) { + geno = maf * 2.0; + gsl_vector_set(genotype, i, geno); + } + } + + gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx); + gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); + gsl_blas_ddot(genotype, genotype, &v_x); + gsl_blas_ddot(Wtx, WtWiWtx, &v_w); + + if (W->size2 != 1 && v_w / v_x >= r2_level) { + indicator_snp.push_back(0); + continue; + } + + indicator_snp.push_back(1); + ns_test++; + } + + return true; } // Read oxford genotype file and calculate kinship matrix. 
-bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin) { - string file_bgen=file_oxford; - ifstream infile (file_bgen.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bgen file:"<<file_bgen<<endl; - return false; - } - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4); - infile.read(reinterpret_cast<char*>(&bgen_header_length),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsnps),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsamples),4); - bgen_snp_block_offset-=4; - infile.ignore(4+bgen_header_length-20); - bgen_snp_block_offset-=4+bgen_header_length-20; - infile.read(reinterpret_cast<char*>(&bgen_flags),4); - bgen_snp_block_offset-=4; - bool CompressedSNPBlocks=bgen_flags&0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB; - double bgen_geno_prob_BB, bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - double genotype; - - size_t n_miss; - double d, geno_mean, geno_var; - - size_t ni_total=matrix_kin->size1; - gsl_vector *geno=gsl_vector_alloc (ni_total); - gsl_vector *geno_miss=gsl_vector_alloc (ni_total); - - size_t ns_test=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - 
infile.read(reinterpret_cast<char*>(&bgen_N),4); - infile.read(reinterpret_cast<char*>(&bgen_LS),2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char*>(&bgen_LR),2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char*>(&bgen_LC),2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4); - - infile.read(reinterpret_cast<char*>(&bgen_LA),4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - - infile.read(reinterpret_cast<char*>(&bgen_LB),4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3*bgen_N]; - - if (indicator_snp[t]==0) { - if(CompressedSNPBlocks) - infile.read(reinterpret_cast<char*>(&bgen_P),4); - else - bgen_P=6*bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if(CompressedSNPBlocks) - { - infile.read(reinterpret_cast<char*>(&bgen_P),4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size=6*bgen_N; - - infile.read(reinterpret_cast<char*>(zipped_data),bgen_P); - - int result= - uncompress(reinterpret_cast<Bytef*>(unzipped_data), - reinterpret_cast<uLongf*>(&unzipped_data_size), - reinterpret_cast<Bytef*>(zipped_data), - static_cast<uLong> (bgen_P)); - assert(result == Z_OK); - - } - else - { - - bgen_P=6*bgen_N; - infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P); - } - - geno_mean=0.0; n_miss=0; geno_var=0.0; - gsl_vector_set_all(geno_miss, 0); - - for (size_t i=0; i<bgen_N; ++i) { +bool bgenKin(const string &file_oxford, vector<int> &indicator_snp, + const int k_mode, const int display_pace, gsl_matrix *matrix_kin) { + string file_bgen = file_oxford; + ifstream infile(file_bgen.c_str(), ios::binary); + if (!infile) { + cout << "error reading bgen file:" << file_bgen << endl; + return false; + } + + // Read in header. 
+ uint32_t bgen_snp_block_offset; + uint32_t bgen_header_length; + uint32_t bgen_nsamples; + uint32_t bgen_nsnps; + uint32_t bgen_flags; + infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); + infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); + bgen_snp_block_offset -= 4; + infile.ignore(4 + bgen_header_length - 20); + bgen_snp_block_offset -= 4 + bgen_header_length - 20; + infile.read(reinterpret_cast<char *>(&bgen_flags), 4); + bgen_snp_block_offset -= 4; + bool CompressedSNPBlocks = bgen_flags & 0x1; + + infile.ignore(bgen_snp_block_offset); + + double bgen_geno_prob_AA, bgen_geno_prob_AB; + double bgen_geno_prob_BB, bgen_geno_prob_non_miss; + + uint32_t bgen_N; + uint16_t bgen_LS; + uint16_t bgen_LR; + uint16_t bgen_LC; + uint32_t bgen_SNP_pos; + uint32_t bgen_LA; + std::string bgen_A_allele; + uint32_t bgen_LB; + std::string bgen_B_allele; + uint32_t bgen_P; + size_t unzipped_data_size; + string id; + string rs; + string chr; + double genotype; + + size_t n_miss; + double d, geno_mean, geno_var; + + size_t ni_total = matrix_kin->size1; + gsl_vector *geno = gsl_vector_alloc(ni_total); + gsl_vector *geno_miss = gsl_vector_alloc(ni_total); + + size_t ns_test = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + + id.clear(); + rs.clear(); + chr.clear(); + bgen_A_allele.clear(); + bgen_B_allele.clear(); + + infile.read(reinterpret_cast<char *>(&bgen_N), 4); + infile.read(reinterpret_cast<char *>(&bgen_LS), 2); + + id.resize(bgen_LS); + infile.read(&id[0], bgen_LS); + + infile.read(reinterpret_cast<char *>(&bgen_LR), 2); + rs.resize(bgen_LR); + infile.read(&rs[0], bgen_LR); + + infile.read(reinterpret_cast<char *>(&bgen_LC), 
2); + chr.resize(bgen_LC); + infile.read(&chr[0], bgen_LC); + + infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); + + infile.read(reinterpret_cast<char *>(&bgen_LA), 4); + bgen_A_allele.resize(bgen_LA); + infile.read(&bgen_A_allele[0], bgen_LA); + + infile.read(reinterpret_cast<char *>(&bgen_LB), 4); + bgen_B_allele.resize(bgen_LB); + infile.read(&bgen_B_allele[0], bgen_LB); + + uint16_t unzipped_data[3 * bgen_N]; + + if (indicator_snp[t] == 0) { + if (CompressedSNPBlocks) + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + else + bgen_P = 6 * bgen_N; + + infile.ignore(static_cast<size_t>(bgen_P)); + + continue; + } + if (CompressedSNPBlocks) { + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + uint8_t zipped_data[bgen_P]; - bgen_geno_prob_AA= - static_cast<double>(unzipped_data[i*3])/32768.0; - bgen_geno_prob_AB= - static_cast<double>(unzipped_data[i*3+1])/32768.0; - bgen_geno_prob_BB= - static_cast<double>(unzipped_data[i*3+2])/32768.0; - // WJA - bgen_geno_prob_non_miss=bgen_geno_prob_AA + - bgen_geno_prob_AB+bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss<0.9) { - gsl_vector_set(geno_miss, i, 0.0); - n_miss++; - } - else { + unzipped_data_size = 6 * bgen_N; - bgen_geno_prob_AA/=bgen_geno_prob_non_miss; - bgen_geno_prob_AB/=bgen_geno_prob_non_miss; - bgen_geno_prob_BB/=bgen_geno_prob_non_miss; + infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); - genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; + int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), + reinterpret_cast<uLongf *>(&unzipped_data_size), + reinterpret_cast<Bytef *>(zipped_data), + static_cast<uLong>(bgen_P)); + assert(result == Z_OK); - gsl_vector_set(geno, i, genotype); - gsl_vector_set(geno_miss, i, 1.0); - geno_mean+=genotype; - geno_var+=genotype*genotype; - } + } else { - } + bgen_P = 6 * bgen_N; + infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); + } + geno_mean = 0.0; + n_miss = 0; + geno_var = 0.0; + gsl_vector_set_all(geno_miss, 0); + + for 
(size_t i = 0; i < bgen_N; ++i) { + + bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; + bgen_geno_prob_AB = + static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; + bgen_geno_prob_BB = + static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; + // WJA + bgen_geno_prob_non_miss = + bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; + if (bgen_geno_prob_non_miss < 0.9) { + gsl_vector_set(geno_miss, i, 0.0); + n_miss++; + } else { - geno_mean/=(double)(ni_total-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_total; - geno_var-=geno_mean*geno_mean; + bgen_geno_prob_AA /= bgen_geno_prob_non_miss; + bgen_geno_prob_AB /= bgen_geno_prob_non_miss; + bgen_geno_prob_BB /= bgen_geno_prob_non_miss; - for (size_t i=0; i<ni_total; ++i) { - if (gsl_vector_get (geno_miss, i)==0) { - gsl_vector_set(geno, i, geno_mean); - } - } + genotype = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; - gsl_vector_add_constant (geno, -1.0*geno_mean); + gsl_vector_set(geno, i, genotype); + gsl_vector_set(geno_miss, i, 1.0); + geno_mean += genotype; + geno_var += genotype * genotype; + } + } + + geno_mean /= (double)(ni_total - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_total; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_total; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } - if (geno_var!=0) { - if (k_mode==1) { - gsl_blas_dsyr(CblasUpper,1.0,geno,matrix_kin); - } else if (k_mode==2) { - gsl_blas_dsyr(CblasUpper,1.0/geno_var,geno,matrix_kin); - } - else { - cout<<"Unknown kinship mode."<<endl; - } - } + gsl_vector_add_constant(geno, -1.0 * geno_mean); - ns_test++; + if (geno_var != 0) { + if (k_mode == 1) { + gsl_blas_dsyr(CblasUpper, 1.0, geno, matrix_kin); + } else if (k_mode == 2) { + gsl_blas_dsyr(CblasUpper, 1.0 / geno_var, geno, matrix_kin); + } else { + cout << "Unknown kinship mode." 
<< endl; + } } - cout<<endl; - gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test); + ns_test++; + } + cout << endl; - for (size_t i=0; i<ni_total; ++i) { - for (size_t j=0; j<i; ++j) { - d=gsl_matrix_get (matrix_kin, j, i); - gsl_matrix_set (matrix_kin, i, j, d); - } - } + gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test); - gsl_vector_free (geno); - gsl_vector_free (geno_miss); + for (size_t i = 0; i < ni_total; ++i) { + for (size_t j = 0; j < i; ++j) { + d = gsl_matrix_get(matrix_kin, j, i); + gsl_matrix_set(matrix_kin, i, j, d); + } + } - infile.close(); - infile.clear(); + gsl_vector_free(geno); + gsl_vector_free(geno_miss); - return true; + infile.close(); + infile.clear(); + + return true; } // Read header to determine which column contains which item. -bool ReadHeader_io (const string &line, HEADER &header) -{ - string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID", - "rsid","RSID","MarkerName"}; - set<string> rs_set(rs_ptr, rs_ptr+11); - string chr_ptr[]={"chr","CHR"}; - set<string> chr_set(chr_ptr, chr_ptr+2); - string pos_ptr[]={"ps","PS","pos","POS","base_position", - "BASE_POSITION", "bp", "BP"}; - set<string> pos_set(pos_ptr, pos_ptr+8); - string cm_ptr[]={"cm","CM"}; - set<string> cm_set(cm_ptr, cm_ptr+2); - string a1_ptr[]={"a1","A1","allele1","ALLELE1","Allele1","INC_ALLELE"}; - set<string> a1_set(a1_ptr, a1_ptr+5); - string a0_ptr[]={"a0","A0","allele0","ALLELE0","Allele0","a2","A2", - "allele2","ALLELE2","Allele2","DEC_ALLELE"}; - set<string> a0_set(a0_ptr, a0_ptr+10); - - string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"}; - set<string> z_set(z_ptr, z_ptr+6); - string beta_ptr[]={"beta","BETA","b","B"}; - set<string> beta_set(beta_ptr, beta_ptr+4); - string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"}; - set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4); - string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"}; - set<string> chisq_set(chisq_ptr, chisq_ptr+4); - string 
p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"}; - set<string> p_set(p_ptr, p_ptr+6); - - string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"}; - set<string> n_set(n_ptr, n_ptr+6); - string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"}; - set<string> nmis_set(nmis_ptr, nmis_ptr+6); - string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"}; - set<string> nobs_set(nobs_ptr, nobs_ptr+4); - string ncase_ptr[]={"ncase","NCASE","n_case","N_CASE"}; - set<string> ncase_set(ncase_ptr, ncase_ptr+4); - string ncontrol_ptr[]={"ncontrol","NCONTROL","n_control","N_CONTROL"}; - set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr+4); - - string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq", - "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY", - "Freq.Allele1.HapMapCEU","FreqAllele1HapMapCEU", - "Freq1.Hapmap"}; - set<string> af_set(af_ptr, af_ptr+13); - string var_ptr[]={"var","VAR"}; - set<string> var_set(var_ptr, var_ptr+2); - - string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"}; - set<string> ws_set(ws_ptr, ws_ptr+4); - string cor_ptr[]={"cor","COR","r","R"}; - set<string> cor_set(cor_ptr, cor_ptr+4); - - header.rs_col=0; header.chr_col=0; header.pos_col=0; - header.cm_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; - header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; - header.p_col=0; header.n_col=0; header.nmis_col=0; - header.nobs_col=0; header.ncase_col=0; header.ncontrol_col=0; - header.af_col=0; header.var_col=0; header.ws_col=0; - header.cor_col=0; header.coln=0; +bool ReadHeader_io(const string &line, HEADER &header) { + string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", "SNPS", + "snpid", "SNPID", "rsid", "RSID", "MarkerName"}; + set<string> rs_set(rs_ptr, rs_ptr + 11); + string chr_ptr[] = {"chr", "CHR"}; + set<string> chr_set(chr_ptr, chr_ptr + 2); + string pos_ptr[] = { + "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"}; + set<string> pos_set(pos_ptr, pos_ptr + 8); + string cm_ptr[] 
= {"cm", "CM"}; + set<string> cm_set(cm_ptr, cm_ptr + 2); + string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1", "Allele1", "INC_ALLELE"}; + set<string> a1_set(a1_ptr, a1_ptr + 5); + string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0", "Allele0", "a2", + "A2", "allele2", "ALLELE2", "Allele2", "DEC_ALLELE"}; + set<string> a0_set(a0_ptr, a0_ptr + 10); + + string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"}; + set<string> z_set(z_ptr, z_ptr + 6); + string beta_ptr[] = {"beta", "BETA", "b", "B"}; + set<string> beta_set(beta_ptr, beta_ptr + 4); + string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"}; + set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4); + string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"}; + set<string> chisq_set(chisq_ptr, chisq_ptr + 4); + string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"}; + set<string> p_set(p_ptr, p_ptr + 6); + + string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"}; + set<string> n_set(n_ptr, n_ptr + 6); + string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"}; + set<string> nmis_set(nmis_ptr, nmis_ptr + 6); + string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"}; + set<string> nobs_set(nobs_ptr, nobs_ptr + 4); + string ncase_ptr[] = {"ncase", "NCASE", "n_case", "N_CASE"}; + set<string> ncase_set(ncase_ptr, ncase_ptr + 4); + string ncontrol_ptr[] = {"ncontrol", "NCONTROL", "n_control", "N_CONTROL"}; + set<string> ncontrol_set(ncontrol_ptr, ncontrol_ptr + 4); + + string af_ptr[] = {"af", + "AF", + "maf", + "MAF", + "f", + "F", + "allele_freq", + "ALLELE_FREQ", + "allele_frequency", + "ALLELE_FREQUENCY", + "Freq.Allele1.HapMapCEU", + "FreqAllele1HapMapCEU", + "Freq1.Hapmap"}; + set<string> af_set(af_ptr, af_ptr + 13); + string var_ptr[] = {"var", "VAR"}; + set<string> var_set(var_ptr, var_ptr + 2); + + string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"}; + set<string> ws_set(ws_ptr, ws_ptr + 4); + string cor_ptr[] = {"cor", "COR", 
"r", "R"}; + set<string> cor_set(cor_ptr, cor_ptr + 4); + + header.rs_col = 0; + header.chr_col = 0; + header.pos_col = 0; + header.cm_col = 0; + header.a1_col = 0; + header.a0_col = 0; + header.z_col = 0; + header.beta_col = 0; + header.sebeta_col = 0; + header.chisq_col = 0; + header.p_col = 0; + header.n_col = 0; + header.nmis_col = 0; + header.nobs_col = 0; + header.ncase_col = 0; + header.ncontrol_col = 0; + header.af_col = 0; + header.var_col = 0; + header.ws_col = 0; + header.cor_col = 0; + header.coln = 0; char *ch_ptr; string type; - size_t n_error=0; - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - while (ch_ptr!=NULL) { - type=ch_ptr; - if (rs_set.count(type)!=0) { - if (header.rs_col==0) { - header.rs_col=header.coln+1; + size_t n_error = 0; + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + while (ch_ptr != NULL) { + type = ch_ptr; + if (rs_set.count(type) != 0) { + if (header.rs_col == 0) { + header.rs_col = header.coln + 1; } else { - cout<<"error! more than two rs columns in the file."<<endl; - n_error++; + cout << "error! more than two rs columns in the file." << endl; + n_error++; } - } else if (chr_set.count(type)!=0) { - if (header.chr_col==0) { - header.chr_col=header.coln+1; + } else if (chr_set.count(type) != 0) { + if (header.chr_col == 0) { + header.chr_col = header.coln + 1; } else { - cout<<"error! more than two chr columns in the file."<<endl; - n_error++; + cout << "error! more than two chr columns in the file." << endl; + n_error++; } - } else if (pos_set.count(type)!=0) { - if (header.pos_col==0) { - header.pos_col=header.coln+1; + } else if (pos_set.count(type) != 0) { + if (header.pos_col == 0) { + header.pos_col = header.coln + 1; } else { - cout<<"error! more than two pos columns in the file."<<endl; - n_error++; + cout << "error! more than two pos columns in the file." 
<< endl; + n_error++; } - } else if (cm_set.count(type)!=0) { - if (header.cm_col==0) { - header.cm_col=header.coln+1; + } else if (cm_set.count(type) != 0) { + if (header.cm_col == 0) { + header.cm_col = header.coln + 1; } else { - cout<<"error! more than two cm columns in the file."<<endl; - n_error++; + cout << "error! more than two cm columns in the file." << endl; + n_error++; } - } else if (a1_set.count(type)!=0) { - if (header.a1_col==0) { - header.a1_col=header.coln+1; + } else if (a1_set.count(type) != 0) { + if (header.a1_col == 0) { + header.a1_col = header.coln + 1; } else { - cout<<"error! more than two allele1 columns in the file."<<endl; - n_error++; + cout << "error! more than two allele1 columns in the file." << endl; + n_error++; } - } else if (a0_set.count(type)!=0) { - if (header.a0_col==0) { - header.a0_col=header.coln+1; + } else if (a0_set.count(type) != 0) { + if (header.a0_col == 0) { + header.a0_col = header.coln + 1; } else { - cout<<"error! more than two allele0 columns in the file."<<endl; - n_error++; + cout << "error! more than two allele0 columns in the file." << endl; + n_error++; } - } else if (z_set.count(type)!=0) { - if (header.z_col==0) { - header.z_col=header.coln+1; + } else if (z_set.count(type) != 0) { + if (header.z_col == 0) { + header.z_col = header.coln + 1; } else { - cout<<"error! more than two z columns in the file."<<endl; - n_error++; + cout << "error! more than two z columns in the file." << endl; + n_error++; } - } else if (beta_set.count(type)!=0) { - if (header.beta_col==0) { - header.beta_col=header.coln+1; + } else if (beta_set.count(type) != 0) { + if (header.beta_col == 0) { + header.beta_col = header.coln + 1; } else { - cout<<"error! more than two beta columns in the file."<<endl; - n_error++; + cout << "error! more than two beta columns in the file." 
<< endl; + n_error++; } - } else if (sebeta_set.count(type)!=0) { - if (header.sebeta_col==0) { - header.sebeta_col=header.coln+1; + } else if (sebeta_set.count(type) != 0) { + if (header.sebeta_col == 0) { + header.sebeta_col = header.coln + 1; } else { - cout<<"error! more than two se_beta columns in the file."<<endl; - n_error++; + cout << "error! more than two se_beta columns in the file." << endl; + n_error++; } - } else if (chisq_set.count(type)!=0) { - if (header.chisq_col==0) { - header.chisq_col=header.coln+1; + } else if (chisq_set.count(type) != 0) { + if (header.chisq_col == 0) { + header.chisq_col = header.coln + 1; } else { - cout<<"error! more than two z columns in the file."<<endl; - n_error++; + cout << "error! more than two z columns in the file." << endl; + n_error++; } - } else if (p_set.count(type)!=0) { - if (header.p_col==0) { - header.p_col=header.coln+1; + } else if (p_set.count(type) != 0) { + if (header.p_col == 0) { + header.p_col = header.coln + 1; } else { - cout<<"error! more than two p columns in the file."<<endl; - n_error++; + cout << "error! more than two p columns in the file." << endl; + n_error++; } - } else if (n_set.count(type)!=0) { - if (header.n_col==0) { - header.n_col=header.coln+1; + } else if (n_set.count(type) != 0) { + if (header.n_col == 0) { + header.n_col = header.coln + 1; } else { - cout<<"error! more than two n_total columns in the file."<<endl; - n_error++;} - } else if (nmis_set.count(type)!=0) { - if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else { - cout<<"error! more than two n_mis columns in the file."<<endl; - n_error++; - } - } else if (nobs_set.count(type)!=0) { - if (header.nobs_col==0) { - header.nobs_col=header.coln+1; + cout << "error! more than two n_total columns in the file." << endl; + n_error++; + } + } else if (nmis_set.count(type) != 0) { + if (header.nmis_col == 0) { + header.nmis_col = header.coln + 1; + } else { + cout << "error! more than two n_mis columns in the file." 
<< endl; + n_error++; + } + } else if (nobs_set.count(type) != 0) { + if (header.nobs_col == 0) { + header.nobs_col = header.coln + 1; } else { - cout<<"error! more than two n_obs columns in the file."<<endl; - n_error++; + cout << "error! more than two n_obs columns in the file." << endl; + n_error++; } - } else if (ncase_set.count(type)!=0) { - if (header.ncase_col==0) { - header.ncase_col=header.coln+1; + } else if (ncase_set.count(type) != 0) { + if (header.ncase_col == 0) { + header.ncase_col = header.coln + 1; } else { - cout<<"error! more than two n_case columns in the file."<<endl; - n_error++; + cout << "error! more than two n_case columns in the file." << endl; + n_error++; } - } else if (ncontrol_set.count(type)!=0) { - if (header.ncontrol_col==0) { - header.ncontrol_col=header.coln+1; + } else if (ncontrol_set.count(type) != 0) { + if (header.ncontrol_col == 0) { + header.ncontrol_col = header.coln + 1; } else { - cout<<"error! more than two n_control columns in the file."<<endl; - n_error++; + cout << "error! more than two n_control columns in the file." << endl; + n_error++; } - } else if (ws_set.count(type)!=0) { - if (header.ws_col==0) { - header.ws_col=header.coln+1; + } else if (ws_set.count(type) != 0) { + if (header.ws_col == 0) { + header.ws_col = header.coln + 1; } else { - cout<<"error! more than two window_size columns in the file."<<endl; - n_error++; + cout << "error! more than two window_size columns in the file." << endl; + n_error++; } - } else if (af_set.count(type)!=0) { - if (header.af_col==0) { - header.af_col=header.coln+1; + } else if (af_set.count(type) != 0) { + if (header.af_col == 0) { + header.af_col = header.coln + 1; } else { - cout<<"error! more than two af columns in the file."<<endl; - n_error++; + cout << "error! more than two af columns in the file." 
<< endl; + n_error++; } - } else if (cor_set.count(type)!=0) { - if (header.cor_col==0) { - header.cor_col=header.coln+1; + } else if (cor_set.count(type) != 0) { + if (header.cor_col == 0) { + header.cor_col = header.coln + 1; } else { - cout<<"error! more than two cor columns in the file."<<endl; - n_error++; + cout << "error! more than two cor columns in the file." << endl; + n_error++; } } else { string str = ch_ptr; - string cat = str.substr(str.size()-2, 2); + string cat = str.substr(str.size() - 2, 2); - if(cat == "_c" || cat =="_C"){ + if (cat == "_c" || cat == "_C") { // continuous - header.catc_col.insert(header.coln+1); + header.catc_col.insert(header.coln + 1); } else { - // discrete - header.catd_col.insert(header.coln+1); + // discrete + header.catd_col.insert(header.coln + 1); } } - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); header.coln++; } - if (header.cor_col!=0 && header.cor_col!=header.coln) { - cout<<"error! the cor column should be the last column."<<endl; + if (header.cor_col != 0 && header.cor_col != header.coln) { + cout << "error! the cor column should be the last column." << endl; n_error++; } - if (header.rs_col==0) { - if (header.chr_col!=0 && header.pos_col!=0) { - cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl; + if (header.rs_col == 0) { + if (header.chr_col != 0 && header.pos_col != 0) { + cout << "missing an rs column. rs id will be replaced by chr:pos" << endl; } else { - cout<<"error! missing an rs column."<<endl; n_error++; + cout << "error! missing an rs column." << endl; + n_error++; } } - if (n_error==0) { + if (n_error == 0) { return true; } else { return false; @@ -3026,13 +3210,13 @@ bool ReadHeader_io (const string &line, HEADER &header) // Read category file, record mapRS2 in the category file does not // contain a null category so if a snp has 0 for all categories, then // it is not included in the analysis. 
-bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, - size_t &n_vc) { +bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat, + size_t &n_vc) { mapRS2cat.clear(); - igzstream infile (file_cat.c_str(), igzstream::in); + igzstream infile(file_cat.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open category file: "<<file_cat<<endl; + cout << "error! fail to open category file: " << file_cat << endl; return false; } @@ -3045,47 +3229,62 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, // Read header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_io (line, header); + ReadHeader_io(line, header); // Use the header to count the number of categories. - n_vc=header.coln; - if (header.rs_col!=0) {n_vc--;} - if (header.chr_col!=0) {n_vc--;} - if (header.pos_col!=0) {n_vc--;} - if (header.cm_col!=0) {n_vc--;} - if (header.a1_col!=0) {n_vc--;} - if (header.a0_col!=0) {n_vc--;} + n_vc = header.coln; + if (header.rs_col != 0) { + n_vc--; + } + if (header.chr_col != 0) { + n_vc--; + } + if (header.pos_col != 0) { + n_vc--; + } + if (header.cm_col != 0) { + n_vc--; + } + if (header.a1_col != 0) { + n_vc--; + } + if (header.a0_col != 0) { + n_vc--; + } // Read the following lines to record mapRS2cat. 
while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - - i_cat=0; - for (size_t i=0; i<header.coln; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) { - rs=ch_ptr; - } else if (header.chr_col!=0 && header.chr_col==i+1) { - chr=ch_ptr; - } else if (header.pos_col!=0 && header.pos_col==i+1) { - pos=ch_ptr; - } else if (header.cm_col!=0 && header.cm_col==i+1) { - cm=ch_ptr; - } else if (header.a1_col!=0 && header.a1_col==i+1) { - a1=ch_ptr; - } else if (header.a0_col!=0 && header.a0_col==i+1) { - a0=ch_ptr; - } else if (atoi(ch_ptr)==1 || atoi(ch_ptr)==0) { - if (i_cat==0) { - if (header.rs_col==0) { - rs=chr+":"+pos; - } - } - - if (atoi(ch_ptr)==1 && mapRS2cat.count(rs)==0) {mapRS2cat[rs]=i_cat;} - i_cat++; - } else {} - - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok((char *)line.c_str(), " , \t"); + + i_cat = 0; + for (size_t i = 0; i < header.coln; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } else if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } else if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + } else if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + } else if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } else if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; + } else if (atoi(ch_ptr) == 1 || atoi(ch_ptr) == 0) { + if (i_cat == 0) { + if (header.rs_col == 0) { + rs = chr + ":" + pos; + } + } + + if (atoi(ch_ptr) == 1 && mapRS2cat.count(rs) == 0) { + mapRS2cat[rs] = i_cat; + } + i_cat++; + } else { + } + + ch_ptr = strtok(NULL, " , \t"); } } @@ -3095,25 +3294,29 @@ bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, return true; } -bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, - size_t &n_vc) { +bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat, + size_t &n_vc) { mapRS2cat.clear(); - igzstream 
infile (file_mcat.c_str(), igzstream::in); + igzstream infile(file_mcat.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open mcategory file: "<<file_mcat<<endl; + cout << "error! fail to open mcategory file: " << file_mcat << endl; return false; } string file_name; map<string, size_t> mapRS2cat_tmp; - size_t n_vc_tmp, t=0; + size_t n_vc_tmp, t = 0; while (!safeGetline(infile, file_name).eof()) { mapRS2cat_tmp.clear(); - ReadFile_cat (file_name, mapRS2cat_tmp, n_vc_tmp); + ReadFile_cat(file_name, mapRS2cat_tmp, n_vc_tmp); mapRS2cat.insert(mapRS2cat_tmp.begin(), mapRS2cat_tmp.end()); - if (t==0) {n_vc=n_vc_tmp;} else {n_vc=max(n_vc, n_vc_tmp);} + if (t == 0) { + n_vc = n_vc_tmp; + } else { + n_vc = max(n_vc, n_vc_tmp); + } t++; } @@ -3123,475 +3326,490 @@ bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, // Read bimbam mean genotype file and calculate kinship matrix; this // time, the kinship matrix is not centered, and can contain multiple // K matrix. 
-bool BimbamKin (const string &file_geno, const int display_pace, - const vector<int> &indicator_idv, - const vector<int> &indicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<SNPINFO> &snpInfo, - const gsl_matrix *W, gsl_matrix *matrix_kin, - gsl_vector *vector_ns) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return false; - } - - string line; - char *ch_ptr; - - size_t n_miss; - double d, geno_mean, geno_var; - - size_t ni_test=matrix_kin->size1; - gsl_vector *geno=gsl_vector_alloc (ni_test); - gsl_vector *geno_miss=gsl_vector_alloc (ni_test); - - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - size_t n_vc=matrix_kin->size2/ni_test, i_vc; - string rs; - vector<size_t> ns_vec; - for (size_t i=0; i<n_vc; i++) { - ns_vec.push_back(0); - } - - // Create a large matrix. - size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc); - gsl_matrix_set_zero(Xlarge); - - size_t ns_test=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - rs=snpInfo[t].rs_number; // This line is new. 
- - geno_mean=0.0; n_miss=0; geno_var=0.0; - gsl_vector_set_all(geno_miss, 0); - - size_t j=0; - for (size_t i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0) {continue;} - ch_ptr=strtok (NULL, " , \t"); - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(geno_miss, i, 0); n_miss++; - } - else { - d=atof(ch_ptr); - gsl_vector_set (geno, j, d); - gsl_vector_set (geno_miss, j, 1); - geno_mean+=d; - geno_var+=d*d; - } - j++; - } - - geno_mean/=(double)(ni_test-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_test; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (geno_miss, i)==0) { - gsl_vector_set(geno, i, geno_mean); - } - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno); - gsl_blas_ddot (geno, geno, &geno_var); - geno_var/=(double)ni_test; - - if (geno_var!=0 && (mapRS2weight.size()==0 || - mapRS2weight.count(rs)!=0)) { - if (mapRS2weight.size()==0) { - d=1.0/geno_var; - } else { - d=mapRS2weight.at(rs)/geno_var; - } - - gsl_vector_scale (geno, sqrt(d)); - if (n_vc==1 || mapRS2cat.size()==0 ) { - gsl_vector_view Xlarge_col= - gsl_matrix_column(Xlarge,ns_vec[0]%msize); - gsl_vector_memcpy (&Xlarge_col.vector, geno); - ns_vec[0]++; - - if (ns_vec[0]%msize==0) { - eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin); - gsl_matrix_set_zero(Xlarge); - } - } else if (mapRS2cat.count(rs)!=0) { - i_vc=mapRS2cat.at(rs); - - gsl_vector_view Xlarge_col= - gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize); - gsl_vector_memcpy (&Xlarge_col.vector, geno); - - ns_vec[i_vc]++; - - if (ns_vec[i_vc]%msize==0) { - gsl_matrix_view X_sub= - gsl_matrix_submatrix(Xlarge,0,msize*i_vc, - ni_test,msize); - gsl_matrix_view kin_sub= - gsl_matrix_submatrix(matrix_kin,0,ni_test*i_vc, - ni_test,ni_test); - 
eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, - &X_sub.matrix, 1.0, &kin_sub.matrix); - - gsl_matrix_set_zero(&X_sub.matrix); - } - } - - } - ns_test++; - - } - - for (size_t i_vc=0; i_vc<n_vc; i_vc++) { - if (ns_vec[i_vc]%msize!=0) { - gsl_matrix_view X_sub= - gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize); - gsl_matrix_view kin_sub= - gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, - ni_test); - eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, - 1.0, &kin_sub.matrix); - } - } - - cout<<endl; - - for (size_t t=0; t<n_vc; t++) { - gsl_vector_set(vector_ns, t, ns_vec[t]); - - for (size_t i=0; i<ni_test; ++i) { - for (size_t j=0; j<=i; ++j) { - d=gsl_matrix_get (matrix_kin, j, i+ni_test*t); - d/=(double)ns_vec[t]; - gsl_matrix_set (matrix_kin, i, j+ni_test*t, d); - gsl_matrix_set (matrix_kin, j, i+ni_test*t, d); - } - } - } - - gsl_vector_free (geno); - gsl_vector_free (geno_miss); - - gsl_vector_free (Wtx); - gsl_matrix_free (WtW); - gsl_matrix_free (WtWi); - gsl_vector_free (WtWiWtx); - gsl_permutation_free (pmt); - - gsl_matrix_free (Xlarge); - - infile.close(); - infile.clear(); - - return true; +bool BimbamKin(const string &file_geno, const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + string line; + char *ch_ptr; + + size_t n_miss; + double d, geno_mean, geno_var; + + size_t ni_test = matrix_kin->size1; + gsl_vector *geno = gsl_vector_alloc(ni_test); + gsl_vector *geno_miss = gsl_vector_alloc(ni_test); + + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = 
gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + size_t n_vc = matrix_kin->size2 / ni_test, i_vc; + string rs; + vector<size_t> ns_vec; + for (size_t i = 0; i < n_vc; i++) { + ns_vec.push_back(0); + } + + // Create a large matrix. + size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc); + gsl_matrix_set_zero(Xlarge); + + size_t ns_test = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + rs = snpInfo[t].rs_number; // This line is new. 
+ + geno_mean = 0.0; + n_miss = 0; + geno_var = 0.0; + gsl_vector_set_all(geno_miss, 0); + + size_t j = 0; + for (size_t i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ch_ptr = strtok(NULL, " , \t"); + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(geno_miss, i, 0); + n_miss++; + } else { + d = atof(ch_ptr); + gsl_vector_set(geno, j, d); + gsl_vector_set(geno_miss, j, 1); + geno_mean += d; + geno_var += d * d; + } + j++; + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + gsl_blas_dgemv(CblasTrans, 1.0, W, geno, 0.0, Wtx); + gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); + gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno); + gsl_blas_ddot(geno, geno, &geno_var); + geno_var /= (double)ni_test; + + if (geno_var != 0 && + (mapRS2weight.size() == 0 || mapRS2weight.count(rs) != 0)) { + if (mapRS2weight.size() == 0) { + d = 1.0 / geno_var; + } else { + d = mapRS2weight.at(rs) / geno_var; + } + + gsl_vector_scale(geno, sqrt(d)); + if (n_vc == 1 || mapRS2cat.size() == 0) { + gsl_vector_view Xlarge_col = + gsl_matrix_column(Xlarge, ns_vec[0] % msize); + gsl_vector_memcpy(&Xlarge_col.vector, geno); + ns_vec[0]++; + + if (ns_vec[0] % msize == 0) { + eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + gsl_matrix_set_zero(Xlarge); + } + } else if (mapRS2cat.count(rs) != 0) { + i_vc = mapRS2cat.at(rs); + + gsl_vector_view Xlarge_col = + gsl_matrix_column(Xlarge, msize * i_vc + ns_vec[i_vc] % msize); + gsl_vector_memcpy(&Xlarge_col.vector, geno); + + ns_vec[i_vc]++; + + if (ns_vec[i_vc] % msize == 0) { + gsl_matrix_view X_sub = + gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize); + 
gsl_matrix_view kin_sub = gsl_matrix_submatrix( + matrix_kin, 0, ni_test * i_vc, ni_test, ni_test); + eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, + &kin_sub.matrix); + + gsl_matrix_set_zero(&X_sub.matrix); + } + } + } + ns_test++; + } + + for (size_t i_vc = 0; i_vc < n_vc; i_vc++) { + if (ns_vec[i_vc] % msize != 0) { + gsl_matrix_view X_sub = + gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize); + gsl_matrix_view kin_sub = + gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test); + eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, + &kin_sub.matrix); + } + } + + cout << endl; + + for (size_t t = 0; t < n_vc; t++) { + gsl_vector_set(vector_ns, t, ns_vec[t]); + + for (size_t i = 0; i < ni_test; ++i) { + for (size_t j = 0; j <= i; ++j) { + d = gsl_matrix_get(matrix_kin, j, i + ni_test * t); + d /= (double)ns_vec[t]; + gsl_matrix_set(matrix_kin, i, j + ni_test * t, d); + gsl_matrix_set(matrix_kin, j, i + ni_test * t, d); + } + } + } + + gsl_vector_free(geno); + gsl_vector_free(geno_miss); + + gsl_vector_free(Wtx); + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(WtWiWtx); + gsl_permutation_free(pmt); + + gsl_matrix_free(Xlarge); + + infile.close(); + infile.clear(); + + return true; } -bool PlinkKin (const string &file_bed, const int display_pace, - const vector<int> &indicator_idv, - const vector<int> &indicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<SNPINFO> &snpInfo, - const gsl_matrix *W, gsl_matrix *matrix_kin, - gsl_vector *vector_ns) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return false; - } - - char ch[1]; - bitset<8> b; - - size_t n_miss, ci_total, ci_test; - double d, geno_mean, geno_var; - - size_t ni_test=matrix_kin->size1; - size_t ni_total=indicator_idv.size(); - gsl_vector *geno=gsl_vector_alloc (ni_test); - - gsl_vector 
*Wtx=gsl_vector_alloc (W->size2); - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - size_t ns_test=0; - int n_bit; - - size_t n_vc=matrix_kin->size2/ni_test, i_vc; - string rs; - vector<size_t> ns_vec; - for (size_t i=0; i<n_vc; i++) { - ns_vec.push_back(0); - } - - // Create a large matrix. - size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (ni_test, msize*n_vc); - gsl_matrix_set_zero(Xlarge); - - // Calculate n_bit and c, the number of bit for each SNP. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers - infile.seekg(t*n_bit+3); - - rs=snpInfo[t].rs_number; // This line is new. - - // Read genotypes. 
- geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0; - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==ni_total) {break;} - if (indicator_idv[ci_total]==0) {ci_total++; continue;} - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(geno, ci_test, 2.0); - geno_mean+=2.0; geno_var+=4.0; - } - else { - gsl_vector_set(geno, ci_test, 1.0); - geno_mean+=1.0; - geno_var+=1.0; - } - } - else { - if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); } - else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; } - } - - ci_test++; - ci_total++; - } - } - - geno_mean/=(double)(ni_test-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_test; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_test; ++i) { - d=gsl_vector_get(geno,i); - if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);} - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - gsl_blas_dgemv (CblasTrans, 1.0, W, geno, 0.0, Wtx); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno); - gsl_blas_ddot (geno, geno, &geno_var); - geno_var/=(double)ni_test; - - if (geno_var!=0 && (mapRS2weight.size()==0 || - mapRS2weight.count(rs)!=0)) { - if (mapRS2weight.size()==0) { - d=1.0/geno_var; - } else { - d=mapRS2weight.at(rs)/geno_var; - } - - gsl_vector_scale (geno, sqrt(d)); - if (n_vc==1 || mapRS2cat.size()==0 ) { - gsl_vector_view Xlarge_col= - gsl_matrix_column (Xlarge, ns_vec[0]%msize); - gsl_vector_memcpy (&Xlarge_col.vector, geno); - ns_vec[0]++; - - if (ns_vec[0]%msize==0) { - eigenlib_dgemm("N","T",1.0,Xlarge,Xlarge,1.0,matrix_kin); - gsl_matrix_set_zero(Xlarge); - } - } else if (mapRS2cat.count(rs)!=0) { - i_vc=mapRS2cat.at(rs); - - gsl_vector_view Xlarge_col= - gsl_matrix_column(Xlarge,msize*i_vc+ns_vec[i_vc]%msize); - gsl_vector_memcpy (&Xlarge_col.vector, geno); - 
- ns_vec[i_vc]++; - - if (ns_vec[i_vc]%msize==0) { - gsl_matrix_view X_sub= - gsl_matrix_submatrix(Xlarge,0,msize*i_vc,ni_test, - msize); - gsl_matrix_view kin_sub= - gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, - ni_test, ni_test); - eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, - &X_sub.matrix, 1.0, &kin_sub.matrix); - - gsl_matrix_set_zero(&X_sub.matrix); - } - } - - - } - ns_test++; - } - - for (size_t i_vc=0; i_vc<n_vc; i_vc++) { - if (ns_vec[i_vc]%msize!=0) { - gsl_matrix_view X_sub= - gsl_matrix_submatrix(Xlarge, 0, msize*i_vc, ni_test, msize); - gsl_matrix_view kin_sub= - gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, - ni_test, ni_test); - eigenlib_dgemm ("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, - 1.0, &kin_sub.matrix); - } - } - - cout<<endl; - - for (size_t t=0; t<n_vc; t++) { - gsl_vector_set(vector_ns, t, ns_vec[t]); - - for (size_t i=0; i<ni_test; ++i) { - for (size_t j=0; j<=i; ++j) { - d=gsl_matrix_get (matrix_kin, j, i+ni_test*t); - d/=(double)ns_vec[t]; - gsl_matrix_set (matrix_kin, i, j+ni_test*t, d); - gsl_matrix_set (matrix_kin, j, i+ni_test*t, d); - } - } - } - - gsl_vector_free (geno); - - gsl_vector_free (Wtx); - gsl_matrix_free (WtW); - gsl_matrix_free (WtWi); - gsl_vector_free (WtWiWtx); - gsl_permutation_free (pmt); - - gsl_matrix_free (Xlarge); - - infile.close(); - infile.clear(); - - return true; +bool PlinkKin(const string &file_bed, const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns) { + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + char ch[1]; + bitset<8> b; + + size_t n_miss, ci_total, ci_test; + double d, geno_mean, geno_var; + + size_t ni_test = matrix_kin->size1; + size_t ni_total = 
indicator_idv.size(); + gsl_vector *geno = gsl_vector_alloc(ni_test); + + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + size_t ns_test = 0; + int n_bit; + + size_t n_vc = matrix_kin->size2 / ni_test, i_vc; + string rs; + vector<size_t> ns_vec; + for (size_t i = 0; i < n_vc; i++) { + ns_vec.push_back(0); + } + + // Create a large matrix. + size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc); + gsl_matrix_set_zero(Xlarge); + + // Calculate n_bit and c, the number of bit for each SNP. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers + infile.seekg(t * n_bit + 3); + + rs = snpInfo[t].rs_number; // This line is new. + + // Read genotypes. 
+ geno_mean = 0.0; + n_miss = 0; + ci_total = 0; + geno_var = 0.0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0; + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(geno, ci_test, 2.0); + geno_mean += 2.0; + geno_var += 4.0; + } else { + gsl_vector_set(geno, ci_test, 1.0); + geno_mean += 1.0; + geno_var += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(geno, ci_test, 0.0); + } else { + gsl_vector_set(geno, ci_test, -9.0); + n_miss++; + } + } + + ci_test++; + ci_total++; + } + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + d = gsl_vector_get(geno, i); + if (d == -9.0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + gsl_blas_dgemv(CblasTrans, 1.0, W, geno, 0.0, Wtx); + gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); + gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWtx, 1.0, geno); + gsl_blas_ddot(geno, geno, &geno_var); + geno_var /= (double)ni_test; + + if (geno_var != 0 && + (mapRS2weight.size() == 0 || mapRS2weight.count(rs) != 0)) { + if (mapRS2weight.size() == 0) { + d = 1.0 / geno_var; + } else { + d = mapRS2weight.at(rs) / geno_var; + } + + gsl_vector_scale(geno, sqrt(d)); + if (n_vc == 1 || mapRS2cat.size() == 0) { + gsl_vector_view Xlarge_col = + gsl_matrix_column(Xlarge, ns_vec[0] % msize); + gsl_vector_memcpy(&Xlarge_col.vector, geno); + ns_vec[0]++; + + if (ns_vec[0] % msize == 0) { + eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); + gsl_matrix_set_zero(Xlarge); + } + } else if (mapRS2cat.count(rs) != 0) { + i_vc = 
mapRS2cat.at(rs); + + gsl_vector_view Xlarge_col = + gsl_matrix_column(Xlarge, msize * i_vc + ns_vec[i_vc] % msize); + gsl_vector_memcpy(&Xlarge_col.vector, geno); + + ns_vec[i_vc]++; + + if (ns_vec[i_vc] % msize == 0) { + gsl_matrix_view X_sub = + gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize); + gsl_matrix_view kin_sub = gsl_matrix_submatrix( + matrix_kin, 0, ni_test * i_vc, ni_test, ni_test); + eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, + &kin_sub.matrix); + + gsl_matrix_set_zero(&X_sub.matrix); + } + } + } + ns_test++; + } + + for (size_t i_vc = 0; i_vc < n_vc; i_vc++) { + if (ns_vec[i_vc] % msize != 0) { + gsl_matrix_view X_sub = + gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize); + gsl_matrix_view kin_sub = + gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test); + eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0, + &kin_sub.matrix); + } + } + + cout << endl; + + for (size_t t = 0; t < n_vc; t++) { + gsl_vector_set(vector_ns, t, ns_vec[t]); + + for (size_t i = 0; i < ni_test; ++i) { + for (size_t j = 0; j <= i; ++j) { + d = gsl_matrix_get(matrix_kin, j, i + ni_test * t); + d /= (double)ns_vec[t]; + gsl_matrix_set(matrix_kin, i, j + ni_test * t, d); + gsl_matrix_set(matrix_kin, j, i + ni_test * t, d); + } + } + } + + gsl_vector_free(geno); + + gsl_vector_free(Wtx); + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(WtWiWtx); + gsl_permutation_free(pmt); + + gsl_matrix_free(Xlarge); + + infile.close(); + infile.clear(); + + return true; } -bool MFILEKin (const size_t mfile_mode, const string &file_mfile, - const int display_pace, const vector<int> &indicator_idv, - const vector<vector<int> > &mindicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<vector<SNPINFO> > &msnpInfo, - const gsl_matrix *W, gsl_matrix *matrix_kin, - gsl_vector *vector_ns) { - size_t n_vc=vector_ns->size, ni_test=matrix_kin->size1; 
+bool MFILEKin(const size_t mfile_mode, const string &file_mfile, + const int display_pace, const vector<int> &indicator_idv, + const vector<vector<int>> &mindicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<vector<SNPINFO>> &msnpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns) { + size_t n_vc = vector_ns->size, ni_test = matrix_kin->size1; gsl_matrix_set_zero(matrix_kin); gsl_vector_set_zero(vector_ns); - igzstream infile (file_mfile.c_str(), igzstream::in); + igzstream infile(file_mfile.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open mfile file: "<<file_mfile<<endl; + cout << "error! fail to open mfile file: " << file_mfile << endl; return false; } string file_name; - gsl_matrix *kin_tmp=gsl_matrix_alloc (matrix_kin->size1, matrix_kin->size2); - gsl_vector *ns_tmp=gsl_vector_alloc (vector_ns->size); + gsl_matrix *kin_tmp = gsl_matrix_alloc(matrix_kin->size1, matrix_kin->size2); + gsl_vector *ns_tmp = gsl_vector_alloc(vector_ns->size); - size_t l=0; + size_t l = 0; double d; while (!safeGetline(infile, file_name).eof()) { gsl_matrix_set_zero(kin_tmp); gsl_vector_set_zero(ns_tmp); - if (mfile_mode==1) { - file_name+=".bed"; - PlinkKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); + if (mfile_mode == 1) { + file_name += ".bed"; + PlinkKin(file_name, display_pace, indicator_idv, mindicator_snp[l], + mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); } else { - BimbamKin (file_name, display_pace, indicator_idv, mindicator_snp[l], mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); + BimbamKin(file_name, display_pace, indicator_idv, mindicator_snp[l], + mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp); } // Add ns. gsl_vector_add(vector_ns, ns_tmp); // Add kin. 
- for (size_t t=0; t<n_vc; t++) { - for (size_t i=0; i<ni_test; ++i) { - for (size_t j=0; j<=i; ++j) { - d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)+gsl_matrix_get (kin_tmp, j, i+ni_test*t)*gsl_vector_get(ns_tmp, t); - - gsl_matrix_set (matrix_kin, i, j+ni_test*t, d); - gsl_matrix_set (matrix_kin, j, i+ni_test*t, d); - } + for (size_t t = 0; t < n_vc; t++) { + for (size_t i = 0; i < ni_test; ++i) { + for (size_t j = 0; j <= i; ++j) { + d = gsl_matrix_get(matrix_kin, j, i + ni_test * t) + + gsl_matrix_get(kin_tmp, j, i + ni_test * t) * + gsl_vector_get(ns_tmp, t); + + gsl_matrix_set(matrix_kin, i, j + ni_test * t, d); + gsl_matrix_set(matrix_kin, j, i + ni_test * t, d); + } } } l++; } // Renormalize kin. - for (size_t t=0; t<n_vc; t++) { - for (size_t i=0; i<ni_test; ++i) { - for (size_t j=0; j<=i; ++j) { - d=gsl_matrix_get (matrix_kin, j, i+ni_test*t)/ - gsl_vector_get(vector_ns, t); - - gsl_matrix_set (matrix_kin, i, j+ni_test*t, d); - gsl_matrix_set (matrix_kin, j, i+ni_test*t, d); - + for (size_t t = 0; t < n_vc; t++) { + for (size_t i = 0; i < ni_test; ++i) { + for (size_t j = 0; j <= i; ++j) { + d = gsl_matrix_get(matrix_kin, j, i + ni_test * t) / + gsl_vector_get(vector_ns, t); + + gsl_matrix_set(matrix_kin, i, j + ni_test * t, d); + gsl_matrix_set(matrix_kin, j, i + ni_test * t, d); } } } - cout<<endl; + cout << endl; infile.close(); infile.clear(); @@ -3602,15 +3820,13 @@ bool MFILEKin (const size_t mfile_mode, const string &file_mfile, return true; } - // Read var file, store mapRS2wsnp. -bool ReadFile_wsnp (const string &file_wsnp, - map<string, double> &mapRS2weight) { +bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2weight) { mapRS2weight.clear(); - igzstream infile (file_wsnp.c_str(), igzstream::in); + igzstream infile(file_wsnp.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open snp weight file: "<<file_wsnp<<endl; + cout << "error! 
fail to open snp weight file: " << file_wsnp << endl; return false; } @@ -3619,29 +3835,29 @@ bool ReadFile_wsnp (const string &file_wsnp, double weight; while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - weight=atof(ch_ptr); - mapRS2weight[rs]=weight; + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + weight = atof(ch_ptr); + mapRS2weight[rs] = weight; } return true; } -bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, - map<string, vector<double> > &mapRS2wvector) { +bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc, + map<string, vector<double>> &mapRS2wvector) { mapRS2wvector.clear(); - igzstream infile (file_wcat.c_str(), igzstream::in); + igzstream infile(file_wcat.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open snp weight file: "<<file_wcat<<endl; + cout << "error! fail to open snp weight file: " << file_wcat << endl; return false; } char *ch_ptr; vector<double> weight; - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { weight.push_back(0.0); } @@ -3650,43 +3866,52 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, // Read header. 
HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_io (line, header); + ReadHeader_io(line, header); while (!safeGetline(infile, line).eof()) { - if (isBlankLine(line)) {continue;} - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - - size_t t=0; - for (size_t i=0; i<header.coln; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;} - else if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr; } - else if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr; } - else if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr; } - else if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr; } - else if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr; } - else { - weight[t]=atof(ch_ptr); t++; - if (t>n_vc) { - cout<<"error! Number of columns in the wcat file does not "<< - "match that of cat file."; - return false; - } - } - - ch_ptr=strtok (NULL, " , \t"); - } - - if (t!=n_vc) { - cout<<"error! Number of columns in the wcat file does not "<< - "match that of cat file."; + if (isBlankLine(line)) { + continue; + } + ch_ptr = strtok((char *)line.c_str(), " , \t"); + + size_t t = 0; + for (size_t i = 0; i < header.coln; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } else if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } else if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + } else if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + } else if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } else if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; + } else { + weight[t] = atof(ch_ptr); + t++; + if (t > n_vc) { + cout << "error! Number of columns in the wcat file does not " + << "match that of cat file."; + return false; + } + } + + ch_ptr = strtok(NULL, " , \t"); + } + + if (t != n_vc) { + cout << "error! 
Number of columns in the wcat file does not " + << "match that of cat file."; return false; } - if (header.rs_col==0) { - rs=chr+":"+pos; + if (header.rs_col == 0) { + rs = chr + ":" + pos; } - mapRS2wvector[rs]=weight; + mapRS2wvector[rs] = weight; } return true; @@ -3700,18 +3925,23 @@ bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, // the beta file for the second time, compute q, and Vq based on block // jacknife use the mapRS2var to select snps (and to ), calculate q do // a block-wise jacknife, and compute Vq -void ReadFile_beta (const string &file_beta, - const map<string, size_t> &mapRS2cat, - const map<string, double> &mapRS2wA, - vector<size_t> &vec_cat, vector<size_t> &vec_ni, - vector<double> &vec_weight, vector<double> &vec_z2, - size_t &ni_total, size_t &ns_total, size_t &ns_test) { - vec_cat.clear(); vec_ni.clear(); vec_weight.clear(); vec_z2.clear(); - ni_total=0; ns_total=0; ns_test=0; - - igzstream infile (file_beta.c_str(), igzstream::in); +void ReadFile_beta(const string &file_beta, + const map<string, size_t> &mapRS2cat, + const map<string, double> &mapRS2wA, vector<size_t> &vec_cat, + vector<size_t> &vec_ni, vector<double> &vec_weight, + vector<double> &vec_z2, size_t &ni_total, size_t &ns_total, + size_t &ns_test) { + vec_cat.clear(); + vec_ni.clear(); + vec_weight.clear(); + vec_z2.clear(); + ni_total = 0; + ns_total = 0; + ns_test = 0; + + igzstream infile(file_beta.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open beta file: "<<file_beta<<endl; + cout << "error! 
fail to open beta file: " << file_beta << endl; return; } @@ -3720,110 +3950,158 @@ void ReadFile_beta (const string &file_beta, string type; string rs, chr, a1, a0, pos, cm; - double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0; - size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0; + double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0, + af = 0, var_x = 0; + size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0; // Read header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_io (line, header); + ReadHeader_io(line, header); - if (header.n_col==0 ) { - if ( (header.nobs_col==0 && header.nmis_col==0) && - (header.ncase_col==0 && header.ncontrol_col==0) ) { - cout<<"error! missing sample size in the beta file."<<endl; + if (header.n_col == 0) { + if ((header.nobs_col == 0 && header.nmis_col == 0) && + (header.ncase_col == 0 && header.ncontrol_col == 0)) { + cout << "error! missing sample size in the beta file." << endl; } else { - cout<<"total sample size will be replaced by obs/mis sample size."<<endl; + cout << "total sample size will be replaced by obs/mis sample size." + << endl; } } - if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) && - header.chisq_col==0 && header.p_col==0) { - cout<<"error! missing z scores in the beta file."<<endl; + if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) && + header.chisq_col == 0 && header.p_col == 0) { + cout << "error! missing z scores in the beta file." 
<< endl; } while (!safeGetline(infile, line).eof()) { - if (isBlankLine(line)) {continue;} - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - - z=0; beta=0; se_beta=0; chisq=0; pvalue=0; - n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; af=0; var_x=0; - for (size_t i=0; i<header.coln; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;} - if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;} - if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;} - if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;} - if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;} - if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;} + if (isBlankLine(line)) { + continue; + } + ch_ptr = strtok((char *)line.c_str(), " , \t"); + + z = 0; + beta = 0; + se_beta = 0; + chisq = 0; + pvalue = 0; + n_total = 0; + n_mis = 0; + n_obs = 0; + n_case = 0; + n_control = 0; + af = 0; + var_x = 0; + for (size_t i = 0; i < header.coln; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } + if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } + if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + } + if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + } + if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } + if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; + } - if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);} - if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);} - if (header.sebeta_col!=0 && header.sebeta_col==i+1) { - se_beta=atof(ch_ptr); + if (header.z_col != 0 && header.z_col == i + 1) { + z = atof(ch_ptr); + } + if (header.beta_col != 0 && header.beta_col == i + 1) { + beta = atof(ch_ptr); + } + if (header.sebeta_col != 0 && header.sebeta_col == i + 1) { + se_beta = atof(ch_ptr); + } + if (header.chisq_col != 0 && header.chisq_col == i + 1) { + chisq = atof(ch_ptr); + } + if (header.p_col != 0 && header.p_col == i + 
1) { + pvalue = atof(ch_ptr); } - if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);} - if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);} - if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);} - if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);} - if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);} - if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);} - if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) { - n_control=atoi(ch_ptr); + if (header.n_col != 0 && header.n_col == i + 1) { + n_total = atoi(ch_ptr); + } + if (header.nmis_col != 0 && header.nmis_col == i + 1) { + n_mis = atoi(ch_ptr); + } + if (header.nobs_col != 0 && header.nobs_col == i + 1) { + n_obs = atoi(ch_ptr); + } + if (header.ncase_col != 0 && header.ncase_col == i + 1) { + n_case = atoi(ch_ptr); + } + if (header.ncontrol_col != 0 && header.ncontrol_col == i + 1) { + n_control = atoi(ch_ptr); + } + if (header.af_col != 0 && header.af_col == i + 1) { + af = atof(ch_ptr); + } + if (header.var_col != 0 && header.var_col == i + 1) { + var_x = atof(ch_ptr); } - if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);} - if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);} - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } - if (header.rs_col==0) { - rs=chr+":"+pos; + if (header.rs_col == 0) { + rs = chr + ":" + pos; } - if (header.n_col==0) { - if (header.nmis_col!=0 && header.nobs_col!=0) { - n_total=n_mis+n_obs; + if (header.n_col == 0) { + if (header.nmis_col != 0 && header.nobs_col != 0) { + n_total = n_mis + n_obs; } else { - n_total=n_case+n_control; + n_total = n_case + n_control; } } // Both z values and beta/se_beta have directions, while // chisq/pvalue do not. 
- if (header.z_col!=0) { - zsquare=z*z; - } else if (header.beta_col!=0 && header.sebeta_col!=0) { - z=beta/se_beta; - zsquare=z*z; - } else if (header.chisq_col!=0) { - zsquare=chisq; - } else if (header.p_col!=0) { - zsquare=gsl_cdf_chisq_Qinv (pvalue, 1); - } else {zsquare=0;} + if (header.z_col != 0) { + zsquare = z * z; + } else if (header.beta_col != 0 && header.sebeta_col != 0) { + z = beta / se_beta; + zsquare = z * z; + } else if (header.chisq_col != 0) { + zsquare = chisq; + } else if (header.p_col != 0) { + zsquare = gsl_cdf_chisq_Qinv(pvalue, 1); + } else { + zsquare = 0; + } // Obtain var_x. - if (header.var_col==0 && header.af_col!=0) { - var_x=2.0*af*(1.0-af); + if (header.var_col == 0 && header.af_col != 0) { + var_x = 2.0 * af * (1.0 - af); } // If the SNP is also present in cor file, then do calculations. - if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) && - (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) && zsquare!=0) { - if (mapRS2cat.size()!=0) { - vec_cat.push_back(mapRS2cat.at(rs)); + if ((mapRS2wA.size() == 0 || mapRS2wA.count(rs) != 0) && + (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0) && zsquare != 0) { + if (mapRS2cat.size() != 0) { + vec_cat.push_back(mapRS2cat.at(rs)); } else { - vec_cat.push_back(0); + vec_cat.push_back(0); } vec_ni.push_back(n_total); - if (mapRS2wA.size()==0) { - vec_weight.push_back(1); + if (mapRS2wA.size() == 0) { + vec_weight.push_back(1); } else { - vec_weight.push_back(mapRS2wA.at(rs)); + vec_weight.push_back(mapRS2wA.at(rs)); } vec_z2.push_back(zsquare); - ni_total=max(ni_total, n_total); + ni_total = max(ni_total, n_total); ns_test++; } @@ -3836,15 +4114,15 @@ void ReadFile_beta (const string &file_beta, return; } -void ReadFile_beta (const string &file_beta, - const map<string, double> &mapRS2wA, - map<string, string> &mapRS2A1, - map<string, double> &mapRS2z) { - mapRS2A1.clear(); mapRS2z.clear(); +void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA, + map<string, 
string> &mapRS2A1, + map<string, double> &mapRS2z) { + mapRS2A1.clear(); + mapRS2z.clear(); - igzstream infile (file_beta.c_str(), igzstream::in); + igzstream infile(file_beta.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open beta file: "<<file_beta<<endl; + cout << "error! fail to open beta file: " << file_beta << endl; return; } @@ -3853,92 +4131,137 @@ void ReadFile_beta (const string &file_beta, string type; string rs, chr, a1, a0, pos, cm; - double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, af=0, var_x=0; - size_t n_total=0, n_mis=0, n_obs=0, n_case=0, n_control=0; - size_t ni_total=0, ns_total=0, ns_test=0; + double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, af = 0, var_x = 0; + size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0; + size_t ni_total = 0, ns_total = 0, ns_test = 0; // Read header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_io (line, header); + ReadHeader_io(line, header); - if (header.n_col==0 ) { - if ((header.nobs_col==0 && header.nmis_col==0) && - (header.ncase_col==0 && header.ncontrol_col==0)) { - cout<<"error! missing sample size in the beta file."<<endl; + if (header.n_col == 0) { + if ((header.nobs_col == 0 && header.nmis_col == 0) && + (header.ncase_col == 0 && header.ncontrol_col == 0)) { + cout << "error! missing sample size in the beta file." << endl; } else { - cout<<"total sample size will be replaced by obs/mis sample size."<<endl; + cout << "total sample size will be replaced by obs/mis sample size." + << endl; } } - if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0)) { - cout<<"error! missing z scores in the beta file."<<endl; + if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0)) { + cout << "error! missing z scores in the beta file." 
<< endl; } while (!safeGetline(infile, line).eof()) { - if (isBlankLine(line)) {continue;} - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - - z=0; beta=0; se_beta=0; chisq=0; pvalue=0; - n_total=0; n_mis=0; n_obs=0; n_case=0; n_control=0; af=0; var_x=0; - for (size_t i=0; i<header.coln; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;} - if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;} - if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;} - if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;} - if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;} - if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;} + if (isBlankLine(line)) { + continue; + } + ch_ptr = strtok((char *)line.c_str(), " , \t"); + + z = 0; + beta = 0; + se_beta = 0; + chisq = 0; + pvalue = 0; + n_total = 0; + n_mis = 0; + n_obs = 0; + n_case = 0; + n_control = 0; + af = 0; + var_x = 0; + for (size_t i = 0; i < header.coln; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } + if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } + if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + } + if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + } + if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } + if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; + } - if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);} - if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);} - if (header.sebeta_col!=0 && header.sebeta_col==i+1) { - se_beta=atof(ch_ptr); + if (header.z_col != 0 && header.z_col == i + 1) { + z = atof(ch_ptr); + } + if (header.beta_col != 0 && header.beta_col == i + 1) { + beta = atof(ch_ptr); + } + if (header.sebeta_col != 0 && header.sebeta_col == i + 1) { + se_beta = atof(ch_ptr); + } + if (header.chisq_col != 0 && header.chisq_col == i + 1) { + chisq = atof(ch_ptr); + } + if (header.p_col != 0 && header.p_col == i + 
1) { + pvalue = atof(ch_ptr); } - if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);} - if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);} - if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);} - if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);} - if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);} - if (header.ncase_col!=0 && header.ncase_col==i+1) {n_case=atoi(ch_ptr);} - if (header.ncontrol_col!=0 && header.ncontrol_col==i+1) { - n_control=atoi(ch_ptr); + if (header.n_col != 0 && header.n_col == i + 1) { + n_total = atoi(ch_ptr); + } + if (header.nmis_col != 0 && header.nmis_col == i + 1) { + n_mis = atoi(ch_ptr); + } + if (header.nobs_col != 0 && header.nobs_col == i + 1) { + n_obs = atoi(ch_ptr); + } + if (header.ncase_col != 0 && header.ncase_col == i + 1) { + n_case = atoi(ch_ptr); + } + if (header.ncontrol_col != 0 && header.ncontrol_col == i + 1) { + n_control = atoi(ch_ptr); } - if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);} - if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);} + if (header.af_col != 0 && header.af_col == i + 1) { + af = atof(ch_ptr); + } + if (header.var_col != 0 && header.var_col == i + 1) { + var_x = atof(ch_ptr); + } - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } - if (header.rs_col==0) { - rs=chr+":"+pos; + if (header.rs_col == 0) { + rs = chr + ":" + pos; } - if (header.n_col==0) { - if (header.nmis_col!=0 && header.nobs_col!=0) { - n_total=n_mis+n_obs; + if (header.n_col == 0) { + if (header.nmis_col != 0 && header.nobs_col != 0) { + n_total = n_mis + n_obs; } else { - n_total=n_case+n_control; + n_total = n_case + n_control; } } // Both z values and beta/se_beta have directions, while // chisq/pvalue do not. 
- if (header.z_col!=0) { - z=z; - } else if (header.beta_col!=0 && header.sebeta_col!=0) { - z=beta/se_beta; + if (header.z_col != 0) { + z = z; + } else if (header.beta_col != 0 && header.sebeta_col != 0) { + z = beta / se_beta; } else { - z=0; + z = 0; } // If the snp is also present in cor file, then do calculations. - if ( (mapRS2wA.size()==0 || mapRS2wA.count(rs)!=0) ) { - mapRS2z[rs]=z; - mapRS2A1[rs]=a1; + if ((mapRS2wA.size() == 0 || mapRS2wA.count(rs) != 0)) { + mapRS2z[rs] = z; + mapRS2A1[rs] = a1; - ni_total=max(ni_total, n_total); + ni_total = max(ni_total, n_total); ns_test++; } @@ -3951,139 +4274,155 @@ void ReadFile_beta (const string &file_beta, return; } -void Calcq (const size_t n_block, const vector<size_t> &vec_cat, - const vector<size_t> &vec_ni, const vector<double> &vec_weight, - const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, - gsl_vector *s) { - gsl_matrix_set_zero (Vq); - gsl_vector_set_zero (q); - gsl_vector_set_zero (s); +void Calcq(const size_t n_block, const vector<size_t> &vec_cat, + const vector<size_t> &vec_ni, const vector<double> &vec_weight, + const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, + gsl_vector *s) { + gsl_matrix_set_zero(Vq); + gsl_vector_set_zero(q); + gsl_vector_set_zero(s); size_t cat, n_total; double w, zsquare; vector<double> vec_q, vec_s, n_snps; - for (size_t i=0; i<q->size; i++) { + for (size_t i = 0; i < q->size; i++) { vec_q.push_back(0.0); vec_s.push_back(0.0); n_snps.push_back(0.0); } - vector<vector<double> > mat_q, mat_s; - for (size_t i=0; i<n_block; i++) { + vector<vector<double>> mat_q, mat_s; + for (size_t i = 0; i < n_block; i++) { mat_q.push_back(vec_q); mat_s.push_back(vec_s); } // Compute q and s. - for (size_t i=0; i<vec_cat.size(); i++) { + for (size_t i = 0; i < vec_cat.size(); i++) { // Extract quantities. 
- cat=vec_cat[i]; - n_total=vec_ni[i]; - w=vec_weight[i]; - zsquare=vec_z2[i]; + cat = vec_cat[i]; + n_total = vec_ni[i]; + w = vec_weight[i]; + zsquare = vec_z2[i]; // Compute q and s. - vec_q[cat]+=(zsquare-1.0)*w/(double)n_total; - vec_s[cat]+=w; + vec_q[cat] += (zsquare - 1.0) * w / (double)n_total; + vec_s[cat] += w; n_snps[cat]++; } // Update q; vec_q is used again for computing Vq below. - for (size_t i=0; i<q->size; i++) { - if (vec_s[i]!=0) { - gsl_vector_set(q, i, vec_q[i]/vec_s[i]); + for (size_t i = 0; i < q->size; i++) { + if (vec_s[i] != 0) { + gsl_vector_set(q, i, vec_q[i] / vec_s[i]); } gsl_vector_set(s, i, vec_s[i]); } // Compute Vq; divide SNPs in each category into evenly distributed // blocks. - size_t t=0, b=0, n_snp=0; + size_t t = 0, b = 0, n_snp = 0; double d, m, n; - for (size_t l=0; l<q->size; l++) { - n_snp=floor(n_snps[l]/n_block); t=0; b=0; - if (n_snp==0) {continue;} + for (size_t l = 0; l < q->size; l++) { + n_snp = floor(n_snps[l] / n_block); + t = 0; + b = 0; + if (n_snp == 0) { + continue; + } // Initiate everything to zero. - for (size_t i=0; i<n_block; i++) { - for (size_t j=0; j<q->size; j++) { - mat_q[i][j]=0; - mat_s[i][j]=0; + for (size_t i = 0; i < n_block; i++) { + for (size_t j = 0; j < q->size; j++) { + mat_q[i][j] = 0; + mat_s[i][j] = 0; } } // Record values. - for (size_t i=0; i<vec_cat.size(); i++) { + for (size_t i = 0; i < vec_cat.size(); i++) { // Extract quantities. - cat=vec_cat[i]; - n_total=vec_ni[i]; - w=vec_weight[i]; - zsquare=vec_z2[i]; + cat = vec_cat[i]; + n_total = vec_ni[i]; + w = vec_weight[i]; + zsquare = vec_z2[i]; // Save quantities for computing Vq (which is not divided by // n_total). 
- mat_q[b][cat]+=(zsquare-1.0)*w; - mat_s[b][cat]+=w; - - if (cat==l) { - if (b<n_block-1) { - if (t<n_snp-1) {t++;} else {b++; t=0;} - } else { - t++; - } + mat_q[b][cat] += (zsquare - 1.0) * w; + mat_s[b][cat] += w; + + if (cat == l) { + if (b < n_block - 1) { + if (t < n_snp - 1) { + t++; + } else { + b++; + t = 0; + } + } else { + t++; + } } } // Center mat_q. - for (size_t i=0; i<q->size; i++) { - m=0; n=0; - for (size_t k=0; k<n_block; k++) { - if (mat_s[k][i]!=0 && vec_s[i]!=mat_s[k][i]) { - d=(vec_q[i]-mat_q[k][i])/(vec_s[i]-mat_s[k][i]); - mat_q[k][i]=d; - m+=d; - n++; - } + for (size_t i = 0; i < q->size; i++) { + m = 0; + n = 0; + for (size_t k = 0; k < n_block; k++) { + if (mat_s[k][i] != 0 && vec_s[i] != mat_s[k][i]) { + d = (vec_q[i] - mat_q[k][i]) / (vec_s[i] - mat_s[k][i]); + mat_q[k][i] = d; + m += d; + n++; + } + } + if (n != 0) { + m /= n; } - if (n!=0) {m/=n;} - for (size_t k=0; k<n_block; k++) { - if (mat_q[k][i]!=0) { - mat_q[k][i]-=m; - } + for (size_t k = 0; k < n_block; k++) { + if (mat_q[k][i] != 0) { + mat_q[k][i] -= m; + } } } // Compute Vq for l'th row and l'th column only. 
- for (size_t i=0; i<q->size; i++) { - d=0; n=0; - for (size_t k=0; k<n_block; k++) { - if (mat_q[k][l]!=0 && mat_q[k][i]!=0) { - d+=mat_q[k][l]*mat_q[k][i]; - n++; - } - } - if (n!=0) { - d/=n; - d*=n-1; - } - d+=gsl_matrix_get(Vq, i, l); + for (size_t i = 0; i < q->size; i++) { + d = 0; + n = 0; + for (size_t k = 0; k < n_block; k++) { + if (mat_q[k][l] != 0 && mat_q[k][i] != 0) { + d += mat_q[k][l] * mat_q[k][i]; + n++; + } + } + if (n != 0) { + d /= n; + d *= n - 1; + } + d += gsl_matrix_get(Vq, i, l); gsl_matrix_set(Vq, i, l, d); - if (i!=l) {gsl_matrix_set(Vq, l, i, d);} + if (i != l) { + gsl_matrix_set(Vq, l, i, d); + } } - } - //divide the off diagonal elements of Vq by 2 - for (size_t i=0; i<q->size; i++) { - for (size_t j=i; j<q->size; j++) { - if (i==j) {continue;} - d=gsl_matrix_get(Vq, i, j); - gsl_matrix_set(Vq, i, j, d/2); - gsl_matrix_set(Vq, j, i, d/2); + // divide the off diagonal elements of Vq by 2 + for (size_t i = 0; i < q->size; i++) { + for (size_t j = i; j < q->size; j++) { + if (i == j) { + continue; + } + d = gsl_matrix_get(Vq, i, j); + gsl_matrix_set(Vq, i, j, d / 2); + gsl_matrix_set(Vq, j, i, d / 2); } } @@ -4091,20 +4430,19 @@ void Calcq (const size_t n_block, const vector<size_t> &vec_cat, } // Read vector file. -void ReadFile_vector (const string &file_vec, gsl_vector *vec) -{ - igzstream infile (file_vec.c_str(), igzstream::in); +void ReadFile_vector(const string &file_vec, gsl_vector *vec) { + igzstream infile(file_vec.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open vector file: "<<file_vec<<endl; + cout << "error! 
fail to open vector file: " << file_vec << endl; return; } string line; char *ch_ptr; - for (size_t i=0; i<vec->size; i++) { + for (size_t i = 0; i < vec->size; i++) { !safeGetline(infile, line).eof(); - ch_ptr=strtok ((char *)line.c_str(), " , \t"); + ch_ptr = strtok((char *)line.c_str(), " , \t"); gsl_vector_set(vec, i, atof(ch_ptr)); } @@ -4114,22 +4452,22 @@ void ReadFile_vector (const string &file_vec, gsl_vector *vec) return; } -void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) { - igzstream infile (file_mat.c_str(), igzstream::in); +void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) { + igzstream infile(file_mat.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open matrix file: "<<file_mat<<endl; + cout << "error! fail to open matrix file: " << file_mat << endl; return; } string line; char *ch_ptr; - for (size_t i=0; i<mat->size1; i++) { + for (size_t i = 0; i < mat->size1; i++) { !safeGetline(infile, line).eof(); - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - for (size_t j=0; j<mat->size2; j++) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + for (size_t j = 0; j < mat->size2; j++) { gsl_matrix_set(mat, i, j, atof(ch_ptr)); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } } @@ -4139,32 +4477,32 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat) { return; } -void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1, - gsl_matrix *mat2) { - igzstream infile (file_mat.c_str(), igzstream::in); +void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1, + gsl_matrix *mat2) { + igzstream infile(file_mat.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open matrix file: "<<file_mat<<endl; + cout << "error! 
fail to open matrix file: " << file_mat << endl; return; } string line; char *ch_ptr; - for (size_t i=0; i<mat1->size1; i++) { + for (size_t i = 0; i < mat1->size1; i++) { !safeGetline(infile, line).eof(); - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - for (size_t j=0; j<mat1->size2; j++) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + for (size_t j = 0; j < mat1->size2; j++) { gsl_matrix_set(mat1, i, j, atof(ch_ptr)); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } } - for (size_t i=0; i<mat2->size1; i++) { + for (size_t i = 0; i < mat2->size1; i++) { !safeGetline(infile, line).eof(); - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - for (size_t j=0; j<mat2->size2; j++) { + ch_ptr = strtok((char *)line.c_str(), " , \t"); + for (size_t j = 0; j < mat2->size2; j++) { gsl_matrix_set(mat2, i, j, atof(ch_ptr)); - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } } @@ -4175,24 +4513,24 @@ void ReadFile_matrix (const string &file_mat, gsl_matrix *mat1, } // Read study file. 
-void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat, - gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { - string Vqfile=file_study+".Vq.txt"; - string sfile=file_study+".size.txt"; - string qfile=file_study+".q.txt"; +void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat, + gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { + string Vqfile = file_study + ".Vq.txt"; + string sfile = file_study + ".size.txt"; + string qfile = file_study + ".q.txt"; - gsl_vector *s=gsl_vector_alloc (s_vec->size+1); + gsl_vector *s = gsl_vector_alloc(s_vec->size + 1); ReadFile_matrix(Vqfile, Vq_mat); ReadFile_vector(sfile, s); ReadFile_vector(qfile, q_vec); double d; - for (size_t i=0; i<s_vec->size; i++) { - d=gsl_vector_get (s, i); - gsl_vector_set (s_vec, i, d); + for (size_t i = 0; i < s_vec->size; i++) { + d = gsl_vector_get(s, i); + gsl_vector_set(s_vec, i, d); } - ni=gsl_vector_get (s, s_vec->size); + ni = gsl_vector_get(s, s_vec->size); gsl_vector_free(s); @@ -4200,22 +4538,22 @@ void ReadFile_study (const string &file_study, gsl_matrix *Vq_mat, } // Read reference file. 
-void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat, - gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { - string sfile=file_ref+".size.txt"; - string Sfile=file_ref+".S.txt"; +void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat, + gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { + string sfile = file_ref + ".size.txt"; + string Sfile = file_ref + ".S.txt"; - gsl_vector *s=gsl_vector_alloc (s_vec->size+1); + gsl_vector *s = gsl_vector_alloc(s_vec->size + 1); ReadFile_vector(sfile, s); ReadFile_matrix(Sfile, S_mat, Svar_mat); double d; - for (size_t i=0; i<s_vec->size; i++) { - d=gsl_vector_get (s, i); - gsl_vector_set (s_vec, i, d); + for (size_t i = 0; i < s_vec->size; i++) { + d = gsl_vector_get(s, i); + gsl_vector_set(s_vec, i, d); } - ni=gsl_vector_get (s, s_vec->size); + ni = gsl_vector_get(s, s_vec->size); gsl_vector_free(s); @@ -4223,20 +4561,20 @@ void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat, } // Read mstudy file. -void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, - gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { +void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat, + gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni) { gsl_matrix_set_zero(Vq_mat); gsl_vector_set_zero(q_vec); gsl_vector_set_zero(s_vec); - ni=0; + ni = 0; - gsl_matrix *Vq_sub=gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2); - gsl_vector *q_sub=gsl_vector_alloc(q_vec->size); - gsl_vector *s=gsl_vector_alloc (s_vec->size+1); + gsl_matrix *Vq_sub = gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2); + gsl_vector *q_sub = gsl_vector_alloc(q_vec->size); + gsl_vector *s = gsl_vector_alloc(s_vec->size + 1); - igzstream infile (file_mstudy.c_str(), igzstream::in); + igzstream infile(file_mstudy.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open mstudy file: "<<file_mstudy<<endl; + cout << "error! 
fail to open mstudy file: " << file_mstudy << endl; return; } @@ -4244,51 +4582,64 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, double d1, d2, d; while (!safeGetline(infile, file_name).eof()) { - string Vqfile=file_name+".Vq.txt"; - string sfile=file_name+".size.txt"; - string qfile=file_name+".q.txt"; + string Vqfile = file_name + ".Vq.txt"; + string sfile = file_name + ".size.txt"; + string qfile = file_name + ".q.txt"; ReadFile_matrix(Vqfile, Vq_sub); ReadFile_vector(sfile, s); ReadFile_vector(qfile, q_sub); - ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size)); + ni = max(ni, (size_t)gsl_vector_get(s, s_vec->size)); - for (size_t i=0; i<s_vec->size; i++) { - d1=gsl_vector_get (s, i); - if (d1==0) {continue;} + for (size_t i = 0; i < s_vec->size; i++) { + d1 = gsl_vector_get(s, i); + if (d1 == 0) { + continue; + } - d=gsl_vector_get(q_vec, i)+gsl_vector_get(q_sub, i)*d1; + d = gsl_vector_get(q_vec, i) + gsl_vector_get(q_sub, i) * d1; gsl_vector_set(q_vec, i, d); - d=gsl_vector_get(s_vec, i)+d1; + d = gsl_vector_get(s_vec, i) + d1; gsl_vector_set(s_vec, i, d); - for (size_t j=i; j<s_vec->size; j++) { - d2=gsl_vector_get (s, j); - if (d2==0) {continue;} + for (size_t j = i; j < s_vec->size; j++) { + d2 = gsl_vector_get(s, j); + if (d2 == 0) { + continue; + } - d=gsl_matrix_get(Vq_mat, i, j)+gsl_matrix_get(Vq_sub, i, j)*d1*d2; - gsl_matrix_set(Vq_mat, i, j, d); - if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);} + d = gsl_matrix_get(Vq_mat, i, j) + + gsl_matrix_get(Vq_sub, i, j) * d1 * d2; + gsl_matrix_set(Vq_mat, i, j, d); + if (i != j) { + gsl_matrix_set(Vq_mat, j, i, d); + } } } } - for (size_t i=0; i<s_vec->size; i++) { - d1=gsl_vector_get (s_vec, i); - if (d1==0) {continue;} + for (size_t i = 0; i < s_vec->size; i++) { + d1 = gsl_vector_get(s_vec, i); + if (d1 == 0) { + continue; + } - d=gsl_vector_get (q_vec, i); - gsl_vector_set (q_vec, i, d/d1); + d = gsl_vector_get(q_vec, i); + gsl_vector_set(q_vec, i, d / d1); - for (size_t j=i; 
j<s_vec->size; j++) { - d2=gsl_vector_get (s_vec, j); - if (d2==0) {continue;} + for (size_t j = i; j < s_vec->size; j++) { + d2 = gsl_vector_get(s_vec, j); + if (d2 == 0) { + continue; + } - d=gsl_matrix_get (Vq_mat, i, j)/(d1*d2); - gsl_matrix_set (Vq_mat, i, j, d); - if (i!=j) {gsl_matrix_set(Vq_mat, j, i, d);} + d = gsl_matrix_get(Vq_mat, i, j) / (d1 * d2); + gsl_matrix_set(Vq_mat, i, j, d); + if (i != j) { + gsl_matrix_set(Vq_mat, j, i, d); + } } } @@ -4300,20 +4651,20 @@ void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq_mat, } // Read reference file. -void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, - gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { +void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat, + gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni) { gsl_matrix_set_zero(S_mat); gsl_matrix_set_zero(Svar_mat); gsl_vector_set_zero(s_vec); - ni=0; + ni = 0; - gsl_matrix *S_sub=gsl_matrix_alloc (S_mat->size1, S_mat->size2); - gsl_matrix *Svar_sub=gsl_matrix_alloc (Svar_mat->size1, Svar_mat->size2); - gsl_vector *s=gsl_vector_alloc (s_vec->size+1); + gsl_matrix *S_sub = gsl_matrix_alloc(S_mat->size1, S_mat->size2); + gsl_matrix *Svar_sub = gsl_matrix_alloc(Svar_mat->size1, Svar_mat->size2); + gsl_vector *s = gsl_vector_alloc(s_vec->size + 1); - igzstream infile (file_mref.c_str(), igzstream::in); + igzstream infile(file_mref.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open mref file: "<<file_mref<<endl; + cout << "error! fail to open mref file: " << file_mref << endl; return; } @@ -4321,51 +4672,59 @@ void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, double d1, d2, d; while (!safeGetline(infile, file_name).eof()) { - string sfile=file_name+".size.txt"; - string Sfile=file_name+".S.txt"; + string sfile = file_name + ".size.txt"; + string Sfile = file_name + ".S.txt"; ReadFile_vector(sfile, s); ReadFile_matrix(Sfile, S_sub, Svar_sub); // Update s_vec and ni. 
- for (size_t i=0; i<s_vec->size; i++) { - d=gsl_vector_get (s, i)+gsl_vector_get (s_vec, i); - gsl_vector_set (s_vec, i, d); + for (size_t i = 0; i < s_vec->size; i++) { + d = gsl_vector_get(s, i) + gsl_vector_get(s_vec, i); + gsl_vector_set(s_vec, i, d); } - ni=max(ni, (size_t)gsl_vector_get (s, s_vec->size)); + ni = max(ni, (size_t)gsl_vector_get(s, s_vec->size)); // Update S and Svar from each file. - for (size_t i=0; i<S_mat->size1; i++) { - d1=gsl_vector_get(s, i); - for (size_t j=0; j<S_mat->size2; j++) { - d2=gsl_vector_get(s, j); - - d=gsl_matrix_get(S_sub, i, j)*d1*d2; - gsl_matrix_set(S_sub, i, j, d); - d=gsl_matrix_get(Svar_sub, i, j)*d1*d2*d1*d2; - gsl_matrix_set(Svar_sub, i, j, d); + for (size_t i = 0; i < S_mat->size1; i++) { + d1 = gsl_vector_get(s, i); + for (size_t j = 0; j < S_mat->size2; j++) { + d2 = gsl_vector_get(s, j); + + d = gsl_matrix_get(S_sub, i, j) * d1 * d2; + gsl_matrix_set(S_sub, i, j, d); + d = gsl_matrix_get(Svar_sub, i, j) * d1 * d2 * d1 * d2; + gsl_matrix_set(Svar_sub, i, j, d); } } - gsl_matrix_add (S_mat, S_sub); - gsl_matrix_add (Svar_mat, Svar_sub); + gsl_matrix_add(S_mat, S_sub); + gsl_matrix_add(Svar_mat, Svar_sub); } // Final: update S and Svar. 
- for (size_t i=0; i<S_mat->size1; i++) { - d1=gsl_vector_get(s_vec, i); - if (d1==0) {continue;} - for (size_t j=i; j<S_mat->size2; j++) { - d2=gsl_vector_get(s_vec, j); - if (d2==0) {continue;} - - d=gsl_matrix_get(S_mat, i, j)/(d1*d2); + for (size_t i = 0; i < S_mat->size1; i++) { + d1 = gsl_vector_get(s_vec, i); + if (d1 == 0) { + continue; + } + for (size_t j = i; j < S_mat->size2; j++) { + d2 = gsl_vector_get(s_vec, j); + if (d2 == 0) { + continue; + } + + d = gsl_matrix_get(S_mat, i, j) / (d1 * d2); gsl_matrix_set(S_mat, i, j, d); - if (i!=j) {gsl_matrix_set(S_mat, j, i, d);} + if (i != j) { + gsl_matrix_set(S_mat, j, i, d); + } - d=gsl_matrix_get(Svar_mat, i, j)/(d1*d2*d1*d2); + d = gsl_matrix_get(Svar_mat, i, j) / (d1 * d2 * d1 * d2); gsl_matrix_set(Svar_mat, i, j, d); - if (i!=j) {gsl_matrix_set(Svar_mat, j, i, d);} + if (i != j) { + gsl_matrix_set(Svar_mat, j, i, d); + } } } @@ -19,195 +19,172 @@ #ifndef __IO_H__ #define __IO_H__ -#include <vector> -#include <map> -#include <algorithm> -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" +#include <algorithm> +#include <map> +#include <vector> #include "gzstream.h" #include "param.h" using namespace std; -void ProgressBar (string str, double p, double total); -void ProgressBar (string str, double p, double total, double ratio); -std::istream& safeGetline(std::istream& is, std::string& t); - -bool ReadFile_snps (const string &file_snps, set<string> &setSnps); -bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps); -bool ReadFile_log (const string &file_log, double &pheno_mean); - -bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo); -bool ReadFile_fam (const string &file_fam, - vector<vector<int> > &indicator_pheno, - vector<vector<double> > &pheno, - map<string, int> &mapID2num, - const vector<size_t> &p_column); - -bool ReadFile_cvt (const string &file_cvt, - vector<int> &indicator_cvt, - vector<vector<double> > &cvt, - size_t 
&n_cvt); -bool ReadFile_anno (const string &file_bim, map<string, string> &mapRS2chr, - map<string, long int> &mapRS2bp, - map<string, double> &mapRS2cM); -bool ReadFile_pheno (const string &file_pheno, - vector<vector<int> > &indicator_pheno, - vector<vector<double> > &pheno, - const vector<size_t> &p_column); -bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, - vector<double> &pheno, const int &p_column); - -bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, const double &maf_level, - const double &miss_level, const double &hwe_level, - const double &r2_level, map<string, string> &mapRS2chr, - map<string, long int> &mapRS2bp, - map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, - size_t &ns_test); -bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, - const double &maf_level, const double &miss_level, - const double &hwe_level, const double &r2_level, - size_t &ns_test); -bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, - igzstream &infile, gsl_vector *geno, - double &geno_mean); -void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, - ifstream &infile, gsl_vector *geno, double &geno_mean); - -void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, - map<string, int> &mapID2num, const size_t k_mode, - bool &error, gsl_matrix *G); -void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, - map<string, int> &mapID2num, const size_t k_mode, - bool &error, gsl_matrix *G); -void ReadFile_eigenU (const string &file_u, bool &error, gsl_matrix *U); -void ReadFile_eigenD (const string &file_d, bool &error, gsl_vector *eval); - -bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - 
gsl_matrix *matrix_kin); -bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin); - -bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, gsl_matrix *UtX, - gsl_matrix *K, const bool calc_K); -bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, - vector<int> &indicator_snp, gsl_matrix *UtX, - gsl_matrix *K, const bool calc_K); -bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char> > &Xt, gsl_matrix *K, - const bool calc_K, const size_t ni_test, - const size_t ns_test); -bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char> > &Xt, gsl_matrix *K, - const bool calc_K, const size_t ni_test, - const size_t ns_test); - -bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, - map<string, double> &mapRS2est); - -bool CountFileLines (const string &file_input, size_t &n_lines); - -bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, - vector<SNPINFO> &snpInfo, size_t &ng_total); - -bool ReadHeader_io (const string &line, HEADER &header); -bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, - size_t &n_vc); -bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, - size_t &n_vc); - -bool ReadFile_catc (const string &file_cat, - map<string, vector<double> > &mapRS2catc, - size_t &n_cat); -bool ReadFile_mcatc (const string &file_mcat, map<string, - vector<double> > &mapRS2catc, size_t &n_cat); - -bool BimbamKin (const string &file_geno, const int display_pace, - const vector<int> &indicator_idv, - const vector<int> &indicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<SNPINFO> &snpInfo, const gsl_matrix *W, - gsl_matrix *matrix_kin, 
gsl_vector *vector_ns); -bool PlinkKin (const string &file_bed, const int display_pace, - const vector<int> &indicator_idv, - const vector<int> &indicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<SNPINFO> &snpInfo, - const gsl_matrix *W, gsl_matrix *matrix_kin, - gsl_vector *vector_ns); -bool MFILEKin (const size_t mfile_mode, const string &file_mfile, - const int display_pace, const vector<int> &indicator_idv, - const vector<vector<int> > &mindicator_snp, - const map<string, double> &mapRS2weight, - const map<string, size_t> &mapRS2cat, - const vector<vector<SNPINFO> > &msnpInfo, - const gsl_matrix *W, gsl_matrix *matrix_kin, - gsl_vector *vector_ns); - -bool ReadFile_wsnp (const string &file_wsnp, - map<string, double> &mapRS2double); -bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, - map<string, vector<double> > &mapRS2vector); - -void ReadFile_beta (const string &file_beta, - const map<string, size_t> &mapRS2cat, - const map<string, double> &mapRS2wA, - vector<size_t> &vec_cat, vector<size_t> &vec_ni, - vector<double> &vec_weight, vector<double> &vec_z2, - size_t &ni_total, size_t &ns_total, size_t &ns_test); -void ReadFile_beta (const string &file_beta, - const map<string, double> &mapRS2wA, - map<string, string> &mapRS2A1, - map<string, double> &mapRS2z); -void Calcq (const size_t n_block, const vector<size_t> &vec_cat, - const vector<size_t> &vec_ni, - const vector<double> &vec_weight, const vector<double> &vec_z2, - gsl_matrix *Vq, gsl_vector *q, gsl_vector *s); - -void ReadFile_study (const string &file_study, gsl_matrix *Vq, - gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni); -void ReadFile_ref (const string &file_ref, gsl_matrix *S_mat, - gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni); -void ReadFile_mstudy (const string &file_mstudy, gsl_matrix *Vq, - gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni); -void ReadFile_mref (const string &file_mref, gsl_matrix *S_mat, - 
gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni); +void ProgressBar(string str, double p, double total); +void ProgressBar(string str, double p, double total, double ratio); +std::istream &safeGetline(std::istream &is, std::string &t); + +bool ReadFile_snps(const string &file_snps, set<string> &setSnps); +bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps); +bool ReadFile_log(const string &file_log, double &pheno_mean); + +bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo); +bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno, + vector<vector<double>> &pheno, map<string, int> &mapID2num, + const vector<size_t> &p_column); + +bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt, + vector<vector<double>> &cvt, size_t &n_cvt); +bool ReadFile_anno(const string &file_bim, map<string, string> &mapRS2chr, + map<string, long int> &mapRS2bp, + map<string, double> &mapRS2cM); +bool ReadFile_pheno(const string &file_pheno, + vector<vector<int>> &indicator_pheno, + vector<vector<double>> &pheno, + const vector<size_t> &p_column); +bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv, + vector<double> &pheno, const int &p_column); + +bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, + const gsl_matrix *W, vector<int> &indicator_idv, + vector<int> &indicator_snp, const double &maf_level, + const double &miss_level, const double &hwe_level, + const double &r2_level, map<string, string> &mapRS2chr, + map<string, long int> &mapRS2bp, + map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, + size_t &ns_test); +bool ReadFile_bed(const string &file_bed, const set<string> &setSnps, + const gsl_matrix *W, vector<int> &indicator_idv, + vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, + const double &maf_level, const double &miss_level, + const double &hwe_level, const double &r2_level, + size_t &ns_test); +bool Bimbam_ReadOneSNP(const size_t inc, const 
vector<int> &indicator_idv, + igzstream &infile, gsl_vector *geno, double &geno_mean); +void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv, + ifstream &infile, gsl_vector *geno, double &geno_mean); + +void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, + map<string, int> &mapID2num, const size_t k_mode, bool &error, + gsl_matrix *G); +void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv, + map<string, int> &mapID2num, const size_t k_mode, bool &error, + gsl_matrix *G); +void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U); +void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval); + +bool BimbamKin(const string &file_geno, vector<int> &indicator_snp, + const int k_mode, const int display_pace, + gsl_matrix *matrix_kin); +bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, + const int k_mode, const int display_pace, gsl_matrix *matrix_kin); + +bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, + vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, + const bool calc_K); +bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, + vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, + const bool calc_K); +bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, + vector<int> &indicator_snp, + vector<vector<unsigned char>> &Xt, gsl_matrix *K, + const bool calc_K, const size_t ni_test, + const size_t ns_test); +bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, + vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, + gsl_matrix *K, const bool calc_K, const size_t ni_test, + const size_t ns_test); + +bool ReadFile_est(const string &file_est, const vector<size_t> &est_column, + map<string, double> &mapRS2est); + +bool CountFileLines(const string &file_input, size_t &n_lines); + +bool ReadFile_gene(const string &file_gene, vector<double> &vec_read, + vector<SNPINFO> &snpInfo, size_t 
&ng_total); + +bool ReadHeader_io(const string &line, HEADER &header); +bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat, + size_t &n_vc); +bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat, + size_t &n_vc); + +bool ReadFile_catc(const string &file_cat, + map<string, vector<double>> &mapRS2catc, size_t &n_cat); +bool ReadFile_mcatc(const string &file_mcat, + map<string, vector<double>> &mapRS2catc, size_t &n_cat); + +bool BimbamKin(const string &file_geno, const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns); +bool PlinkKin(const string &file_bed, const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns); +bool MFILEKin(const size_t mfile_mode, const string &file_mfile, + const int display_pace, const vector<int> &indicator_idv, + const vector<vector<int>> &mindicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<vector<SNPINFO>> &msnpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns); + +bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2double); +bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc, + map<string, vector<double>> &mapRS2vector); + +void ReadFile_beta(const string &file_beta, + const map<string, size_t> &mapRS2cat, + const map<string, double> &mapRS2wA, vector<size_t> &vec_cat, + vector<size_t> &vec_ni, vector<double> &vec_weight, + vector<double> &vec_z2, size_t &ni_total, size_t &ns_total, + size_t &ns_test); +void ReadFile_beta(const string 
&file_beta, const map<string, double> &mapRS2wA, + map<string, string> &mapRS2A1, map<string, double> &mapRS2z); +void Calcq(const size_t n_block, const vector<size_t> &vec_cat, + const vector<size_t> &vec_ni, const vector<double> &vec_weight, + const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, + gsl_vector *s); + +void ReadFile_study(const string &file_study, gsl_matrix *Vq, gsl_vector *q_vec, + gsl_vector *s_vec, size_t &ni); +void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat, + gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni); +void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq, + gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni); +void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat, + gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni); // WJA added. -bool bgenKin (const string &file_geno, vector<int> &indicator_snp, - const int k_mode, const int display_pace, - gsl_matrix *matrix_kin); +bool bgenKin(const string &file_geno, vector<int> &indicator_snp, + const int k_mode, const int display_pace, gsl_matrix *matrix_kin); bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, - const double &maf_level, const double &miss_level, - const double &hwe_level, const double &r2_level, - size_t &ns_test); + const gsl_matrix *W, vector<int> &indicator_idv, + vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, + const double &maf_level, const double &miss_level, + const double &hwe_level, const double &r2_level, + size_t &ns_test); bool ReadFile_sample(const string &file_sample, - vector<vector<int> > &indicator_pheno, - vector<vector<double> > &pheno, - const vector<size_t> &p_column, - vector<int> &indicator_cvt, - vector<vector<double> > &cvt, - size_t &n_cvt); + vector<vector<int>> &indicator_pheno, + vector<vector<double>> &pheno, + const vector<size_t> &p_column, vector<int> &indicator_cvt, + 
vector<vector<double>> &cvt, size_t &n_cvt); #endif - - - - - - - diff --git a/src/lapack.cpp b/src/lapack.cpp index 05b85f4..8f6e8ff 100644 --- a/src/lapack.cpp +++ b/src/lapack.cpp @@ -16,614 +16,612 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" #include <cmath> +#include <iostream> #include <vector> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" using namespace std; extern "C" void sgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, - float *ALPHA, float *A, int *LDA, float *B, int *LDB, - float *BETA, float *C, int *LDC); + float *ALPHA, float *A, int *LDA, float *B, int *LDB, + float *BETA, float *C, int *LDC); extern "C" void spotrf_(char *UPLO, int *N, float *A, int *LDA, int *INFO); extern "C" void spotrs_(char *UPLO, int *N, int *NRHS, float *A, int *LDA, - float *B, int *LDB, int *INFO); -extern "C" void ssyev_(char* JOBZ, char* UPLO, int *N, float *A, int *LDA, - float *W, float *WORK, int *LWORK, int *INFO); -extern "C" void ssyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N, - float *A, int *LDA, float *VL, float *VU, int *IL, - int *IU, float *ABSTOL, int *M, float *W, float *Z, - int *LDZ, int *ISUPPZ, float *WORK, int *LWORK, - int *IWORK, int *LIWORK, int *INFO); + float *B, int *LDB, int *INFO); +extern "C" void ssyev_(char *JOBZ, char *UPLO, int *N, float *A, int *LDA, + float *W, float *WORK, int *LWORK, int *INFO); +extern "C" void ssyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, float *A, + int *LDA, float *VL, float *VU, int *IL, int *IU, + float *ABSTOL, int *M, float *W, float *Z, int *LDZ, + int *ISUPPZ, float *WORK, int *LWORK, int *IWORK, + int *LIWORK, int *INFO); extern "C" double sdot_(int *N, float *DX, int *INCX, float *DY, int *INCY); extern "C" void dgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, - double *ALPHA, double *A, int *LDA, 
double *B, - int *LDB, double *BETA, double *C, int *LDC); + double *ALPHA, double *A, int *LDA, double *B, int *LDB, + double *BETA, double *C, int *LDC); extern "C" void dpotrf_(char *UPLO, int *N, double *A, int *LDA, int *INFO); extern "C" void dpotrs_(char *UPLO, int *N, int *NRHS, double *A, int *LDA, - double *B, int *LDB, int *INFO); -extern "C" void dsyev_(char* JOBZ, char* UPLO, int *N, double *A, int *LDA, - double *W, double *WORK, int *LWORK, int *INFO); -extern "C" void dsyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N, - double *A, int *LDA, double *VL, double *VU, - int *IL, int *IU, double *ABSTOL, int *M, - double *W, double *Z, int *LDZ, int *ISUPPZ, - double *WORK, int *LWORK, int *IWORK, - int *LIWORK, int *INFO); + double *B, int *LDB, int *INFO); +extern "C" void dsyev_(char *JOBZ, char *UPLO, int *N, double *A, int *LDA, + double *W, double *WORK, int *LWORK, int *INFO); +extern "C" void dsyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, double *A, + int *LDA, double *VL, double *VU, int *IL, int *IU, + double *ABSTOL, int *M, double *W, double *Z, int *LDZ, + int *ISUPPZ, double *WORK, int *LWORK, int *IWORK, + int *LIWORK, int *INFO); extern "C" double ddot_(int *N, double *DX, int *INCX, double *DY, int *INCY); // Cholesky decomposition, A is destroyed. -void lapack_float_cholesky_decomp (gsl_matrix_float *A) { - int N=A->size1, LDA=A->size1, INFO; - char UPLO='L'; - - if (N!=(int)A->size2) { - cout << "Matrix needs to be symmetric and same dimension in " << - "lapack_cholesky_decomp." << endl; - return; - } - - spotrf_(&UPLO, &N, A->data, &LDA, &INFO); - if (INFO!=0) { - cout << "Cholesky decomposition unsuccessful in " << - "lapack_cholesky_decomp." << endl; - return; - } - - return; +void lapack_float_cholesky_decomp(gsl_matrix_float *A) { + int N = A->size1, LDA = A->size1, INFO; + char UPLO = 'L'; + + if (N != (int)A->size2) { + cout << "Matrix needs to be symmetric and same dimension in " + << "lapack_cholesky_decomp." 
<< endl; + return; + } + + spotrf_(&UPLO, &N, A->data, &LDA, &INFO); + if (INFO != 0) { + cout << "Cholesky decomposition unsuccessful in " + << "lapack_cholesky_decomp." << endl; + return; + } + + return; } // Cholesky decomposition, A is destroyed. -void lapack_cholesky_decomp (gsl_matrix *A) { - int N=A->size1, LDA=A->size1, INFO; - char UPLO='L'; - - if (N!=(int)A->size2) { - cout << "Matrix needs to be symmetric and same dimension in " << - "lapack_cholesky_decomp." << endl; - return; - } - - dpotrf_(&UPLO, &N, A->data, &LDA, &INFO); - if (INFO!=0) { - cout << "Cholesky decomposition unsuccessful in " << - "lapack_cholesky_decomp."<<endl; - return; - } - - return; +void lapack_cholesky_decomp(gsl_matrix *A) { + int N = A->size1, LDA = A->size1, INFO; + char UPLO = 'L'; + + if (N != (int)A->size2) { + cout << "Matrix needs to be symmetric and same dimension in " + << "lapack_cholesky_decomp." << endl; + return; + } + + dpotrf_(&UPLO, &N, A->data, &LDA, &INFO); + if (INFO != 0) { + cout << "Cholesky decomposition unsuccessful in " + << "lapack_cholesky_decomp." << endl; + return; + } + + return; } // Cholesky solve, A is decomposed. -void lapack_float_cholesky_solve (gsl_matrix_float *A, - const gsl_vector_float *b, - gsl_vector_float *x) { - int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO; - char UPLO='L'; - - - if (N!=(int)A->size2 || N!=LDB) { - cout << "Matrix needs to be symmetric and same dimension in " << - "lapack_cholesky_solve." << endl; - return; - } - - gsl_vector_float_memcpy (x, b); - spotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO); - if (INFO!=0) { - cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." 
<< - endl; - return; - } - - return; +void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b, + gsl_vector_float *x) { + int N = A->size1, NRHS = 1, LDA = A->size1, LDB = b->size, INFO; + char UPLO = 'L'; + + if (N != (int)A->size2 || N != LDB) { + cout << "Matrix needs to be symmetric and same dimension in " + << "lapack_cholesky_solve." << endl; + return; + } + + gsl_vector_float_memcpy(x, b); + spotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO); + if (INFO != 0) { + cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << endl; + return; + } + + return; } // Cholesky solve, A is decomposed. -void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b, - gsl_vector *x) { - int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO; - char UPLO='L'; - - if (N!=(int)A->size2 || N!=LDB) { - cout << "Matrix needs to be symmetric and same dimension in " << - "lapack_cholesky_solve." << endl; - return; - } - - gsl_vector_memcpy (x, b); - dpotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO); - if (INFO!=0) { - cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << - endl; - return; - } - - return; -} +void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x) { + int N = A->size1, NRHS = 1, LDA = A->size1, LDB = b->size, INFO; + char UPLO = 'L'; + + if (N != (int)A->size2 || N != LDB) { + cout << "Matrix needs to be symmetric and same dimension in " + << "lapack_cholesky_solve." << endl; + return; + } + + gsl_vector_memcpy(x, b); + dpotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO); + if (INFO != 0) { + cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." 
<< endl; + return; + } -void lapack_sgemm (char *TransA, char *TransB, float alpha, - const gsl_matrix_float *A, const gsl_matrix_float *B, - float beta, gsl_matrix_float *C) { - int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2; - - if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;} - else if (*TransA=='T' || *TransA=='t') {M=A->size2; K1=A->size1;} - else {cout<<"need 'N' or 'T' in lapack_sgemm"<<endl; return;} - - if (*TransB=='N' || *TransB=='n') {N=B->size2; K2=B->size1;} - else if (*TransB=='T' || *TransB=='t') {N=B->size1; K2=B->size2;} - else {cout<<"need 'N' or 'T' in lapack_sgemm"<<endl; return;} - - if (K1!=K2) { - cout<<"A and B not compatible in lapack_sgemm"<<endl; - return; - } - if (C->size1!=(size_t)M || C->size2!=(size_t)N) { - cout<<"C not compatible in lapack_sgemm"<<endl; - return; - } - - gsl_matrix_float *A_t=gsl_matrix_float_alloc (A->size2, A->size1); - gsl_matrix_float_transpose_memcpy (A_t, A); - gsl_matrix_float *B_t=gsl_matrix_float_alloc (B->size2, B->size1); - gsl_matrix_float_transpose_memcpy (B_t, B); - gsl_matrix_float *C_t=gsl_matrix_float_alloc (C->size2, C->size1); - gsl_matrix_float_transpose_memcpy (C_t, C); - - sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, - B_t->data, &LDB, &beta, C_t->data, &LDC); - gsl_matrix_float_transpose_memcpy (C, C_t); - - gsl_matrix_float_free (A_t); - gsl_matrix_float_free (B_t); - gsl_matrix_float_free (C_t); - return; + return; } +void lapack_sgemm(char *TransA, char *TransB, float alpha, + const gsl_matrix_float *A, const gsl_matrix_float *B, + float beta, gsl_matrix_float *C) { + int M, N, K1, K2, LDA = A->size1, LDB = B->size1, LDC = C->size2; + + if (*TransA == 'N' || *TransA == 'n') { + M = A->size1; + K1 = A->size2; + } else if (*TransA == 'T' || *TransA == 't') { + M = A->size2; + K1 = A->size1; + } else { + cout << "need 'N' or 'T' in lapack_sgemm" << endl; + return; + } + if (*TransB == 'N' || *TransB == 'n') { + N = B->size2; + K2 = B->size1; + } else 
if (*TransB == 'T' || *TransB == 't') { + N = B->size1; + K2 = B->size2; + } else { + cout << "need 'N' or 'T' in lapack_sgemm" << endl; + return; + } -void lapack_dgemm (char *TransA, char *TransB, double alpha, - const gsl_matrix *A, const gsl_matrix *B, - double beta, gsl_matrix *C) { - int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2; + if (K1 != K2) { + cout << "A and B not compatible in lapack_sgemm" << endl; + return; + } + if (C->size1 != (size_t)M || C->size2 != (size_t)N) { + cout << "C not compatible in lapack_sgemm" << endl; + return; + } - if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;} - else if (*TransA=='T' || *TransA=='t') {M=A->size2; K1=A->size1;} - else {cout<<"need 'N' or 'T' in lapack_dgemm"<<endl; return;} + gsl_matrix_float *A_t = gsl_matrix_float_alloc(A->size2, A->size1); + gsl_matrix_float_transpose_memcpy(A_t, A); + gsl_matrix_float *B_t = gsl_matrix_float_alloc(B->size2, B->size1); + gsl_matrix_float_transpose_memcpy(B_t, B); + gsl_matrix_float *C_t = gsl_matrix_float_alloc(C->size2, C->size1); + gsl_matrix_float_transpose_memcpy(C_t, C); + + sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB, + &beta, C_t->data, &LDC); + gsl_matrix_float_transpose_memcpy(C, C_t); + + gsl_matrix_float_free(A_t); + gsl_matrix_float_free(B_t); + gsl_matrix_float_free(C_t); + return; +} - if (*TransB=='N' || *TransB=='n') {N=B->size2; K2=B->size1;} - else if (*TransB=='T' || *TransB=='t') {N=B->size1; K2=B->size2;} - else {cout<<"need 'N' or 'T' in lapack_dgemm"<<endl; return;} +void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A, + const gsl_matrix *B, double beta, gsl_matrix *C) { + int M, N, K1, K2, LDA = A->size1, LDB = B->size1, LDC = C->size2; + + if (*TransA == 'N' || *TransA == 'n') { + M = A->size1; + K1 = A->size2; + } else if (*TransA == 'T' || *TransA == 't') { + M = A->size2; + K1 = A->size1; + } else { + cout << "need 'N' or 'T' in lapack_dgemm" << endl; + return; + 
} + + if (*TransB == 'N' || *TransB == 'n') { + N = B->size2; + K2 = B->size1; + } else if (*TransB == 'T' || *TransB == 't') { + N = B->size1; + K2 = B->size2; + } else { + cout << "need 'N' or 'T' in lapack_dgemm" << endl; + return; + } - if (K1!=K2) { - cout << "A and B not compatible in lapack_dgemm"<<endl; - return; - } - if (C->size1!=(size_t)M || C->size2!=(size_t)N) { - cout<<"C not compatible in lapack_dgemm"<<endl; - return; - } + if (K1 != K2) { + cout << "A and B not compatible in lapack_dgemm" << endl; + return; + } + if (C->size1 != (size_t)M || C->size2 != (size_t)N) { + cout << "C not compatible in lapack_dgemm" << endl; + return; + } - gsl_matrix *A_t=gsl_matrix_alloc (A->size2, A->size1); - gsl_matrix_transpose_memcpy (A_t, A); - gsl_matrix *B_t=gsl_matrix_alloc (B->size2, B->size1); - gsl_matrix_transpose_memcpy (B_t, B); - gsl_matrix *C_t=gsl_matrix_alloc (C->size2, C->size1); - gsl_matrix_transpose_memcpy (C_t, C); + gsl_matrix *A_t = gsl_matrix_alloc(A->size2, A->size1); + gsl_matrix_transpose_memcpy(A_t, A); + gsl_matrix *B_t = gsl_matrix_alloc(B->size2, B->size1); + gsl_matrix_transpose_memcpy(B_t, B); + gsl_matrix *C_t = gsl_matrix_alloc(C->size2, C->size1); + gsl_matrix_transpose_memcpy(C_t, C); - dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, - B_t->data, &LDB, &beta, C_t->data, &LDC); + dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB, + &beta, C_t->data, &LDC); - gsl_matrix_transpose_memcpy (C, C_t); + gsl_matrix_transpose_memcpy(C, C_t); - gsl_matrix_free (A_t); - gsl_matrix_free (B_t); - gsl_matrix_free (C_t); - return; + gsl_matrix_free(A_t); + gsl_matrix_free(B_t); + gsl_matrix_free(C_t); + return; } // Eigen value decomposition, matrix A is destroyed, float seems to // have problem with large matrices (in mac). 
-void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval, - gsl_matrix_float *evec, - const size_t flag_largematrix) { - if (flag_largematrix==1) { - int N=A->size1, LDA=A->size1, INFO, LWORK=-1; - char JOBZ='V', UPLO='L'; - - if (N!=(int)A->size2 || N!=(int)eval->size) { - cout << "Matrix needs to be symmetric and same " << - "dimension in lapack_eigen_symmv."<<endl; - return; - } - - LWORK=3*N; - float *WORK=new float [LWORK]; - ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, - &LWORK, &INFO); - if (INFO!=0) { - cout << "Eigen decomposition unsuccessful in " << - "lapack_eigen_symmv."<<endl; - return; - } - - gsl_matrix_float_view A_sub = - gsl_matrix_float_submatrix(A, 0, 0, N, N); - gsl_matrix_float_memcpy (evec, &A_sub.matrix); - gsl_matrix_float_transpose (evec); - - delete [] WORK; - } else { - int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO, - LWORK=-1, LIWORK=-1; - char JOBZ='V', UPLO='L', RANGE='A'; - float ABSTOL=1.0E-7; - - // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'. - float VL=0.0, VU=0.0; - int IL=0, IU=0, M; - - if (N!=(int)A->size2 || N!=(int)eval->size) { - cout << "Matrix needs to be symmetric and same " << - "dimension in lapack_float_eigen_symmv." << endl; - return; - } - - int *ISUPPZ=new int [2*N]; - - float WORK_temp[1]; - int IWORK_temp[1]; - ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, - &VU, &IL, &IU, &ABSTOL, &M, eval->data, - evec->data, &LDZ, ISUPPZ, WORK_temp, &LWORK, - IWORK_temp, &LIWORK, &INFO); - if (INFO!=0) { - cout << "Work space estimate unsuccessful in " << - "lapack_float_eigen_symmv." 
<< endl; - return; - } - LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0]; - - float *WORK=new float [LWORK]; - int *IWORK=new int [LIWORK]; - - ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, - &VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data, - &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO); - if (INFO!=0) { - cout << "Eigen decomposition unsuccessful in " << - "lapack_float_eigen_symmv." << endl; - return; - } - - gsl_matrix_float_transpose (evec); - - delete [] ISUPPZ; - delete [] WORK; - delete [] IWORK; - } - - - return; -} - +void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval, + gsl_matrix_float *evec, + const size_t flag_largematrix) { + if (flag_largematrix == 1) { + int N = A->size1, LDA = A->size1, INFO, LWORK = -1; + char JOBZ = 'V', UPLO = 'L'; + + if (N != (int)A->size2 || N != (int)eval->size) { + cout << "Matrix needs to be symmetric and same " + << "dimension in lapack_eigen_symmv." << endl; + return; + } + + LWORK = 3 * N; + float *WORK = new float[LWORK]; + ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO); + if (INFO != 0) { + cout << "Eigen decomposition unsuccessful in " + << "lapack_eigen_symmv." << endl; + return; + } + + gsl_matrix_float_view A_sub = gsl_matrix_float_submatrix(A, 0, 0, N, N); + gsl_matrix_float_memcpy(evec, &A_sub.matrix); + gsl_matrix_float_transpose(evec); + + delete[] WORK; + } else { + int N = A->size1, LDA = A->size1, LDZ = A->size1, INFO, LWORK = -1, + LIWORK = -1; + char JOBZ = 'V', UPLO = 'L', RANGE = 'A'; + float ABSTOL = 1.0E-7; + + // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'. + float VL = 0.0, VU = 0.0; + int IL = 0, IU = 0, M; + + if (N != (int)A->size2 || N != (int)eval->size) { + cout << "Matrix needs to be symmetric and same " + << "dimension in lapack_float_eigen_symmv." 
<< endl; + return; + } + + int *ISUPPZ = new int[2 * N]; + + float WORK_temp[1]; + int IWORK_temp[1]; + ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, + &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp, + &LWORK, IWORK_temp, &LIWORK, &INFO); + if (INFO != 0) { + cout << "Work space estimate unsuccessful in " + << "lapack_float_eigen_symmv." << endl; + return; + } + LWORK = (int)WORK_temp[0]; + LIWORK = (int)IWORK_temp[0]; + + float *WORK = new float[LWORK]; + int *IWORK = new int[LIWORK]; + + ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, + &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK, + IWORK, &LIWORK, &INFO); + if (INFO != 0) { + cout << "Eigen decomposition unsuccessful in " + << "lapack_float_eigen_symmv." << endl; + return; + } + + gsl_matrix_float_transpose(evec); + + delete[] ISUPPZ; + delete[] WORK; + delete[] IWORK; + } + return; +} // Eigenvalue decomposition, matrix A is destroyed. -void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, - const size_t flag_largematrix) { - if (flag_largematrix==1) { - int N=A->size1, LDA=A->size1, INFO, LWORK=-1; - char JOBZ='V', UPLO='L'; - - if (N!=(int)A->size2 || N!=(int)eval->size) { - cout << "Matrix needs to be symmetric and same " << - "dimension in lapack_eigen_symmv." << endl; - return; - } - - LWORK=3*N; - double *WORK=new double [LWORK]; - dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, - &LWORK, &INFO); - if (INFO!=0) { - cout<<"Eigen decomposition unsuccessful in " << - "lapack_eigen_symmv." << endl; - return; - } - - gsl_matrix_view A_sub=gsl_matrix_submatrix(A, 0, 0, N, N); - gsl_matrix_memcpy (evec, &A_sub.matrix); - gsl_matrix_transpose (evec); - - delete [] WORK; - } else { - int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO; - int LWORK=-1, LIWORK=-1; - char JOBZ='V', UPLO='L', RANGE='A'; - double ABSTOL=1.0E-7; - - // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'. 
- double VL=0.0, VU=0.0; - int IL=0, IU=0, M; - - if (N!=(int)A->size2 || N!=(int)eval->size) { - cout << "Matrix needs to be symmetric and same " << - "dimension in lapack_eigen_symmv." << endl; - return; - } - - int *ISUPPZ=new int [2*N]; - - double WORK_temp[1]; - int IWORK_temp[1]; - - dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, - &IL, &IU, &ABSTOL, &M, eval->data, evec->data, - &LDZ, ISUPPZ, WORK_temp, &LWORK, IWORK_temp, - &LIWORK, &INFO); - if (INFO!=0) { - cout << "Work space estimate unsuccessful in " << - "lapack_eigen_symmv." << endl; - return; - } - LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0]; - - double *WORK=new double [LWORK]; - int *IWORK=new int [LIWORK]; - - dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, - &IL, &IU, &ABSTOL, &M, eval->data, evec->data, - &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO); - if (INFO!=0) { - cout << "Eigen decomposition unsuccessful in " << - "lapack_eigen_symmv." << endl; - return; - } - - gsl_matrix_transpose (evec); - - delete [] ISUPPZ; - delete [] WORK; - delete [] IWORK; - } - - return; +void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, + const size_t flag_largematrix) { + if (flag_largematrix == 1) { + int N = A->size1, LDA = A->size1, INFO, LWORK = -1; + char JOBZ = 'V', UPLO = 'L'; + + if (N != (int)A->size2 || N != (int)eval->size) { + cout << "Matrix needs to be symmetric and same " + << "dimension in lapack_eigen_symmv." << endl; + return; + } + + LWORK = 3 * N; + double *WORK = new double[LWORK]; + dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO); + if (INFO != 0) { + cout << "Eigen decomposition unsuccessful in " + << "lapack_eigen_symmv." 
<< endl; + return; + } + + gsl_matrix_view A_sub = gsl_matrix_submatrix(A, 0, 0, N, N); + gsl_matrix_memcpy(evec, &A_sub.matrix); + gsl_matrix_transpose(evec); + + delete[] WORK; + } else { + int N = A->size1, LDA = A->size1, LDZ = A->size1, INFO; + int LWORK = -1, LIWORK = -1; + char JOBZ = 'V', UPLO = 'L', RANGE = 'A'; + double ABSTOL = 1.0E-7; + + // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'. + double VL = 0.0, VU = 0.0; + int IL = 0, IU = 0, M; + + if (N != (int)A->size2 || N != (int)eval->size) { + cout << "Matrix needs to be symmetric and same " + << "dimension in lapack_eigen_symmv." << endl; + return; + } + + int *ISUPPZ = new int[2 * N]; + + double WORK_temp[1]; + int IWORK_temp[1]; + + dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, + &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp, + &LWORK, IWORK_temp, &LIWORK, &INFO); + if (INFO != 0) { + cout << "Work space estimate unsuccessful in " + << "lapack_eigen_symmv." << endl; + return; + } + LWORK = (int)WORK_temp[0]; + LIWORK = (int)IWORK_temp[0]; + + double *WORK = new double[LWORK]; + int *IWORK = new int[LIWORK]; + + dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, + &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK, + IWORK, &LIWORK, &INFO); + if (INFO != 0) { + cout << "Eigen decomposition unsuccessful in " + << "lapack_eigen_symmv." << endl; + return; + } + + gsl_matrix_transpose(evec); + + delete[] ISUPPZ; + delete[] WORK; + delete[] IWORK; + } + + return; } // DO NOT set eigenvalues to be positive. -double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, - const size_t flag_largematrix) { - lapack_eigen_symmv (G, eval, U, flag_largematrix); - - // Calculate track_G=mean(diag(G)). 
- double d=0.0; - for (size_t i=0; i<eval->size; ++i) { - d+=gsl_vector_get(eval, i); - } - d/=(double)eval->size; - - return d; -} +double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, + const size_t flag_largematrix) { + lapack_eigen_symmv(G, eval, U, flag_largematrix); + + // Calculate track_G=mean(diag(G)). + double d = 0.0; + for (size_t i = 0; i < eval->size; ++i) { + d += gsl_vector_get(eval, i); + } + d /= (double)eval->size; + return d; +} // DO NOT set eigen values to be positive. -double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, - gsl_vector_float *eval, const size_t flag_largematrix) { - lapack_float_eigen_symmv (G, eval, U, flag_largematrix); - - // Calculate track_G=mean(diag(G)). - double d = 0.0; - for (size_t i=0; i<eval->size; ++i) { - d+=gsl_vector_float_get(eval, i); - } - d/=(double)eval->size; - - return d; -} +double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U, + gsl_vector_float *eval, const size_t flag_largematrix) { + lapack_float_eigen_symmv(G, eval, U, flag_largematrix); + + // Calculate track_G=mean(diag(G)). 
+ double d = 0.0; + for (size_t i = 0; i < eval->size; ++i) { + d += gsl_vector_float_get(eval, i); + } + d /= (double)eval->size; + return d; +} double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) { - double logdet_O=0.0; + double logdet_O = 0.0; - lapack_cholesky_decomp(Omega); - for (size_t i=0; i<Omega->size1; ++i) { - logdet_O+=log(gsl_matrix_get (Omega, i, i)); - } - logdet_O*=2.0; - lapack_cholesky_solve(Omega, Xty, OiXty); + lapack_cholesky_decomp(Omega); + for (size_t i = 0; i < Omega->size1; ++i) { + logdet_O += log(gsl_matrix_get(Omega, i, i)); + } + logdet_O *= 2.0; + lapack_cholesky_solve(Omega, Xty, OiXty); - return logdet_O; + return logdet_O; } - double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, - gsl_vector_float *OiXty) { - double logdet_O=0.0; + gsl_vector_float *OiXty) { + double logdet_O = 0.0; - lapack_float_cholesky_decomp(Omega); - for (size_t i=0; i<Omega->size1; ++i) { - logdet_O+=log(gsl_matrix_float_get (Omega, i, i)); - } - logdet_O*=2.0; - lapack_float_cholesky_solve(Omega, Xty, OiXty); + lapack_float_cholesky_decomp(Omega); + for (size_t i = 0; i < Omega->size1; ++i) { + logdet_O += log(gsl_matrix_float_get(Omega, i, i)); + } + logdet_O *= 2.0; + lapack_float_cholesky_solve(Omega, Xty, OiXty); - return logdet_O; + return logdet_O; } - // LU decomposition. -void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum) { - gsl_linalg_LU_decomp (LU, p, signum); - return; +void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) { + gsl_linalg_LU_decomp(LU, p, signum); + return; } -void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum) { - gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); - - // Copy float matrix to double. - for (size_t i=0; i<LU->size1; i++) { - for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_set (LU_double, i, j, - gsl_matrix_float_get(LU, i, j)); - } - } - - // LU decomposition. 
- gsl_linalg_LU_decomp (LU_double, p, signum); - - // Copy float matrix to double. - for (size_t i=0; i<LU->size1; i++) { - for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_float_set (LU, i, j, - gsl_matrix_get(LU_double, i, j)); - } - } - - // Free matrix. - gsl_matrix_free (LU_double); - return; -} +void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) { + gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); + + // Copy float matrix to double. + for (size_t i = 0; i < LU->size1; i++) { + for (size_t j = 0; j < LU->size2; j++) { + gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j)); + } + } + // LU decomposition. + gsl_linalg_LU_decomp(LU_double, p, signum); + + // Copy float matrix to double. + for (size_t i = 0; i < LU->size1; i++) { + for (size_t j = 0; j < LU->size2; j++) { + gsl_matrix_float_set(LU, i, j, gsl_matrix_get(LU_double, i, j)); + } + } + + // Free matrix. + gsl_matrix_free(LU_double); + return; +} // LU invert. -void LUInvert (const gsl_matrix *LU, const gsl_permutation *p, - gsl_matrix *inverse) { - gsl_linalg_LU_invert (LU, p, inverse); - return; +void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, + gsl_matrix *inverse) { + gsl_linalg_LU_invert(LU, p, inverse); + return; } -void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p, - gsl_matrix_float *inverse) { - gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); - gsl_matrix *inverse_double=gsl_matrix_alloc (inverse->size1, - inverse->size2); - - // Copy float matrix to double. - for (size_t i=0; i<LU->size1; i++) { - for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_set (LU_double, i, j, - gsl_matrix_float_get(LU, i, j)); - } - } - - // LU decomposition. - gsl_linalg_LU_invert (LU_double, p, inverse_double); - - // Copy float matrix to double. 
- for (size_t i=0; i<inverse->size1; i++) { - for (size_t j=0; j<inverse->size2; j++) { - gsl_matrix_float_set (inverse, i, j, - gsl_matrix_get(inverse_double, - i, j)); - } - } - - // Free matrix. - gsl_matrix_free (LU_double); - gsl_matrix_free (inverse_double); - return; +void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p, + gsl_matrix_float *inverse) { + gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); + gsl_matrix *inverse_double = gsl_matrix_alloc(inverse->size1, inverse->size2); + + // Copy float matrix to double. + for (size_t i = 0; i < LU->size1; i++) { + for (size_t j = 0; j < LU->size2; j++) { + gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j)); + } + } + + // LU decomposition. + gsl_linalg_LU_invert(LU_double, p, inverse_double); + + // Copy float matrix to double. + for (size_t i = 0; i < inverse->size1; i++) { + for (size_t j = 0; j < inverse->size2; j++) { + gsl_matrix_float_set(inverse, i, j, gsl_matrix_get(inverse_double, i, j)); + } + } + + // Free matrix. + gsl_matrix_free(LU_double); + gsl_matrix_free(inverse_double); + return; } // LU lndet. -double LULndet (gsl_matrix *LU) { - double d; - d=gsl_linalg_LU_lndet (LU); - return d; +double LULndet(gsl_matrix *LU) { + double d; + d = gsl_linalg_LU_lndet(LU); + return d; } -double LULndet (gsl_matrix_float *LU) { - gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); - double d; +double LULndet(gsl_matrix_float *LU) { + gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); + double d; - // Copy float matrix to double. - for (size_t i=0; i<LU->size1; i++) { - for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_set (LU_double, i, j, gsl_matrix_float_get(LU, i, j)); - } - } + // Copy float matrix to double. + for (size_t i = 0; i < LU->size1; i++) { + for (size_t j = 0; j < LU->size2; j++) { + gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j)); + } + } - // LU decomposition. 
- d=gsl_linalg_LU_lndet (LU_double); + // LU decomposition. + d = gsl_linalg_LU_lndet(LU_double); - // Free matrix - gsl_matrix_free (LU_double); - return d; + // Free matrix + gsl_matrix_free(LU_double); + return d; } - // LU solve. -void LUSolve (const gsl_matrix *LU, const gsl_permutation *p, - const gsl_vector *b, gsl_vector *x) { - gsl_linalg_LU_solve (LU, p, b, x); - return; +void LUSolve(const gsl_matrix *LU, const gsl_permutation *p, + const gsl_vector *b, gsl_vector *x) { + gsl_linalg_LU_solve(LU, p, b, x); + return; } -void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, - const gsl_vector_float *b, gsl_vector_float *x) { - gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); - gsl_vector *b_double=gsl_vector_alloc (b->size); - gsl_vector *x_double=gsl_vector_alloc (x->size); - - // Copy float matrix to double. - for (size_t i=0; i<LU->size1; i++) { - for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_set (LU_double, i, j, - gsl_matrix_float_get(LU, i, j)); - } - } - - for (size_t i=0; i<b->size; i++) { - gsl_vector_set (b_double, i, gsl_vector_float_get(b, i)); - } - - for (size_t i=0; i<x->size; i++) { - gsl_vector_set (x_double, i, gsl_vector_float_get(x, i)); - } - - // LU decomposition. - gsl_linalg_LU_solve (LU_double, p, b_double, x_double); - - // Copy float matrix to double. - for (size_t i=0; i<x->size; i++) { - gsl_vector_float_set (x, i, gsl_vector_get(x_double, i)); - } - - // Free matrix. - gsl_matrix_free (LU_double); - gsl_vector_free (b_double); - gsl_vector_free (x_double); - return; -} +void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p, + const gsl_vector_float *b, gsl_vector_float *x) { + gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); + gsl_vector *b_double = gsl_vector_alloc(b->size); + gsl_vector *x_double = gsl_vector_alloc(x->size); + + // Copy float matrix to double. 
+ for (size_t i = 0; i < LU->size1; i++) { + for (size_t j = 0; j < LU->size2; j++) { + gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j)); + } + } + for (size_t i = 0; i < b->size; i++) { + gsl_vector_set(b_double, i, gsl_vector_float_get(b, i)); + } + + for (size_t i = 0; i < x->size; i++) { + gsl_vector_set(x_double, i, gsl_vector_float_get(x, i)); + } + + // LU decomposition. + gsl_linalg_LU_solve(LU_double, p, b_double, x_double); + + // Copy float matrix to double. + for (size_t i = 0; i < x->size; i++) { + gsl_vector_float_set(x, i, gsl_vector_get(x_double, i)); + } + + // Free matrix. + gsl_matrix_free(LU_double); + gsl_vector_free(b_double); + gsl_vector_free(x_double); + return; +} bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) { - bool flag=false; - int incx=1; - int incy=1; - int n=(int)x.size(); - if (x.size()==y.size()) { - v=ddot_(&n, &x[0], &incx, &y[0], &incy); - flag=true; + bool flag = false; + int incx = 1; + int incy = 1; + int n = (int)x.size(); + if (x.size() == y.size()) { + v = ddot_(&n, &x[0], &incx, &y[0], &incy); + flag = true; } return flag; } - bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) { - bool flag=false; - int incx=1; - int incy=1; - int n=(int)x.size(); - if (x.size()==y.size()) { - v=sdot_(&n, &x[0], &incx, &y[0], &incy); - flag=true; + bool flag = false; + int incx = 1; + int incy = 1; + int n = (int)x.size(); + if (x.size() == y.size()) { + v = sdot_(&n, &x[0], &incx, &y[0], &incy); + flag = true; } return flag; diff --git a/src/lapack.h b/src/lapack.h index 5e1db35..ff02b96 100644 --- a/src/lapack.h +++ b/src/lapack.h @@ -23,45 +23,43 @@ using namespace std; -void lapack_float_cholesky_decomp (gsl_matrix_float *A); -void lapack_cholesky_decomp (gsl_matrix *A); -void lapack_float_cholesky_solve (gsl_matrix_float *A, - const gsl_vector_float *b, - gsl_vector_float *x); -void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b, gsl_vector *x); -void lapack_sgemm (char 
*TransA, char *TransB, float alpha, - const gsl_matrix_float *A, const gsl_matrix_float *B, - float beta, gsl_matrix_float *C); -void lapack_dgemm (char *TransA, char *TransB, double alpha, - const gsl_matrix *A, const gsl_matrix *B, - double beta, gsl_matrix *C); -void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval, - gsl_matrix_float *evec, - const size_t flag_largematrix); -void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, - const size_t flag_largematrix); +void lapack_float_cholesky_decomp(gsl_matrix_float *A); +void lapack_cholesky_decomp(gsl_matrix *A); +void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b, + gsl_vector_float *x); +void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x); +void lapack_sgemm(char *TransA, char *TransB, float alpha, + const gsl_matrix_float *A, const gsl_matrix_float *B, + float beta, gsl_matrix_float *C); +void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A, + const gsl_matrix *B, double beta, gsl_matrix *C); +void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval, + gsl_matrix_float *evec, + const size_t flag_largematrix); +void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, + const size_t flag_largematrix); -double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, - const size_t flag_largematrix); -double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, - gsl_vector_float *eval, const size_t flag_largematrix); +double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, + const size_t flag_largematrix); +double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U, + gsl_vector_float *eval, const size_t flag_largematrix); double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty); double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, - gsl_vector_float *OiXty); + gsl_vector_float *OiXty); -void LUDecomp 
(gsl_matrix *LU, gsl_permutation *p, int *signum); -void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum); -void LUInvert (const gsl_matrix *LU, const gsl_permutation *p, - gsl_matrix *inverse); -void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p, - gsl_matrix_float *inverse); -double LULndet (gsl_matrix *LU); -double LULndet (gsl_matrix_float *LU); -void LUSolve (const gsl_matrix *LU, const gsl_permutation *p, - const gsl_vector *b, gsl_vector *x); -void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, - const gsl_vector_float *b, gsl_vector_float *x); +void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum); +void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum); +void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, + gsl_matrix *inverse); +void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p, + gsl_matrix_float *inverse); +double LULndet(gsl_matrix *LU); +double LULndet(gsl_matrix_float *LU); +void LUSolve(const gsl_matrix *LU, const gsl_permutation *p, + const gsl_vector *b, gsl_vector *x); +void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p, + const gsl_vector_float *b, gsl_vector_float *x); bool lapack_ddot(vector<double> &x, vector<double> &y, double &v); bool lapack_sdot(vector<float> &x, vector<float> &y, double &v); diff --git a/src/ldr.cpp b/src/ldr.cpp index f0a1b37..3554efa 100644 --- a/src/ldr.cpp +++ b/src/ldr.cpp @@ -16,67 +16,65 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> #include <fstream> +#include <iostream> #include <sstream> -#include <iomanip> +#include <algorithm> #include <cmath> +#include <cstring> +#include <ctime> +#include <iomanip> #include <iostream> #include <stdio.h> #include <stdlib.h> -#include <ctime> -#include <cstring> -#include <algorithm> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" +#include "Eigen/Dense" #include "gsl/gsl_blas.h" +#include "gsl/gsl_cdf.h" #include "gsl/gsl_eigen.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" #include "gsl/gsl_randist.h" -#include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" -#include "Eigen/Dense" +#include "gsl/gsl_vector.h" #include "lapack.h" -#include "param.h" #include "ldr.h" #include "lm.h" #include "mathfunc.h" +#include "param.h" using namespace std; using namespace Eigen; -void LDR::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; +void LDR::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; - n_cvt=cPar.n_cvt; + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ni_test = cPar.ni_test; + ns_test = cPar.ns_test; + n_cvt = cPar.n_cvt; - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; - return; + return; } -void LDR::CopyToParam (PARAM &cPar) { - return; -} +void LDR::CopyToParam(PARAM &cPar) { return; } -//X is a p by n matrix. -void LDR::VB (const vector<vector<unsigned char> > &Xt, - const gsl_matrix *W_gsl, const gsl_vector *y_gsl) { +// X is a p by n matrix. 
+void LDR::VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl, + const gsl_vector *y_gsl) { // Save gsl_vector and gsl_matrix into Eigen library formats. MatrixXd W(W_gsl->size1, W_gsl->size2); @@ -84,20 +82,21 @@ void LDR::VB (const vector<vector<unsigned char> > &Xt, VectorXd x_col(y_gsl->size); double d; - for (size_t i=0; i<W_gsl->size1; i++) { - d=gsl_vector_get(y_gsl, i); - y(i)=d; - for (size_t j=0; j<W_gsl->size2; j++) { - W(i,j)=gsl_matrix_get(W_gsl, i, j); + for (size_t i = 0; i < W_gsl->size1; i++) { + d = gsl_vector_get(y_gsl, i); + y(i) = d; + for (size_t j = 0; j < W_gsl->size2; j++) { + W(i, j) = gsl_matrix_get(W_gsl, i, j); } } // Initial VB values by lm. - cout<<indicator_snp[0]<<" "<<indicator_snp[1]<<" "<<indicator_snp[2]<<endl; - uchar_matrix_get_row (Xt, 0, x_col); + cout << indicator_snp[0] << " " << indicator_snp[1] << " " << indicator_snp[2] + << endl; + uchar_matrix_get_row(Xt, 0, x_col); - for (size_t j=0; j<10; j++) { - cout<<x_col(j)<<endl; + for (size_t j = 0; j < 10; j++) { + cout << x_col(j) << endl; } // Run VB iterations. @@ -19,53 +19,51 @@ #ifndef __LDR_H__ #define __LDR_H__ -#include <vector> -#include <map> -#include <gsl/gsl_rng.h> -#include <gsl/gsl_randist.h> #include "param.h" +#include <gsl/gsl_randist.h> +#include <gsl/gsl_rng.h> +#include <map> +#include <vector> using namespace std; class LDR { public: - // IO-related parameters. - int a_mode; - size_t d_pace; + // IO-related parameters. + int a_mode; + size_t d_pace; - string file_bfile; - string file_geno; - string file_out; - string path_out; + string file_bfile; + string file_geno; + string file_out; + string path_out; - // Summary statistics. - size_t ni_total, ns_total; // Total number of individuals & SNPs. - size_t ni_test, ns_test; // Number of individuals & SNPs used - // for analysis - size_t n_cvt; // Number of covariates. + // Summary statistics. + size_t ni_total, ns_total; // Total number of individuals & SNPs. 
+ size_t ni_test, ns_test; // Number of individuals & SNPs used + // for analysis + size_t n_cvt; // Number of covariates. - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis. - vector<int> indicator_idv; + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; - vector<SNPINFO> snpInfo; // Record SNP information. + vector<SNPINFO> snpInfo; // Record SNP information. - // Not included in PARAM. - gsl_rng *gsl_r; + // Not included in PARAM. + gsl_rng *gsl_r; - // Main functions. - void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); + // Main functions. + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); - void VB(const vector<vector<unsigned char> > &Xt, - const gsl_matrix *W_gsl, const gsl_vector *y_gsl); + void VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl, + const gsl_vector *y_gsl); }; #endif - - @@ -16,28 +16,28 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> #include <fstream> +#include <iostream> #include <sstream> -#include <iomanip> +#include <assert.h> +#include <bitset> #include <cmath> +#include <cstring> +#include <iomanip> #include <iostream> #include <stdio.h> #include <stdlib.h> -#include <assert.h> -#include <bitset> -#include <cstring> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" #include "gsl/gsl_cdf.h" -#include "gsl/gsl_roots.h" -#include "gsl/gsl_min.h" #include "gsl/gsl_integration.h" +#include "gsl/gsl_min.h" +#include "gsl/gsl_roots.h" #include "eigenlib.h" #include "gzstream.h" @@ -46,783 +46,835 @@ using namespace std; -void LM::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; +void LM::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; - file_gene=cPar.file_gene; - // WJA added - file_oxford=cPar.file_oxford; + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; + file_gene = cPar.file_gene; + // WJA added + file_oxford = cPar.file_oxford; - time_opt=0.0; + time_opt = 0.0; - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; - n_cvt=cPar.n_cvt; + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ni_test = cPar.ni_test; + ns_test = cPar.ns_test; + n_cvt = cPar.n_cvt; - ng_total=cPar.ng_total; - ng_test=0; + ng_total = cPar.ng_total; + ng_test = 0; - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; - return; + return; } -void LM::CopyToParam (PARAM &cPar) { - cPar.time_opt=time_opt; - cPar.ng_test=ng_test; 
- return; +void LM::CopyToParam(PARAM &cPar) { + cPar.time_opt = time_opt; + cPar.ng_test = ng_test; + return; } -void LM::WriteFiles () { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".assoc.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout << "error writing file: " << file_str.c_str() << endl; - return; - } - - if (!file_gene.empty()) { - outfile<<"geneID"<<"\t"; - - if (a_mode==51) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl; - } else if (a_mode==52) { - outfile<<"p_lrt"<<endl; - } else if (a_mode==53) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl; - } else if (a_mode==54) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<< - "\t"<<"p_lrt"<<"\t"<<"p_score"<<endl; - } else {} - - for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) { - outfile<<snpInfo[t].rs_number<<"\t"; - - if (a_mode==51) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].p_wald <<endl; - } else if (a_mode==52) { - outfile<<scientific<<setprecision(6)<< - "\t"<<sumStat[t].p_lrt<<endl; - } else if (a_mode==53) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].p_score<<endl; - } else if (a_mode==54) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].p_wald <<"\t"<< - sumStat[t].p_lrt<<"\t"<< - sumStat[t].p_score<<endl; - } else {} - } - } else { - outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"<< - "\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<< - "af"<<"\t"; - - if (a_mode==51) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl; - } else if (a_mode==52) { - outfile<<"p_lrt"<<endl; - } else if (a_mode==53) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl; - } else if (a_mode==54) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t" - <<"p_lrt"<<"\t"<<"p_score"<<endl; - } else {} - - size_t t=0; - for (size_t i=0; i<snpInfo.size(); ++i) { 
- if (indicator_snp[i]==0) {continue;} - - outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<< - "\t"<<snpInfo[i].base_position<<"\t"<< - snpInfo[i].n_miss<<"\t"<<ni_test-snpInfo[i].n_miss<< - "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<< - "\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t"; - - if (a_mode==51) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].p_wald <<endl; - } else if (a_mode==52) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].p_lrt<<endl; - } else if (a_mode==53) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].p_score<<endl; - } else if (a_mode==54) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].p_wald <<"\t"<< - sumStat[t].p_lrt<<"\t"<< - sumStat[t].p_score<<endl; - } else {} - t++; - } - } - - outfile.close(); - outfile.clear(); - return; +void LM::WriteFiles() { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".assoc.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + if (!file_gene.empty()) { + outfile << "geneID" + << "\t"; + + if (a_mode == 51) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_wald" << endl; + } else if (a_mode == 52) { + outfile << "p_lrt" << endl; + } else if (a_mode == 53) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_score" << endl; + } else if (a_mode == 54) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_wald" + << "\t" + << "p_lrt" + << "\t" + << "p_score" << endl; + } else { + } + + for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) { + outfile << snpInfo[t].rs_number << "\t"; + + if (a_mode == 51) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_wald << endl; + } else if (a_mode == 52) { + 
outfile << scientific << setprecision(6) << "\t" << sumStat[t].p_lrt + << endl; + } else if (a_mode == 53) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_score << endl; + } else if (a_mode == 54) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t" + << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; + } else { + } + } + } else { + outfile << "chr" + << "\t" + << "rs" + << "\t" + << "ps" + << "\t" + << "n_mis" + << "\t" + << "n_obs" + << "\t" + << "allele1" + << "\t" + << "allele0" + << "\t" + << "af" + << "\t"; + + if (a_mode == 51) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_wald" << endl; + } else if (a_mode == 52) { + outfile << "p_lrt" << endl; + } else if (a_mode == 53) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_score" << endl; + } else if (a_mode == 54) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_wald" + << "\t" + << "p_lrt" + << "\t" + << "p_score" << endl; + } else { + } + + size_t t = 0; + for (size_t i = 0; i < snpInfo.size(); ++i) { + if (indicator_snp[i] == 0) { + continue; + } + + outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" + << ni_test - snpInfo[i].n_miss << "\t" << snpInfo[i].a_minor + << "\t" << snpInfo[i].a_major << "\t" << fixed << setprecision(3) + << snpInfo[i].maf << "\t"; + + if (a_mode == 51) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_wald << endl; + } else if (a_mode == 52) { + outfile << scientific << setprecision(6) << sumStat[t].p_lrt << endl; + } else if (a_mode == 53) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_score << endl; + } else if (a_mode == 54) { + outfile << scientific << setprecision(6) << 
sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_wald << "\t" + << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; + } else { + } + t++; + } + } + + outfile.close(); + outfile.clear(); + return; } void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, - const gsl_vector *Wtx, const gsl_vector *y, - const gsl_vector *x, double &xPwy, double &xPwx) { - size_t c_size=Wty->size; - double d; + const gsl_vector *Wtx, const gsl_vector *y, const gsl_vector *x, + double &xPwy, double &xPwx) { + size_t c_size = Wty->size; + double d; - gsl_vector *WtWiWtx=gsl_vector_alloc (c_size); + gsl_vector *WtWiWtx = gsl_vector_alloc(c_size); - gsl_blas_ddot (x, x, &xPwx); - gsl_blas_ddot (x, y, &xPwy); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); + gsl_blas_ddot(x, x, &xPwx); + gsl_blas_ddot(x, y, &xPwy); + gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - gsl_blas_ddot (WtWiWtx, Wtx, &d); - xPwx-=d; + gsl_blas_ddot(WtWiWtx, Wtx, &d); + xPwx -= d; - gsl_blas_ddot (WtWiWtx, Wty, &d); - xPwy-=d; + gsl_blas_ddot(WtWiWtx, Wty, &d); + xPwy -= d; - gsl_vector_free (WtWiWtx); + gsl_vector_free(WtWiWtx); - return; + return; } -void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, - const gsl_vector *y, double &yPwy) { - size_t c_size=Wty->size; - double d; +void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y, + double &yPwy) { + size_t c_size = Wty->size; + double d; - gsl_vector *WtWiWty=gsl_vector_alloc (c_size); + gsl_vector *WtWiWty = gsl_vector_alloc(c_size); - gsl_blas_ddot (y, y, &yPwy); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty); + gsl_blas_ddot(y, y, &yPwy); + gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty); - gsl_blas_ddot (WtWiWty, Wty, &d); - yPwy-=d; + gsl_blas_ddot(WtWiWty, Wty, &d); + yPwy -= d; - gsl_vector_free (WtWiWty); + gsl_vector_free(WtWiWty); - return; + return; } // Calculate p-values and beta/se in a linear model. 
-void LmCalcP (const size_t test_mode, const double yPwy, - const double xPwy, const double xPwx, const double df, - const size_t n_size, double &beta, double &se, - double &p_wald, double &p_lrt, double &p_score) { - double yPxy=yPwy-xPwy*xPwy/xPwx; - double se_wald, se_score; - - beta=xPwy/xPwx; - se_wald=sqrt(yPxy/(df*xPwx) ); - se_score=sqrt(yPwy/((double)n_size*xPwx) ); - - p_wald=gsl_cdf_fdist_Q (beta*beta/(se_wald*se_wald), 1.0, df); - p_score=gsl_cdf_fdist_Q (beta*beta/(se_score*se_score), 1.0, df); - p_lrt=gsl_cdf_chisq_Q ((double)n_size*(log(yPwy)-log(yPxy)), 1); - - if (test_mode==3) {se=se_score;} else {se=se_wald;} - - return; +void LmCalcP(const size_t test_mode, const double yPwy, const double xPwy, + const double xPwx, const double df, const size_t n_size, + double &beta, double &se, double &p_wald, double &p_lrt, + double &p_score) { + double yPxy = yPwy - xPwy * xPwy / xPwx; + double se_wald, se_score; + + beta = xPwy / xPwx; + se_wald = sqrt(yPxy / (df * xPwx)); + se_score = sqrt(yPwy / ((double)n_size * xPwx)); + + p_wald = gsl_cdf_fdist_Q(beta * beta / (se_wald * se_wald), 1.0, df); + p_score = gsl_cdf_fdist_Q(beta * beta / (se_score * se_score), 1.0, df); + p_lrt = gsl_cdf_chisq_Q((double)n_size * (log(yPwy) - log(yPxy)), 1); + + if (test_mode == 3) { + se = se_score; + } else { + se = se_wald; + } + + return; } -void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x) { - ifstream infile (file_gene.c_str(), ifstream::in); - if (!infile) { - cout<<"error reading gene expression file:"<<file_gene<<endl; - return; - } +void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) { + ifstream infile(file_gene.c_str(), ifstream::in); + if (!infile) { + cout << "error reading gene expression file:" << file_gene << endl; + return; + } - clock_t time_start=clock(); + clock_t time_start = clock(); - string line; - char *ch_ptr; + string line; + char *ch_ptr; - double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; - int c_phen; - string rs; // 
Gene id. - double d; + double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0; + int c_phen; + string rs; // Gene id. + double d; - // Calculate some basic quantities. - double yPwy, xPwy, xPwx; - double df=(double)W->size1-(double)W->size2-1.0; + // Calculate some basic quantities. + double yPwy, xPwy, xPwx; + double df = (double)W->size1 - (double)W->size2 - 1.0; - gsl_vector *y=gsl_vector_alloc (W->size1); + gsl_vector *y = gsl_vector_alloc(W->size1); - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wty = gsl_vector_alloc(W->size2); + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); - gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wtx, x, xPwx); + gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); + CalcvPv(WtWi, Wtx, x, xPwx); - // Header. - getline(infile, line); + // Header. 
+ getline(infile, line); - for (size_t t=0; t<ng_total; t++) { - getline(infile, line); - if (t%d_pace==0 || t==ng_total-1) { - ProgressBar ("Performing Analysis ", t, ng_total-1); - } - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; + for (size_t t = 0; t < ng_total; t++) { + getline(infile, line); + if (t % d_pace == 0 || t == ng_total - 1) { + ProgressBar("Performing Analysis ", t, ng_total - 1); + } + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; - c_phen=0; - for (size_t i=0; i<indicator_idv.size(); ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} + c_phen = 0; + for (size_t i = 0; i < indicator_idv.size(); ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } - d=atof(ch_ptr); - gsl_vector_set(y, c_phen, d); + d = atof(ch_ptr); + gsl_vector_set(y, c_phen, d); - c_phen++; - } + c_phen++; + } - // Calculate statistics. - time_start=clock(); + // Calculate statistics. + time_start = clock(); - gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); - CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy); - LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, - beta, se, p_wald, p_lrt, p_score); + gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); + CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy); + LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, + p_lrt, p_score); - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - // Store summary data. - SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout<<endl; + // Store summary data. 
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + cout << endl; - gsl_vector_free(y); + gsl_vector_free(y); - gsl_matrix_free(WtW); - gsl_matrix_free(WtWi); - gsl_vector_free(Wty); - gsl_vector_free(Wtx); - gsl_permutation_free(pmt); + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(Wty); + gsl_vector_free(Wtx); + gsl_permutation_free(pmt); - infile.close(); - infile.clear(); + infile.close(); + infile.clear(); - return; + return; } // WJA added -void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) { - string file_bgen=file_oxford+".bgen"; - ifstream infile (file_bgen.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bgen file:"<<file_bgen<<endl; - return; - } - - clock_t time_start=clock(); - - string line; - char *ch_ptr; - - double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate some basic quantities. - double yPwy, xPwy, xPwx; - double df=(double)W->size1-(double)W->size2-1.0; - - gsl_vector *x=gsl_vector_alloc (W->size1); - gsl_vector *x_miss=gsl_vector_alloc (W->size1); - - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty); - CalcvPv(WtWi, Wty, y, yPwy); - - // Read in header. 
- uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4); - infile.read(reinterpret_cast<char*>(&bgen_header_length),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsnps),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsamples),4); - bgen_snp_block_offset-=4; - infile.ignore(4+bgen_header_length-20); - bgen_snp_block_offset-=4+bgen_header_length-20; - infile.read(reinterpret_cast<char*>(&bgen_flags),4); - bgen_snp_block_offset-=4; - bool CompressedSNPBlocks=bgen_flags&0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB; - double bgen_geno_prob_BB, bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - std::cout << "Warning: WJA hard coded SNP missingness " << - "threshold of 10%" << std::endl; - - // Start reading genotypes and analyze. - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - - // Read SNP header. 
- id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char*>(&bgen_N),4); - infile.read(reinterpret_cast<char*>(&bgen_LS),2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char*>(&bgen_LR),2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char*>(&bgen_LC),2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4); - - infile.read(reinterpret_cast<char*>(&bgen_LA),4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char*>(&bgen_LB),4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3*bgen_N]; - - if (indicator_snp[t]==0) { - if(CompressedSNPBlocks) - infile.read(reinterpret_cast<char*>(&bgen_P),4); - else - bgen_P=6*bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if(CompressedSNPBlocks) { - infile.read(reinterpret_cast<char*>(&bgen_P),4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size=6*bgen_N; - - infile.read(reinterpret_cast<char*>(zipped_data), - bgen_P); - - int result= - uncompress(reinterpret_cast<Bytef*>(unzipped_data), - reinterpret_cast<uLongf*>(&unzipped_data_size), - reinterpret_cast<Bytef*>(zipped_data), - static_cast<uLong> (bgen_P)); - assert(result == Z_OK); - - } - else - { - - bgen_P=6*bgen_N; - infile.read(reinterpret_cast<char*>(unzipped_data), - bgen_P); - } - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<bgen_N; ++i) { - if (indicator_idv[i]==0) {continue;} - - - bgen_geno_prob_AA= - static_cast<double>(unzipped_data[i*3])/32768.0; - bgen_geno_prob_AB= - static_cast<double>(unzipped_data[i*3+1])/32768.0; - bgen_geno_prob_BB= - static_cast<double>(unzipped_data[i*3+2])/32768.0; - - // WJA - bgen_geno_prob_non_miss= - bgen_geno_prob_AA + - 
bgen_geno_prob_AB + - bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss<0.9) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } - else { - bgen_geno_prob_AA/=bgen_geno_prob_non_miss; - bgen_geno_prob_AB/=bgen_geno_prob_non_miss; - bgen_geno_prob_BB/=bgen_geno_prob_non_miss; - - geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=static_cast<double>(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) { - gsl_vector_set(x, i, x_mean); - } - geno=gsl_vector_get(x, i); - } - - // Calculate statistics. - time_start=clock(); - - gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); - LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, - beta, se, p_wald, p_lrt, p_score); - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. - SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout<<endl; - - gsl_vector_free(x); - gsl_vector_free(x_miss); - - gsl_matrix_free(WtW); - gsl_matrix_free(WtWi); - gsl_vector_free(Wty); - gsl_vector_free(Wtx); - gsl_permutation_free(pmt); - - infile.close(); - infile.clear(); - - return; +void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) { + string file_bgen = file_oxford + ".bgen"; + ifstream infile(file_bgen.c_str(), ios::binary); + if (!infile) { + cout << "error reading bgen file:" << file_bgen << endl; + return; + } + + clock_t time_start = clock(); + + string line; + char *ch_ptr; + + double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0; + int n_miss, c_phen; + double geno, x_mean; + + // Calculate some basic quantities. 
+ double yPwy, xPwy, xPwx; + double df = (double)W->size1 - (double)W->size2 - 1.0; + + gsl_vector *x = gsl_vector_alloc(W->size1); + gsl_vector *x_miss = gsl_vector_alloc(W->size1); + + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wty = gsl_vector_alloc(W->size2); + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); + CalcvPv(WtWi, Wty, y, yPwy); + + // Read in header. + uint32_t bgen_snp_block_offset; + uint32_t bgen_header_length; + uint32_t bgen_nsamples; + uint32_t bgen_nsnps; + uint32_t bgen_flags; + infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); + infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); + bgen_snp_block_offset -= 4; + infile.ignore(4 + bgen_header_length - 20); + bgen_snp_block_offset -= 4 + bgen_header_length - 20; + infile.read(reinterpret_cast<char *>(&bgen_flags), 4); + bgen_snp_block_offset -= 4; + bool CompressedSNPBlocks = bgen_flags & 0x1; + + infile.ignore(bgen_snp_block_offset); + + double bgen_geno_prob_AA, bgen_geno_prob_AB; + double bgen_geno_prob_BB, bgen_geno_prob_non_miss; + + uint32_t bgen_N; + uint16_t bgen_LS; + uint16_t bgen_LR; + uint16_t bgen_LC; + uint32_t bgen_SNP_pos; + uint32_t bgen_LA; + std::string bgen_A_allele; + uint32_t bgen_LB; + std::string bgen_B_allele; + uint32_t bgen_P; + size_t unzipped_data_size; + string id; + string rs; + string chr; + std::cout << "Warning: WJA hard coded SNP missingness " + << "threshold of 10%" << std::endl; + + // Start reading genotypes and analyze. 
+ for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + + // Read SNP header. + id.clear(); + rs.clear(); + chr.clear(); + bgen_A_allele.clear(); + bgen_B_allele.clear(); + + infile.read(reinterpret_cast<char *>(&bgen_N), 4); + infile.read(reinterpret_cast<char *>(&bgen_LS), 2); + + id.resize(bgen_LS); + infile.read(&id[0], bgen_LS); + + infile.read(reinterpret_cast<char *>(&bgen_LR), 2); + rs.resize(bgen_LR); + infile.read(&rs[0], bgen_LR); + + infile.read(reinterpret_cast<char *>(&bgen_LC), 2); + chr.resize(bgen_LC); + infile.read(&chr[0], bgen_LC); + + infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); + + infile.read(reinterpret_cast<char *>(&bgen_LA), 4); + bgen_A_allele.resize(bgen_LA); + infile.read(&bgen_A_allele[0], bgen_LA); + + infile.read(reinterpret_cast<char *>(&bgen_LB), 4); + bgen_B_allele.resize(bgen_LB); + infile.read(&bgen_B_allele[0], bgen_LB); + + uint16_t unzipped_data[3 * bgen_N]; + + if (indicator_snp[t] == 0) { + if (CompressedSNPBlocks) + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + else + bgen_P = 6 * bgen_N; + + infile.ignore(static_cast<size_t>(bgen_P)); + + continue; + } + + if (CompressedSNPBlocks) { + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + uint8_t zipped_data[bgen_P]; + + unzipped_data_size = 6 * bgen_N; + + infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); + + int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), + reinterpret_cast<uLongf *>(&unzipped_data_size), + reinterpret_cast<Bytef *>(zipped_data), + static_cast<uLong>(bgen_P)); + assert(result == Z_OK); + + } else { + + bgen_P = 6 * bgen_N; + infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); + } + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < bgen_N; ++i) { + if (indicator_idv[i] == 0) { + continue; + } + + bgen_geno_prob_AA = static_cast<double>(unzipped_data[i 
* 3]) / 32768.0; + bgen_geno_prob_AB = + static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; + bgen_geno_prob_BB = + static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; + + // WJA + bgen_geno_prob_non_miss = + bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; + if (bgen_geno_prob_non_miss < 0.9) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + bgen_geno_prob_AA /= bgen_geno_prob_non_miss; + bgen_geno_prob_AB /= bgen_geno_prob_non_miss; + bgen_geno_prob_BB /= bgen_geno_prob_non_miss; + + geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= static_cast<double>(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + geno = gsl_vector_get(x, i); + } + + // Calculate statistics. + time_start = clock(); + + gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); + CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); + LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, + p_lrt, p_score); + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. 
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(x_miss); + + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(Wty); + gsl_vector_free(Wtx); + gsl_permutation_free(pmt); + + infile.close(); + infile.clear(); + + return; } -void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout << "error reading genotype file:" << file_geno << endl; - return; - } - - clock_t time_start=clock(); - - string line; - char *ch_ptr; - - double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate some basic quantities. - double yPwy, xPwy, xPwx; - double df=(double)W->size1-(double)W->size2-1.0; - - gsl_vector *x=gsl_vector_alloc (W->size1); - gsl_vector *x_miss=gsl_vector_alloc (W->size1); - - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty); - CalcvPv(WtWi, Wty, y, yPwy); - - // Start reading genotypes and analyze. 
- for (size_t t=0; t<indicator_snp.size(); ++t) { - getline(infile, line); - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } - else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) { - gsl_vector_set(x, i, x_mean); - } - geno=gsl_vector_get(x, i); - } - - // Calculate statistics. - time_start=clock(); - - gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); - LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, - beta, se, p_wald, p_lrt, p_score); - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. 
- SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout<<endl; - - gsl_vector_free(x); - gsl_vector_free(x_miss); - - gsl_matrix_free(WtW); - gsl_matrix_free(WtWi); - gsl_vector_free(Wty); - gsl_vector_free(Wtx); - gsl_permutation_free(pmt); - - infile.close(); - infile.clear(); - - return; +void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return; + } + + clock_t time_start = clock(); + + string line; + char *ch_ptr; + + double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0; + int n_miss, c_phen; + double geno, x_mean; + + // Calculate some basic quantities. + double yPwy, xPwy, xPwx; + double df = (double)W->size1 - (double)W->size2 - 1.0; + + gsl_vector *x = gsl_vector_alloc(W->size1); + gsl_vector *x_miss = gsl_vector_alloc(W->size1); + + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wty = gsl_vector_alloc(W->size2); + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); + CalcvPv(WtWi, Wty, y, yPwy); + + // Start reading genotypes and analyze. 
+ for (size_t t = 0; t < indicator_snp.size(); ++t) { + getline(infile, line); + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + geno = atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + geno = gsl_vector_get(x, i); + } + + // Calculate statistics. + time_start = clock(); + + gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); + CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); + LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, + p_lrt, p_score); + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. 
+ SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(x_miss); + + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(Wty); + gsl_vector_free(Wtx); + gsl_permutation_free(pmt); + + infile.close(); + infile.clear(); + + return; } -void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y) { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return; - } - - clock_t time_start=clock(); - - char ch[1]; - bitset<8> b; - - double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; - - // Calculate some basic quantities. - double yPwy, xPwy, xPwx; - double df=(double)W->size1-(double)W->size2-1.0; - - gsl_vector *x=gsl_vector_alloc (W->size1); - - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty); - CalcvPv(WtWi, Wty, y, yPwy); - - // Calculate n_bit and c, the number of bit for each SNP. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1;} - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) { - ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. 
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0; - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==(int)ni_total) { - break; - } - if (indicator_idv[ci_total]==0) { - ci_total++; - continue; - } - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(x, ci_test, 2); - x_mean+=2.0; - } - else { - gsl_vector_set(x, ci_test, 1); - x_mean+=1.0; } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(x, ci_test, 0); - } - else { - gsl_vector_set(x, ci_test, -9); - n_miss++; - } - } - - ci_total++; - ci_test++; - } - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - geno=gsl_vector_get(x,i); - if (geno==-9) { - gsl_vector_set(x, i, x_mean); - geno=x_mean; - } - } - - // Calculate statistics. - time_start=clock(); - - gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); - LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, - beta, se, p_wald, p_lrt, p_score); - - //store summary data - SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - } - cout<<endl; - - gsl_vector_free(x); - - gsl_matrix_free(WtW); - gsl_matrix_free(WtWi); - gsl_vector_free(Wty); - gsl_vector_free(Wtx); - gsl_permutation_free(pmt); - - infile.close(); - infile.clear(); - - return; +void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } + + clock_t time_start = clock(); + + char ch[1]; + bitset<8> b; + + double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0; + int n_bit, n_miss, ci_total, ci_test; + double geno, x_mean; + + // Calculate some basic quantities. 
+ double yPwy, xPwy, xPwx; + double df = (double)W->size1 - (double)W->size2 - 1.0; + + gsl_vector *x = gsl_vector_alloc(W->size1); + + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wty = gsl_vector_alloc(W->size2); + gsl_vector *Wtx = gsl_vector_alloc(W->size2); + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + int sig; + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); + CalcvPv(WtWi, Wty, y, yPwy); + + // Calculate n_bit and c, the number of bit for each SNP. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { + if (t % d_pace == 0 || t == snpInfo.size() - 1) { + ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. 
+ x_mean = 0.0; + n_miss = 0; + ci_total = 0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0; + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(x, ci_test, 0); + } else { + gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } + + ci_total++; + ci_test++; + } + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + geno = gsl_vector_get(x, i); + if (geno == -9) { + gsl_vector_set(x, i, x_mean); + geno = x_mean; + } + } + + // Calculate statistics. + time_start = clock(); + + gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); + CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); + LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, + p_lrt, p_score); + + // store summary data + SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + cout << endl; + + gsl_vector_free(x); + + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_vector_free(Wty); + gsl_vector_free(Wtx); + gsl_permutation_free(pmt); + + infile.close(); + infile.clear(); + + return; } // Make sure that both y and X are centered already. 
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, - vector<pair<size_t, double> > &pos_loglr) { - double yty, xty, xtx, log_lr; - gsl_blas_ddot(y, y, &yty); +void MatrixCalcLmLR(const gsl_matrix *X, const gsl_vector *y, + vector<pair<size_t, double>> &pos_loglr) { + double yty, xty, xtx, log_lr; + gsl_blas_ddot(y, y, &yty); - for (size_t i=0; i<X->size2; ++i) { - gsl_vector_const_view X_col=gsl_matrix_const_column (X, i); - gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx); - gsl_blas_ddot(&X_col.vector, y, &xty); + for (size_t i = 0; i < X->size2; ++i) { + gsl_vector_const_view X_col = gsl_matrix_const_column(X, i); + gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx); + gsl_blas_ddot(&X_col.vector, y, &xty); - log_lr=0.5*(double)y->size*(log(yty)-log(yty-xty*xty/xtx)); - pos_loglr.push_back(make_pair(i,log_lr) ); - } + log_lr = 0.5 * (double)y->size * (log(yty) - log(yty - xty * xty / xtx)); + pos_loglr.push_back(make_pair(i, log_lr)); + } - return; + return; } @@ -19,61 +19,61 @@ #ifndef __LM_H__ #define __LM_H__ -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" -#include "param.h" +#include "gsl/gsl_vector.h" #include "io.h" +#include "param.h" using namespace std; class LM { public: - // IO-related parameters. - int a_mode; // Analysis mode: 50+1/2/3/4 for Frequentist tests. - size_t d_pace; // Display pace. - - string file_bfile; - string file_geno; - string file_oxford; - string file_out; - string path_out; - - string file_gene; - - // Summary statistics. - size_t ni_total, ni_test; // Number of individuals. - size_t ns_total, ns_test; // Number of SNPs. - size_t ng_total, ng_test; // Number of genes. - size_t n_cvt; - double time_opt; // Time spent. - - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis. - vector<int> indicator_idv; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. 
- vector<int> indicator_snp; - - vector<SNPINFO> snpInfo; // Record SNP information. - - // Not included in PARAM. - vector<SUMSTAT> sumStat; // Output SNPSummary Data. - - // Main functions. - void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - void AnalyzeGene (const gsl_matrix *W, const gsl_vector *x); - void AnalyzePlink (const gsl_matrix *W, const gsl_vector *y); - void AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y); - // WJA added. - void Analyzebgen (const gsl_matrix *W, const gsl_vector *y); - - void WriteFiles (); + // IO-related parameters. + int a_mode; // Analysis mode: 50+1/2/3/4 for Frequentist tests. + size_t d_pace; // Display pace. + + string file_bfile; + string file_geno; + string file_oxford; + string file_out; + string path_out; + + string file_gene; + + // Summary statistics. + size_t ni_total, ni_test; // Number of individuals. + size_t ns_total, ns_test; // Number of SNPs. + size_t ng_total, ng_test; // Number of genes. + size_t n_cvt; + double time_opt; // Time spent. + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + vector<SNPINFO> snpInfo; // Record SNP information. + + // Not included in PARAM. + vector<SUMSTAT> sumStat; // Output SNPSummary Data. + + // Main functions. + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + void AnalyzeGene(const gsl_matrix *W, const gsl_vector *x); + void AnalyzePlink(const gsl_matrix *W, const gsl_vector *y); + void AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y); + // WJA added. 
+ void Analyzebgen(const gsl_matrix *W, const gsl_vector *y); + + void WriteFiles(); }; -void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, - vector<pair<size_t, double> > &pos_loglr); +void MatrixCalcLmLR(const gsl_matrix *X, const gsl_vector *y, + vector<pair<size_t, double>> &pos_loglr); #endif diff --git a/src/lmm.cpp b/src/lmm.cpp index 2b5ca84..3f51073 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -16,2488 +16,2585 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> #include <fstream> +#include <iostream> #include <sstream> -#include <iomanip> +#include <assert.h> +#include <bitset> #include <cmath> +#include <cstring> +#include <iomanip> #include <iostream> -#include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <bitset> -#include <cstring> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" -#include "gsl/gsl_roots.h" -#include "gsl/gsl_min.h" #include "gsl/gsl_integration.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_min.h" +#include "gsl/gsl_roots.h" +#include "gsl/gsl_vector.h" -#include "io.h" #include "eigenlib.h" -#include "lapack.h" #include "gzstream.h" +#include "io.h" +#include "lapack.h" #include "lmm.h" using namespace std; -void LMM::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; +void LMM::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; - file_gene=cPar.file_gene; + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; + file_gene = cPar.file_gene; - // WJA added. - file_oxford=cPar.file_oxford; + // WJA added. 
+ file_oxford = cPar.file_oxford; - l_min=cPar.l_min; - l_max=cPar.l_max; - n_region=cPar.n_region; - l_mle_null=cPar.l_mle_null; - logl_mle_H0=cPar.logl_mle_H0; + l_min = cPar.l_min; + l_max = cPar.l_max; + n_region = cPar.n_region; + l_mle_null = cPar.l_mle_null; + logl_mle_H0 = cPar.logl_mle_H0; - time_UtX=0.0; - time_opt=0.0; + time_UtX = 0.0; + time_opt = 0.0; - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; - n_cvt=cPar.n_cvt; + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ni_test = cPar.ni_test; + ns_test = cPar.ns_test; + n_cvt = cPar.n_cvt; - ng_total=cPar.ng_total; - ng_test=0; + ng_total = cPar.ng_total; + ng_test = 0; - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; - return; + return; } -void LMM::CopyToParam (PARAM &cPar) { - cPar.time_UtX=time_UtX; - cPar.time_opt=time_opt; +void LMM::CopyToParam(PARAM &cPar) { + cPar.time_UtX = time_UtX; + cPar.time_opt = time_opt; - cPar.ng_test=ng_test; + cPar.ng_test = ng_test; - return; + return; } -void LMM::WriteFiles () { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".assoc.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - if (!file_gene.empty()) { - outfile<<"geneID"<<"\t"; - - if (a_mode==1) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<< - "\t"<<"p_wald"<<endl; - } else if (a_mode==2) { - outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl; - } else if (a_mode==3) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl; - } else if (a_mode==4) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<< - "\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<< - "\t"<<"p_score"<<endl; - } else {} - - for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) { - outfile<<snpInfo[t].rs_number<<"\t"; - - if 
(a_mode==1) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<< - sumStat[t].lambda_remle<<"\t"<< - sumStat[t].p_wald <<endl; - } else if (a_mode==2) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].lambda_mle<<"\t"<< - sumStat[t].p_lrt<<endl; - } else if (a_mode==3) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].p_score<<endl; - } else if (a_mode==4) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<< - sumStat[t].lambda_remle<<"\t"<< - sumStat[t].lambda_mle<<"\t"<< - sumStat[t].p_wald <<"\t"<< - sumStat[t].p_lrt<<"\t"<< - sumStat[t].p_score<<endl; - } else {} - } - } else { - outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t" - <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t"; - - if (a_mode==1) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t" - <<"p_wald"<<endl; - } else if (a_mode==2) { - outfile<<"l_mle"<<"\t"<<"p_lrt"<<endl; - } else if (a_mode==3) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_score"<<endl; - } else if (a_mode==4) { - outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t" - <<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<< - "\t"<<"p_score"<<endl; - } else {} - - size_t t=0; - for (size_t i=0; i<snpInfo.size(); ++i) { - if (indicator_snp[i]==0) {continue;} - - outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<< - "\t"<<snpInfo[i].base_position<<"\t"<< - snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<< - snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<< - snpInfo[i].maf<<"\t"; - - if (a_mode==1) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].lambda_remle<<"\t"<< - sumStat[t].p_wald <<endl; - } else if (a_mode==2) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].lambda_mle<<"\t"<< - sumStat[t].p_lrt<<endl; - } else if (a_mode==3) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - 
"\t"<<sumStat[t].p_score<<endl; - } else if (a_mode==4) { - outfile<<scientific<<setprecision(6)<< - sumStat[t].beta<<"\t"<<sumStat[t].se<< - "\t"<<sumStat[t].lambda_remle<<"\t"<< - sumStat[t].lambda_mle<<"\t"<< - sumStat[t].p_wald <<"\t"<< - sumStat[t].p_lrt<<"\t"<< - sumStat[t].p_score<<endl; - } else {} - t++; - } - } - - outfile.close(); - outfile.clear(); - return; +void LMM::WriteFiles() { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".assoc.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + if (!file_gene.empty()) { + outfile << "geneID" + << "\t"; + + if (a_mode == 1) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "l_remle" + << "\t" + << "p_wald" << endl; + } else if (a_mode == 2) { + outfile << "l_mle" + << "\t" + << "p_lrt" << endl; + } else if (a_mode == 3) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_score" << endl; + } else if (a_mode == 4) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "l_remle" + << "\t" + << "l_mle" + << "\t" + << "p_wald" + << "\t" + << "p_lrt" + << "\t" + << "p_score" << endl; + } else { + } + + for (vector<SUMSTAT>::size_type t = 0; t < sumStat.size(); ++t) { + outfile << snpInfo[t].rs_number << "\t"; + + if (a_mode == 1) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" + << sumStat[t].p_wald << endl; + } else if (a_mode == 2) { + outfile << scientific << setprecision(6) << sumStat[t].lambda_mle + << "\t" << sumStat[t].p_lrt << endl; + } else if (a_mode == 3) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_score << endl; + } else if (a_mode == 4) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" + << sumStat[t].lambda_mle << "\t" 
<< sumStat[t].p_wald << "\t" + << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; + } else { + } + } + } else { + outfile << "chr" + << "\t" + << "rs" + << "\t" + << "ps" + << "\t" + << "n_miss" + << "\t" + << "allele1" + << "\t" + << "allele0" + << "\t" + << "af" + << "\t"; + + if (a_mode == 1) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "l_remle" + << "\t" + << "p_wald" << endl; + } else if (a_mode == 2) { + outfile << "l_mle" + << "\t" + << "p_lrt" << endl; + } else if (a_mode == 3) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "p_score" << endl; + } else if (a_mode == 4) { + outfile << "beta" + << "\t" + << "se" + << "\t" + << "l_remle" + << "\t" + << "l_mle" + << "\t" + << "p_wald" + << "\t" + << "p_lrt" + << "\t" + << "p_score" << endl; + } else { + } + + size_t t = 0; + for (size_t i = 0; i < snpInfo.size(); ++i) { + if (indicator_snp[i] == 0) { + continue; + } + + outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" + << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" + << fixed << setprecision(3) << snpInfo[i].maf << "\t"; + + if (a_mode == 1) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" + << sumStat[t].p_wald << endl; + } else if (a_mode == 2) { + outfile << scientific << setprecision(6) << sumStat[t].lambda_mle + << "\t" << sumStat[t].p_lrt << endl; + } else if (a_mode == 3) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].p_score << endl; + } else if (a_mode == 4) { + outfile << scientific << setprecision(6) << sumStat[t].beta << "\t" + << sumStat[t].se << "\t" << sumStat[t].lambda_remle << "\t" + << sumStat[t].lambda_mle << "\t" << sumStat[t].p_wald << "\t" + << sumStat[t].p_lrt << "\t" << sumStat[t].p_score << endl; + } else { + } + t++; + } + } + + outfile.close(); + 
outfile.clear(); + return; } -void CalcPab (const size_t n_cvt, const size_t e_mode, - const gsl_vector *Hi_eval, const gsl_matrix *Uab, - const gsl_vector *ab, gsl_matrix *Pab) { - size_t index_ab, index_aw, index_bw, index_ww; - double p_ab; - double ps_ab, ps_aw, ps_bw, ps_ww; - - for (size_t p=0; p<=n_cvt+1; ++p) { - for (size_t a=p+1; a<=n_cvt+2; ++a) { - for (size_t b=a; b<=n_cvt+2; ++b) { - index_ab=GetabIndex (a, b, n_cvt); - if (p==0) { - gsl_vector_const_view Uab_col= - gsl_matrix_const_column (Uab, index_ab); - gsl_blas_ddot(Hi_eval,&Uab_col.vector,&p_ab); - if (e_mode!=0) { - p_ab=gsl_vector_get (ab, index_ab)-p_ab; - } - gsl_matrix_set (Pab, 0, index_ab, p_ab); - } - else { - index_aw=GetabIndex (a, p, n_cvt); - index_bw=GetabIndex (b, p, n_cvt); - index_ww=GetabIndex (p, p, n_cvt); - - ps_ab=gsl_matrix_get (Pab, p-1, index_ab); - ps_aw=gsl_matrix_get (Pab, p-1, index_aw); - ps_bw=gsl_matrix_get (Pab, p-1, index_bw); - ps_ww=gsl_matrix_get (Pab, p-1, index_ww); - - p_ab=ps_ab-ps_aw*ps_bw/ps_ww; - gsl_matrix_set (Pab, p, index_ab, p_ab); - } - } - } - } - return; +void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, + const gsl_matrix *Uab, const gsl_vector *ab, gsl_matrix *Pab) { + size_t index_ab, index_aw, index_bw, index_ww; + double p_ab; + double ps_ab, ps_aw, ps_bw, ps_ww; + + for (size_t p = 0; p <= n_cvt + 1; ++p) { + for (size_t a = p + 1; a <= n_cvt + 2; ++a) { + for (size_t b = a; b <= n_cvt + 2; ++b) { + index_ab = GetabIndex(a, b, n_cvt); + if (p == 0) { + gsl_vector_const_view Uab_col = + gsl_matrix_const_column(Uab, index_ab); + gsl_blas_ddot(Hi_eval, &Uab_col.vector, &p_ab); + if (e_mode != 0) { + p_ab = gsl_vector_get(ab, index_ab) - p_ab; + } + gsl_matrix_set(Pab, 0, index_ab, p_ab); + } else { + index_aw = GetabIndex(a, p, n_cvt); + index_bw = GetabIndex(b, p, n_cvt); + index_ww = GetabIndex(p, p, n_cvt); + + ps_ab = gsl_matrix_get(Pab, p - 1, index_ab); + ps_aw = gsl_matrix_get(Pab, p - 1, index_aw); + 
ps_bw = gsl_matrix_get(Pab, p - 1, index_bw); + ps_ww = gsl_matrix_get(Pab, p - 1, index_ww); + + p_ab = ps_ab - ps_aw * ps_bw / ps_ww; + gsl_matrix_set(Pab, p, index_ab, p_ab); + } + } + } + } + return; } -void CalcPPab (const size_t n_cvt, const size_t e_mode, - const gsl_vector *HiHi_eval, const gsl_matrix *Uab, - const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) { - size_t index_ab, index_aw, index_bw, index_ww; - double p2_ab; - double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww; - - for (size_t p=0; p<=n_cvt+1; ++p) { - for (size_t a=p+1; a<=n_cvt+2; ++a) { - for (size_t b=a; b<=n_cvt+2; ++b) { - index_ab=GetabIndex (a, b, n_cvt); - if (p==0) { - gsl_vector_const_view Uab_col= - gsl_matrix_const_column (Uab, index_ab); - gsl_blas_ddot (HiHi_eval, &Uab_col.vector, - &p2_ab); - if (e_mode!=0) { - p2_ab=p2_ab-gsl_vector_get(ab,index_ab) + - 2.0*gsl_matrix_get (Pab, 0, index_ab); - } - gsl_matrix_set (PPab, 0, index_ab, p2_ab); - } - else { - index_aw=GetabIndex (a, p, n_cvt); - index_bw=GetabIndex (b, p, n_cvt); - index_ww=GetabIndex (p, p, n_cvt); - - ps2_ab=gsl_matrix_get (PPab, p-1, index_ab); - ps_aw=gsl_matrix_get (Pab, p-1, index_aw); - ps_bw=gsl_matrix_get (Pab, p-1, index_bw); - ps_ww=gsl_matrix_get (Pab, p-1, index_ww); - ps2_aw=gsl_matrix_get (PPab, p-1, index_aw); - ps2_bw=gsl_matrix_get (PPab, p-1, index_bw); - ps2_ww=gsl_matrix_get (PPab, p-1, index_ww); - - p2_ab=ps2_ab+ps_aw*ps_bw* - ps2_ww/(ps_ww*ps_ww); - p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww; - gsl_matrix_set (PPab, p, index_ab, p2_ab); - } - } - } - } - return; +void CalcPPab(const size_t n_cvt, const size_t e_mode, + const gsl_vector *HiHi_eval, const gsl_matrix *Uab, + const gsl_vector *ab, const gsl_matrix *Pab, gsl_matrix *PPab) { + size_t index_ab, index_aw, index_bw, index_ww; + double p2_ab; + double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww; + + for (size_t p = 0; p <= n_cvt + 1; ++p) { + for (size_t a = p + 1; a <= n_cvt + 2; ++a) { + for (size_t b = 
a; b <= n_cvt + 2; ++b) { + index_ab = GetabIndex(a, b, n_cvt); + if (p == 0) { + gsl_vector_const_view Uab_col = + gsl_matrix_const_column(Uab, index_ab); + gsl_blas_ddot(HiHi_eval, &Uab_col.vector, &p2_ab); + if (e_mode != 0) { + p2_ab = p2_ab - gsl_vector_get(ab, index_ab) + + 2.0 * gsl_matrix_get(Pab, 0, index_ab); + } + gsl_matrix_set(PPab, 0, index_ab, p2_ab); + } else { + index_aw = GetabIndex(a, p, n_cvt); + index_bw = GetabIndex(b, p, n_cvt); + index_ww = GetabIndex(p, p, n_cvt); + + ps2_ab = gsl_matrix_get(PPab, p - 1, index_ab); + ps_aw = gsl_matrix_get(Pab, p - 1, index_aw); + ps_bw = gsl_matrix_get(Pab, p - 1, index_bw); + ps_ww = gsl_matrix_get(Pab, p - 1, index_ww); + ps2_aw = gsl_matrix_get(PPab, p - 1, index_aw); + ps2_bw = gsl_matrix_get(PPab, p - 1, index_bw); + ps2_ww = gsl_matrix_get(PPab, p - 1, index_ww); + + p2_ab = ps2_ab + ps_aw * ps_bw * ps2_ww / (ps_ww * ps_ww); + p2_ab -= (ps_aw * ps2_bw + ps_bw * ps2_aw) / ps_ww; + gsl_matrix_set(PPab, p, index_ab, p2_ab); + } + } + } + } + return; } -void CalcPPPab (const size_t n_cvt, const size_t e_mode, - const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab, - const gsl_vector *ab, const gsl_matrix *Pab, - const gsl_matrix *PPab, gsl_matrix *PPPab) { - size_t index_ab, index_aw, index_bw, index_ww; - double p3_ab; - double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, - ps3_aw, ps3_bw, ps3_ww; - - for (size_t p=0; p<=n_cvt+1; ++p) { - for (size_t a=p+1; a<=n_cvt+2; ++a) { - for (size_t b=a; b<=n_cvt+2; ++b) { - index_ab=GetabIndex (a, b, n_cvt); - if (p==0) { - gsl_vector_const_view Uab_col= - gsl_matrix_const_column (Uab, index_ab); - gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector, - &p3_ab); - if (e_mode!=0) { - p3_ab=gsl_vector_get (ab, index_ab)- - p3_ab+3.0*gsl_matrix_get(PPab,0,index_ab) - -3.0*gsl_matrix_get (Pab, 0, index_ab); - } - gsl_matrix_set (PPPab, 0, index_ab, p3_ab); - } - else { - index_aw=GetabIndex (a, p, n_cvt); - index_bw=GetabIndex (b, p, n_cvt); - index_ww=GetabIndex (p, 
p, n_cvt); - - ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab); - ps_aw=gsl_matrix_get (Pab, p-1, index_aw); - ps_bw=gsl_matrix_get (Pab, p-1, index_bw); - ps_ww=gsl_matrix_get (Pab, p-1, index_ww); - ps2_aw=gsl_matrix_get (PPab, p-1, index_aw); - ps2_bw=gsl_matrix_get (PPab, p-1, index_bw); - ps2_ww=gsl_matrix_get (PPab, p-1, index_ww); - ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw); - ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw); - ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww); - - p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww - /(ps_ww*ps_ww*ps_ww); - p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw + - ps2_aw*ps2_bw)/ps_ww; - p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw* - ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/ - (ps_ww*ps_ww); - - gsl_matrix_set (PPPab, p, index_ab, p3_ab); - } - } - } - } - return; +void CalcPPPab(const size_t n_cvt, const size_t e_mode, + const gsl_vector *HiHiHi_eval, const gsl_matrix *Uab, + const gsl_vector *ab, const gsl_matrix *Pab, + const gsl_matrix *PPab, gsl_matrix *PPPab) { + size_t index_ab, index_aw, index_bw, index_ww; + double p3_ab; + double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw, + ps3_ww; + + for (size_t p = 0; p <= n_cvt + 1; ++p) { + for (size_t a = p + 1; a <= n_cvt + 2; ++a) { + for (size_t b = a; b <= n_cvt + 2; ++b) { + index_ab = GetabIndex(a, b, n_cvt); + if (p == 0) { + gsl_vector_const_view Uab_col = + gsl_matrix_const_column(Uab, index_ab); + gsl_blas_ddot(HiHiHi_eval, &Uab_col.vector, &p3_ab); + if (e_mode != 0) { + p3_ab = gsl_vector_get(ab, index_ab) - p3_ab + + 3.0 * gsl_matrix_get(PPab, 0, index_ab) - + 3.0 * gsl_matrix_get(Pab, 0, index_ab); + } + gsl_matrix_set(PPPab, 0, index_ab, p3_ab); + } else { + index_aw = GetabIndex(a, p, n_cvt); + index_bw = GetabIndex(b, p, n_cvt); + index_ww = GetabIndex(p, p, n_cvt); + + ps3_ab = gsl_matrix_get(PPPab, p - 1, index_ab); + ps_aw = gsl_matrix_get(Pab, p - 1, index_aw); + ps_bw = gsl_matrix_get(Pab, p - 1, index_bw); + ps_ww = gsl_matrix_get(Pab, p - 1, index_ww); + ps2_aw = 
gsl_matrix_get(PPab, p - 1, index_aw); + ps2_bw = gsl_matrix_get(PPab, p - 1, index_bw); + ps2_ww = gsl_matrix_get(PPab, p - 1, index_ww); + ps3_aw = gsl_matrix_get(PPPab, p - 1, index_aw); + ps3_bw = gsl_matrix_get(PPPab, p - 1, index_bw); + ps3_ww = gsl_matrix_get(PPPab, p - 1, index_ww); + + p3_ab = ps3_ab - + ps_aw * ps_bw * ps2_ww * ps2_ww / (ps_ww * ps_ww * ps_ww); + p3_ab -= (ps_aw * ps3_bw + ps_bw * ps3_aw + ps2_aw * ps2_bw) / ps_ww; + p3_ab += (ps_aw * ps2_bw * ps2_ww + ps_bw * ps2_aw * ps2_ww + + ps_aw * ps_bw * ps3_ww) / + (ps_ww * ps_ww); + + gsl_matrix_set(PPPab, p, index_ab, p3_ab); + } + } + } + } + return; } -double LogL_f (double l, void *params) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} - - double f=0.0, logdet_h=0.0, d; - size_t index_yy; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); - - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale (v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - for (size_t i=0; i<(p->eval)->size; ++i) { - d=gsl_vector_get (v_temp, i); - logdet_h+=log(fabs(d)); - } - - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - - double c=0.5*(double)ni_test*(log((double)ni_test)-log(2*M_PI)-1.0); - - index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); - f=c-0.5*logdet_h-0.5*(double)ni_test*log(P_yy); - - gsl_matrix_free (Pab); - gsl_vector_free (Hi_eval); - gsl_vector_free (v_temp); - return f; +double LogL_f(double l, void *params) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t 
ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + size_t nc_total; + if (p->calc_null == true) { + nc_total = n_cvt; + } else { + nc_total = n_cvt + 1; + } + + double f = 0.0, logdet_h = 0.0, d; + size_t index_yy; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + for (size_t i = 0; i < (p->eval)->size; ++i) { + d = gsl_vector_get(v_temp, i); + logdet_h += log(fabs(d)); + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + + double c = + 0.5 * (double)ni_test * (log((double)ni_test) - log(2 * M_PI) - 1.0); + + index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + double P_yy = gsl_matrix_get(Pab, nc_total, index_yy); + f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * log(P_yy); + + gsl_matrix_free(Pab); + gsl_vector_free(Hi_eval); + gsl_vector_free(v_temp); + return f; } -double LogL_dev1 (double l, void *params) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; +double LogL_dev1(double l, void *params) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + size_t nc_total; + if (p->calc_null == true) { + nc_total = n_cvt; + } else { + nc_total = n_cvt + 1; + } + + double dev1 = 0.0, trace_Hi = 0.0; + size_t index_yy; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); + 
gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + gsl_vector_memcpy(HiHi_eval, Hi_eval); + gsl_vector_mul(HiHi_eval, Hi_eval); + + gsl_vector_set_all(v_temp, 1.0); + gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); + + if (p->e_mode != 0) { + trace_Hi = (double)ni_test - trace_Hi; + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); + + double trace_HiK = ((double)ni_test - trace_Hi) / l; + + index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + + double P_yy = gsl_matrix_get(Pab, nc_total, index_yy); + double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy); + double yPKPy = (P_yy - PP_yy) / l; + dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy; + + gsl_matrix_free(Pab); + gsl_matrix_free(PPab); + gsl_vector_free(Hi_eval); + gsl_vector_free(HiHi_eval); + gsl_vector_free(v_temp); + + return dev1; +} - size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} +double LogL_dev2(double l, void *params) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + size_t nc_total; + if (p->calc_null == true) { + nc_total = n_cvt; + } else { + nc_total = n_cvt + 1; + } + + double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0; + size_t index_yy; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); + 
gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + gsl_vector_memcpy(HiHi_eval, Hi_eval); + gsl_vector_mul(HiHi_eval, Hi_eval); + gsl_vector_memcpy(HiHiHi_eval, HiHi_eval); + gsl_vector_mul(HiHiHi_eval, Hi_eval); + + gsl_vector_set_all(v_temp, 1.0); + gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); + gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); + + if (p->e_mode != 0) { + trace_Hi = (double)ni_test - trace_Hi; + trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); + CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + + double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l); + + index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + double P_yy = gsl_matrix_get(Pab, nc_total, index_yy); + double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy); + double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_yy); + + double yPKPy = (P_yy - PP_yy) / l; + double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l); + + dev2 = 0.5 * trace_HiKHiK - + 0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / + (P_yy * P_yy); + + gsl_matrix_free(Pab); + gsl_matrix_free(PPab); + gsl_matrix_free(PPPab); + gsl_vector_free(Hi_eval); + gsl_vector_free(HiHi_eval); + gsl_vector_free(HiHiHi_eval); + gsl_vector_free(v_temp); + + return dev2; +} - double dev1=0.0, trace_Hi=0.0; - size_t index_yy; +void LogL_dev12(double l, void *params, double *dev1, double *dev2) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + size_t nc_total; + if 
(p->calc_null == true) { + nc_total = n_cvt; + } else { + nc_total = n_cvt + 1; + } + + double trace_Hi = 0.0, trace_HiHi = 0.0; + size_t index_yy; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + gsl_vector_memcpy(HiHi_eval, Hi_eval); + gsl_vector_mul(HiHi_eval, Hi_eval); + gsl_vector_memcpy(HiHiHi_eval, HiHi_eval); + gsl_vector_mul(HiHiHi_eval, Hi_eval); + + gsl_vector_set_all(v_temp, 1.0); + gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); + gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); + + if (p->e_mode != 0) { + trace_Hi = (double)ni_test - trace_Hi; + trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); + CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + + double trace_HiK = ((double)ni_test - trace_Hi) / l; + double trace_HiKHiK = ((double)ni_test + trace_HiHi - 2 * trace_Hi) / (l * l); + + index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + + double P_yy = gsl_matrix_get(Pab, nc_total, index_yy); + double PP_yy = gsl_matrix_get(PPab, nc_total, index_yy); + double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_yy); + + double yPKPy = (P_yy - PP_yy) / l; + double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l); + + *dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy; 
+ *dev2 = 0.5 * trace_HiKHiK - + 0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / + (P_yy * P_yy); + + gsl_matrix_free(Pab); + gsl_matrix_free(PPab); + gsl_matrix_free(PPPab); + gsl_vector_free(Hi_eval); + gsl_vector_free(HiHi_eval); + gsl_vector_free(HiHiHi_eval); + gsl_vector_free(v_temp); + + return; +} - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); +double LogRL_f(double l, void *params) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + double df; + size_t nc_total; + if (p->calc_null == true) { + nc_total = n_cvt; + df = (double)ni_test - (double)n_cvt; + } else { + nc_total = n_cvt + 1; + df = (double)ni_test - (double)n_cvt - 1.0; + } + + double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d; + size_t index_ww; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *Iab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + for (size_t i = 0; i < (p->eval)->size; ++i) { + d = gsl_vector_get(v_temp, i); + logdet_h += log(fabs(d)); + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + gsl_vector_set_all(v_temp, 1.0); + CalcPab(n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab); + + // Calculate |WHiW|-|WW|. 
+ logdet_hiw = 0.0; + for (size_t i = 0; i < nc_total; ++i) { + index_ww = GetabIndex(i + 1, i + 1, n_cvt); + d = gsl_matrix_get(Pab, i, index_ww); + logdet_hiw += log(d); + d = gsl_matrix_get(Iab, i, index_ww); + logdet_hiw -= log(d); + } + index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + double P_yy = gsl_matrix_get(Pab, nc_total, index_ww); + + double c = 0.5 * df * (log(df) - log(2 * M_PI) - 1.0); + f = c - 0.5 * logdet_h - 0.5 * logdet_hiw - 0.5 * df * log(P_yy); + + gsl_matrix_free(Pab); + gsl_matrix_free(Iab); + gsl_vector_free(Hi_eval); + gsl_vector_free(v_temp); + return f; +} - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale (v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); +double LogRL_dev1(double l, void *params) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + double df; + size_t nc_total; + if (p->calc_null == true) { + nc_total = n_cvt; + df = (double)ni_test - (double)n_cvt; + } else { + nc_total = n_cvt + 1; + df = (double)ni_test - (double)n_cvt - 1.0; + } + + double dev1 = 0.0, trace_Hi = 0.0; + size_t index_ww; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + gsl_vector_memcpy(HiHi_eval, Hi_eval); + gsl_vector_mul(HiHi_eval, Hi_eval); + + gsl_vector_set_all(v_temp, 1.0); + 
gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); + + if (p->e_mode != 0) { + trace_Hi = (double)ni_test - trace_Hi; + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); + + // Calculate tracePK and trace PKPK. + double trace_P = trace_Hi; + double ps_ww, ps2_ww; + for (size_t i = 0; i < nc_total; ++i) { + index_ww = GetabIndex(i + 1, i + 1, n_cvt); + ps_ww = gsl_matrix_get(Pab, i, index_ww); + ps2_ww = gsl_matrix_get(PPab, i, index_ww); + trace_P -= ps2_ww / ps_ww; + } + double trace_PK = (df - trace_P) / l; + + // Calculate yPKPy, yPKPKPy. + index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + double P_yy = gsl_matrix_get(Pab, nc_total, index_ww); + double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww); + double yPKPy = (P_yy - PP_yy) / l; + + dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy; + + gsl_matrix_free(Pab); + gsl_matrix_free(PPab); + gsl_vector_free(Hi_eval); + gsl_vector_free(HiHi_eval); + gsl_vector_free(v_temp); + + return dev1; +} - gsl_vector_memcpy (HiHi_eval, Hi_eval); - gsl_vector_mul (HiHi_eval, Hi_eval); +double LogRL_dev2(double l, void *params) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + double df; + size_t nc_total; + if (p->calc_null == true) { + nc_total = n_cvt; + df = (double)ni_test - (double)n_cvt; + } else { + nc_total = n_cvt + 1; + df = (double)ni_test - (double)n_cvt - 1.0; + } + + double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0; + size_t index_ww; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *v_temp = 
gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + gsl_vector_memcpy(HiHi_eval, Hi_eval); + gsl_vector_mul(HiHi_eval, Hi_eval); + gsl_vector_memcpy(HiHiHi_eval, HiHi_eval); + gsl_vector_mul(HiHiHi_eval, Hi_eval); + + gsl_vector_set_all(v_temp, 1.0); + gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); + gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); + + if (p->e_mode != 0) { + trace_Hi = (double)ni_test - trace_Hi; + trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); + CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + + // Calculate tracePK and trace PKPK. + double trace_P = trace_Hi, trace_PP = trace_HiHi; + double ps_ww, ps2_ww, ps3_ww; + for (size_t i = 0; i < nc_total; ++i) { + index_ww = GetabIndex(i + 1, i + 1, n_cvt); + ps_ww = gsl_matrix_get(Pab, i, index_ww); + ps2_ww = gsl_matrix_get(PPab, i, index_ww); + ps3_ww = gsl_matrix_get(PPPab, i, index_ww); + trace_P -= ps2_ww / ps_ww; + trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww; + } + double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l); + + // Calculate yPKPy, yPKPKPy. 
+ index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + double P_yy = gsl_matrix_get(Pab, nc_total, index_ww); + double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww); + double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_ww); + double yPKPy = (P_yy - PP_yy) / l; + double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l); + + dev2 = 0.5 * trace_PKPK - + 0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy); + + gsl_matrix_free(Pab); + gsl_matrix_free(PPab); + gsl_matrix_free(PPPab); + gsl_vector_free(Hi_eval); + gsl_vector_free(HiHi_eval); + gsl_vector_free(HiHiHi_eval); + gsl_vector_free(v_temp); + + return dev2; +} - gsl_vector_set_all (v_temp, 1.0); - gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); +void LogRL_dev12(double l, void *params, double *dev1, double *dev2) { + FUNC_PARAM *p = (FUNC_PARAM *)params; + size_t n_cvt = p->n_cvt; + size_t ni_test = p->ni_test; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + double df; + size_t nc_total; + if (p->calc_null == true) { + nc_total = n_cvt; + df = (double)ni_test - (double)n_cvt; + } else { + nc_total = n_cvt + 1; + df = (double)ni_test - (double)n_cvt - 1.0; + } + + double trace_Hi = 0.0, trace_HiHi = 0.0; + size_t index_ww; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size); + gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size); + + gsl_vector_memcpy(v_temp, p->eval); + gsl_vector_scale(v_temp, l); + if (p->e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + gsl_vector_memcpy(HiHi_eval, Hi_eval); + gsl_vector_mul(HiHi_eval, Hi_eval); + 
gsl_vector_memcpy(HiHiHi_eval, HiHi_eval); + gsl_vector_mul(HiHiHi_eval, Hi_eval); + + gsl_vector_set_all(v_temp, 1.0); + gsl_blas_ddot(Hi_eval, v_temp, &trace_Hi); + gsl_blas_ddot(HiHi_eval, v_temp, &trace_HiHi); + + if (p->e_mode != 0) { + trace_Hi = (double)ni_test - trace_Hi; + trace_HiHi = 2 * trace_Hi + trace_HiHi - (double)ni_test; + } + + CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); + CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); + CalcPPPab(n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab); + + // Calculate tracePK and trace PKPK. + double trace_P = trace_Hi, trace_PP = trace_HiHi; + double ps_ww, ps2_ww, ps3_ww; + for (size_t i = 0; i < nc_total; ++i) { + index_ww = GetabIndex(i + 1, i + 1, n_cvt); + ps_ww = gsl_matrix_get(Pab, i, index_ww); + ps2_ww = gsl_matrix_get(PPab, i, index_ww); + ps3_ww = gsl_matrix_get(PPPab, i, index_ww); + trace_P -= ps2_ww / ps_ww; + trace_PP += ps2_ww * ps2_ww / (ps_ww * ps_ww) - 2.0 * ps3_ww / ps_ww; + } + double trace_PK = (df - trace_P) / l; + double trace_PKPK = (df + trace_PP - 2.0 * trace_P) / (l * l); + + // Calculate yPKPy, yPKPKPy. 
+ index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + double P_yy = gsl_matrix_get(Pab, nc_total, index_ww); + double PP_yy = gsl_matrix_get(PPab, nc_total, index_ww); + double PPP_yy = gsl_matrix_get(PPPab, nc_total, index_ww); + double yPKPy = (P_yy - PP_yy) / l; + double yPKPKPy = (P_yy + PPP_yy - 2.0 * PP_yy) / (l * l); + + *dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy; + *dev2 = 0.5 * trace_PKPK - + 0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy); + + gsl_matrix_free(Pab); + gsl_matrix_free(PPab); + gsl_matrix_free(PPPab); + gsl_vector_free(Hi_eval); + gsl_vector_free(HiHi_eval); + gsl_vector_free(HiHiHi_eval); + gsl_vector_free(v_temp); + + return; +} - if (p->e_mode!=0) {trace_Hi=(double)ni_test-trace_Hi;} +void LMM::CalcRLWald(const double &l, const FUNC_PARAM ¶ms, double &beta, + double &se, double &p_wald) { + size_t n_cvt = params.n_cvt; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + int df = (int)ni_test - (int)n_cvt - 1; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size); + gsl_vector *v_temp = gsl_vector_alloc(params.eval->size); + + gsl_vector_memcpy(v_temp, params.eval); + gsl_vector_scale(v_temp, l); + if (params.e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); + + size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt); + size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt); + double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy); + double P_xx = gsl_matrix_get(Pab, n_cvt, index_xx); + double P_xy = gsl_matrix_get(Pab, n_cvt, index_xy); + double Px_yy = gsl_matrix_get(Pab, n_cvt + 1, index_yy); + + beta = P_xy / P_xx; + double tau = (double)df / Px_yy; + se = sqrt(1.0 / (tau * 
P_xx)); + p_wald = gsl_cdf_fdist_Q((P_yy - Px_yy) * tau, 1.0, df); + + gsl_matrix_free(Pab); + gsl_vector_free(Hi_eval); + gsl_vector_free(v_temp); + return; +} - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); +void LMM::CalcRLScore(const double &l, const FUNC_PARAM ¶ms, double &beta, + double &se, double &p_score) { + size_t n_cvt = params.n_cvt; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + int df = (int)ni_test - (int)n_cvt - 1; + + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size); + gsl_vector *v_temp = gsl_vector_alloc(params.eval->size); + + gsl_vector_memcpy(v_temp, params.eval); + gsl_vector_scale(v_temp, l); + if (params.e_mode == 0) { + gsl_vector_set_all(Hi_eval, 1.0); + } else { + gsl_vector_memcpy(Hi_eval, v_temp); + } + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + CalcPab(n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); + + size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + size_t index_xx = GetabIndex(n_cvt + 1, n_cvt + 1, n_cvt); + size_t index_xy = GetabIndex(n_cvt + 2, n_cvt + 1, n_cvt); + double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy); + double P_xx = gsl_matrix_get(Pab, n_cvt, index_xx); + double P_xy = gsl_matrix_get(Pab, n_cvt, index_xy); + double Px_yy = gsl_matrix_get(Pab, n_cvt + 1, index_yy); + + beta = P_xy / P_xx; + double tau = (double)df / Px_yy; + se = sqrt(1.0 / (tau * P_xx)); + + p_score = + gsl_cdf_fdist_Q((double)ni_test * P_xy * P_xy / (P_yy * P_xx), 1.0, df); + + gsl_matrix_free(Pab); + gsl_vector_free(Hi_eval); + gsl_vector_free(v_temp); + return; +} - double trace_HiK=((double)ni_test-trace_Hi)/l; +void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) { + size_t index_ab; + size_t n_cvt = UtW->size2; - index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); + gsl_vector *u_a = 
gsl_vector_alloc(Uty->size); - double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); - double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy); - double yPKPy=(P_yy-PP_yy)/l; - dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy; + for (size_t a = 1; a <= n_cvt + 2; ++a) { + if (a == n_cvt + 1) { + continue; + } - gsl_matrix_free (Pab); - gsl_matrix_free (PPab); - gsl_vector_free (Hi_eval); - gsl_vector_free (HiHi_eval); - gsl_vector_free (v_temp); + if (a == n_cvt + 2) { + gsl_vector_memcpy(u_a, Uty); + } else { + gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, a - 1); + gsl_vector_memcpy(u_a, &UtW_col.vector); + } - return dev1; -} + for (size_t b = a; b >= 1; --b) { + if (b == n_cvt + 1) { + continue; + } -double LogL_dev2 (double l, void *params) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - size_t nc_total; - if (p->calc_null==true) { - nc_total=n_cvt; - } else { - nc_total=n_cvt+1; - } - - double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0; - size_t index_yy; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); - - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale (v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - gsl_vector_memcpy (HiHi_eval, Hi_eval); - gsl_vector_mul (HiHi_eval, Hi_eval); - gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); - gsl_vector_mul (HiHiHi_eval, Hi_eval); - - gsl_vector_set_all (v_temp, 1.0); - gsl_blas_ddot (Hi_eval, v_temp, 
&trace_Hi); - gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); - - if (p->e_mode!=0) { - trace_Hi=(double)ni_test-trace_Hi; - trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; - } - - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, - PPPab); - - double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l); - - index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); - double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy); - double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy); - - double yPKPy=(P_yy-PP_yy)/l; - double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); - - dev2=0.5*trace_HiKHiK-0.5*(double)ni_test* - (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); - - gsl_matrix_free (Pab); - gsl_matrix_free (PPab); - gsl_matrix_free (PPPab); - gsl_vector_free (Hi_eval); - gsl_vector_free (HiHi_eval); - gsl_vector_free (HiHiHi_eval); - gsl_vector_free (v_temp); - - return dev2; -} + index_ab = GetabIndex(a, b, n_cvt); + gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab); -void LogL_dev12 (double l, void *params, double *dev1, double *dev2) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - size_t nc_total; - if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;} - - double trace_Hi=0.0, trace_HiHi=0.0; - size_t index_yy; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); - - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale 
(v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - gsl_vector_memcpy (HiHi_eval, Hi_eval); - gsl_vector_mul (HiHi_eval, Hi_eval); - gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); - gsl_vector_mul (HiHiHi_eval, Hi_eval); - - gsl_vector_set_all (v_temp, 1.0); - gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); - gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); - - if (p->e_mode!=0) { - trace_Hi=(double)ni_test-trace_Hi; - trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; - } - - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, - PPPab); - - double trace_HiK=((double)ni_test-trace_Hi)/l; - double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l); - - index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - - double P_yy=gsl_matrix_get (Pab, nc_total, index_yy); - double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy); - double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy); - - double yPKPy=(P_yy-PP_yy)/l; - double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); - - *dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy; - *dev2=0.5*trace_HiKHiK-0.5*(double)ni_test* - (2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); - - gsl_matrix_free (Pab); - gsl_matrix_free (PPab); - gsl_matrix_free (PPPab); - gsl_vector_free (Hi_eval); - gsl_vector_free (HiHi_eval); - gsl_vector_free (HiHiHi_eval); - gsl_vector_free (v_temp); - - return; -} + if (b == n_cvt + 2) { + gsl_vector_memcpy(&Uab_col.vector, Uty); + } else { + gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1); + gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector); + } -double LogRL_f (double l, void *params) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t 
n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - double df; - size_t nc_total; - if (p->calc_null==true) { - nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; - } - else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;} - - double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d; - size_t index_ww; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *Iab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); - - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale (v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - for (size_t i=0; i<(p->eval)->size; ++i) { - d=gsl_vector_get (v_temp, i); - logdet_h+=log(fabs(d)); - } - - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - gsl_vector_set_all (v_temp, 1.0); - CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab); - - // Calculate |WHiW|-|WW|. 
- logdet_hiw=0.0; - for (size_t i=0; i<nc_total; ++i) { - index_ww=GetabIndex (i+1, i+1, n_cvt); - d=gsl_matrix_get (Pab, i, index_ww); - logdet_hiw+=log(d); - d=gsl_matrix_get (Iab, i, index_ww); - logdet_hiw-=log(d); - } - index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - double P_yy=gsl_matrix_get (Pab, nc_total, index_ww); - - double c=0.5*df*(log(df)-log(2*M_PI)-1.0); - f=c-0.5*logdet_h-0.5*logdet_hiw-0.5*df*log(P_yy); - - gsl_matrix_free (Pab); - gsl_matrix_free (Iab); - gsl_vector_free (Hi_eval); - gsl_vector_free (v_temp); - return f; -} + gsl_vector_mul(&Uab_col.vector, u_a); + } + } -double LogRL_dev1 (double l, void *params) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - double df; - size_t nc_total; - if (p->calc_null==true) { - nc_total=n_cvt; - df=(double)ni_test-(double)n_cvt; - } - else { - nc_total=n_cvt+1; - df=(double)ni_test-(double)n_cvt-1.0; - } - - double dev1=0.0, trace_Hi=0.0; - size_t index_ww; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); - - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale (v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - gsl_vector_memcpy (HiHi_eval, Hi_eval); - gsl_vector_mul (HiHi_eval, Hi_eval); - - gsl_vector_set_all (v_temp, 1.0); - gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); - - if (p->e_mode!=0) { - trace_Hi=(double)ni_test-trace_Hi; - } - - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - - // Calculate tracePK and trace PKPK. 
- double trace_P=trace_Hi; - double ps_ww, ps2_ww; - for (size_t i=0; i<nc_total; ++i) { - index_ww=GetabIndex (i+1, i+1, n_cvt); - ps_ww=gsl_matrix_get (Pab, i, index_ww); - ps2_ww=gsl_matrix_get (PPab, i, index_ww); - trace_P-=ps2_ww/ps_ww; - } - double trace_PK=(df-trace_P)/l; - - // Calculate yPKPy, yPKPKPy. - index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - double P_yy=gsl_matrix_get (Pab, nc_total, index_ww); - double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww); - double yPKPy=(P_yy-PP_yy)/l; - - dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy; - - gsl_matrix_free (Pab); - gsl_matrix_free (PPab); - gsl_vector_free (Hi_eval); - gsl_vector_free (HiHi_eval); - gsl_vector_free (v_temp); - - return dev1; + gsl_vector_free(u_a); + return; } -double LogRL_dev2 (double l, void *params) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - double df; - size_t nc_total; - if (p->calc_null==true) { - nc_total=n_cvt; - df=(double)ni_test-(double)n_cvt; - } - else { - nc_total=n_cvt+1; - df=(double)ni_test-(double)n_cvt-1.0; - } - - double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0; - size_t index_ww; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); - - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale (v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - gsl_vector_memcpy (HiHi_eval, Hi_eval); - gsl_vector_mul (HiHi_eval, Hi_eval); - gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); - gsl_vector_mul 
(HiHiHi_eval, Hi_eval); - - gsl_vector_set_all (v_temp, 1.0); - gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); - gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); - - if (p->e_mode!=0) { - trace_Hi=(double)ni_test-trace_Hi; - trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; - } - - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, - PPab, PPPab); - - // Calculate tracePK and trace PKPK. - double trace_P=trace_Hi, trace_PP=trace_HiHi; - double ps_ww, ps2_ww, ps3_ww; - for (size_t i=0; i<nc_total; ++i) { - index_ww=GetabIndex (i+1, i+1, n_cvt); - ps_ww=gsl_matrix_get (Pab, i, index_ww); - ps2_ww=gsl_matrix_get (PPab, i, index_ww); - ps3_ww=gsl_matrix_get (PPPab, i, index_ww); - trace_P-=ps2_ww/ps_ww; - trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww; - } - double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l); - - // Calculate yPKPy, yPKPKPy. - index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - double P_yy=gsl_matrix_get (Pab, nc_total, index_ww); - double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww); - double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww); - double yPKPy=(P_yy-PP_yy)/l; - double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); - - dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy); - - gsl_matrix_free (Pab); - gsl_matrix_free (PPab); - gsl_matrix_free (PPPab); - gsl_vector_free (Hi_eval); - gsl_vector_free (HiHi_eval); - gsl_vector_free (HiHiHi_eval); - gsl_vector_free (v_temp); - - return dev2; -} +void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_vector *Utx, gsl_matrix *Uab) { + size_t index_ab; + size_t n_cvt = UtW->size2; + + for (size_t b = 1; b <= n_cvt + 2; ++b) { + index_ab = GetabIndex(n_cvt + 1, b, n_cvt); + gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab); + + if (b == n_cvt + 2) { + gsl_vector_memcpy(&Uab_col.vector, Uty); + } else if (b == n_cvt + 1) { + 
gsl_vector_memcpy(&Uab_col.vector, Utx); + } else { + gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1); + gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector); + } -void LogRL_dev12 (double l, void *params, double *dev1, double *dev2) { - FUNC_PARAM *p=(FUNC_PARAM *) params; - size_t n_cvt=p->n_cvt; - size_t ni_test=p->ni_test; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - double df; - size_t nc_total; - if (p->calc_null==true) { - nc_total=n_cvt; - df=(double)ni_test-(double)n_cvt; - } - else { - nc_total=n_cvt+1; - df=(double)ni_test-(double)n_cvt-1.0; - } - - double trace_Hi=0.0, trace_HiHi=0.0; - size_t index_ww; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size); - gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size); - - gsl_vector_memcpy (v_temp, p->eval); - gsl_vector_scale (v_temp, l); - if (p->e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - gsl_vector_memcpy (HiHi_eval, Hi_eval); - gsl_vector_mul (HiHi_eval, Hi_eval); - gsl_vector_memcpy (HiHiHi_eval, HiHi_eval); - gsl_vector_mul (HiHiHi_eval, Hi_eval); - - gsl_vector_set_all (v_temp, 1.0); - gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi); - gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi); - - if (p->e_mode!=0) { - trace_Hi=(double)ni_test-trace_Hi; - trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test; - } - - CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); - CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); - CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, - PPab, PPPab); - - // Calculate tracePK and trace PKPK. 
- double trace_P=trace_Hi, trace_PP=trace_HiHi; - double ps_ww, ps2_ww, ps3_ww; - for (size_t i=0; i<nc_total; ++i) { - index_ww=GetabIndex (i+1, i+1, n_cvt); - ps_ww=gsl_matrix_get (Pab, i, index_ww); - ps2_ww=gsl_matrix_get (PPab, i, index_ww); - ps3_ww=gsl_matrix_get (PPPab, i, index_ww); - trace_P-=ps2_ww/ps_ww; - trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww; - } - double trace_PK=(df-trace_P)/l; - double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l); - - // Calculate yPKPy, yPKPKPy. - index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - double P_yy=gsl_matrix_get (Pab, nc_total, index_ww); - double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww); - double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww); - double yPKPy=(P_yy-PP_yy)/l; - double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l); - - *dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy; - *dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/ - (P_yy*P_yy); - - gsl_matrix_free (Pab); - gsl_matrix_free (PPab); - gsl_matrix_free (PPPab); - gsl_vector_free (Hi_eval); - gsl_vector_free (HiHi_eval); - gsl_vector_free (HiHiHi_eval); - gsl_vector_free (v_temp); - - return; -} + gsl_vector_mul(&Uab_col.vector, Utx); + } -void LMM::CalcRLWald (const double &l, const FUNC_PARAM ¶ms, - double &beta, double &se, double &p_wald) { - size_t n_cvt=params.n_cvt; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - int df=(int)ni_test-(int)n_cvt-1; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size); - gsl_vector *v_temp=gsl_vector_alloc(params.eval->size); - - gsl_vector_memcpy (v_temp, params.eval); - gsl_vector_scale (v_temp, l); - if (params.e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); - - size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - size_t 
index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt); - size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt); - double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy); - double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx); - double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy); - double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy); - - beta=P_xy/P_xx; - double tau=(double)df/Px_yy; - se=sqrt(1.0/(tau*P_xx)); - p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df); - - gsl_matrix_free (Pab); - gsl_vector_free (Hi_eval); - gsl_vector_free (v_temp); - return; + return; } -void LMM::CalcRLScore (const double &l, const FUNC_PARAM ¶ms, - double &beta, double &se, double &p_score) { - size_t n_cvt=params.n_cvt; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - int df=(int)ni_test-(int)n_cvt-1; - - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size); - gsl_vector *v_temp=gsl_vector_alloc(params.eval->size); - - gsl_vector_memcpy (v_temp, params.eval); - gsl_vector_scale (v_temp, l); - if (params.e_mode==0) { - gsl_vector_set_all (Hi_eval, 1.0); - } else { - gsl_vector_memcpy (Hi_eval, v_temp); - } - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab); - - size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt); - size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt); - double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy); - double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx); - double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy); - double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy); - - beta=P_xy/P_xx; - double tau=(double)df/Px_yy; - se=sqrt(1.0/(tau*P_xx)); - - p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), - 1.0, df); - - gsl_matrix_free (Pab); - gsl_vector_free (Hi_eval); - gsl_vector_free (v_temp); - return; -} +void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector 
*ab) { + size_t index_ab; + size_t n_cvt = W->size2; -void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) { - size_t index_ab; - size_t n_cvt=UtW->size2; - - gsl_vector *u_a=gsl_vector_alloc (Uty->size); - - for (size_t a=1; a<=n_cvt+2; ++a) { - if (a==n_cvt+1) {continue;} - - if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);} - else { - gsl_vector_const_view UtW_col= - gsl_matrix_const_column (UtW, a-1); - gsl_vector_memcpy (u_a, &UtW_col.vector); - } - - for (size_t b=a; b>=1; --b) { - if (b==n_cvt+1) {continue;} - - index_ab=GetabIndex (a, b, n_cvt); - gsl_vector_view Uab_col= - gsl_matrix_column (Uab, index_ab); - - if (b==n_cvt+2) { - gsl_vector_memcpy (&Uab_col.vector, Uty); - } - else { - gsl_vector_const_view UtW_col= - gsl_matrix_const_column (UtW, b-1); - gsl_vector_memcpy (&Uab_col.vector, - &UtW_col.vector); - } - - gsl_vector_mul(&Uab_col.vector, u_a); - } - } - - gsl_vector_free (u_a); - return; -} + double d; + gsl_vector *v_a = gsl_vector_alloc(y->size); + gsl_vector *v_b = gsl_vector_alloc(y->size); -void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_vector *Utx, gsl_matrix *Uab) { - size_t index_ab; - size_t n_cvt=UtW->size2; - - for (size_t b=1; b<=n_cvt+2; ++b) { - index_ab=GetabIndex (n_cvt+1, b, n_cvt); - gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab); - - if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);} - else if (b==n_cvt+1) { - gsl_vector_memcpy (&Uab_col.vector, Utx); - } - else { - gsl_vector_const_view UtW_col= - gsl_matrix_const_column (UtW, b-1); - gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector); - } - - gsl_vector_mul(&Uab_col.vector, Utx); - } - - return; -} + for (size_t a = 1; a <= n_cvt + 2; ++a) { + if (a == n_cvt + 1) { + continue; + } -void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) { - size_t index_ab; - size_t n_cvt=W->size2; - - double d; - gsl_vector *v_a=gsl_vector_alloc (y->size); - gsl_vector *v_b=gsl_vector_alloc (y->size); - - 
for (size_t a=1; a<=n_cvt+2; ++a) { - if (a==n_cvt+1) {continue;} - - if (a==n_cvt+2) { - gsl_vector_memcpy (v_a, y); - } - else { - gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1); - gsl_vector_memcpy (v_a, &W_col.vector); - } - - for (size_t b=a; b>=1; --b) { - if (b==n_cvt+1) {continue;} - - index_ab=GetabIndex (a, b, n_cvt); - - if (b==n_cvt+2) { - gsl_vector_memcpy (v_b, y); - } - else { - gsl_vector_const_view W_col= - gsl_matrix_const_column (W, b-1); - gsl_vector_memcpy (v_b, &W_col.vector); - } - - gsl_blas_ddot (v_a, v_b, &d); - gsl_vector_set(ab, index_ab, d); - } - } - - gsl_vector_free (v_a); - gsl_vector_free (v_b); - return; + if (a == n_cvt + 2) { + gsl_vector_memcpy(v_a, y); + } else { + gsl_vector_const_view W_col = gsl_matrix_const_column(W, a - 1); + gsl_vector_memcpy(v_a, &W_col.vector); + } + + for (size_t b = a; b >= 1; --b) { + if (b == n_cvt + 1) { + continue; + } + + index_ab = GetabIndex(a, b, n_cvt); + + if (b == n_cvt + 2) { + gsl_vector_memcpy(v_b, y); + } else { + gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1); + gsl_vector_memcpy(v_b, &W_col.vector); + } + + gsl_blas_ddot(v_a, v_b, &d); + gsl_vector_set(ab, index_ab, d); + } + } + + gsl_vector_free(v_a); + gsl_vector_free(v_b); + return; } -void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, - gsl_vector *ab) { - size_t index_ab; - size_t n_cvt=W->size2; +void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, + gsl_vector *ab) { + size_t index_ab; + size_t n_cvt = W->size2; - double d; - gsl_vector *v_b=gsl_vector_alloc (y->size); + double d; + gsl_vector *v_b = gsl_vector_alloc(y->size); - for (size_t b=1; b<=n_cvt+2; ++b) { - index_ab=GetabIndex (n_cvt+1, b, n_cvt); + for (size_t b = 1; b <= n_cvt + 2; ++b) { + index_ab = GetabIndex(n_cvt + 1, b, n_cvt); - if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);} - else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);} - else { - gsl_vector_const_view 
W_col=gsl_matrix_const_column (W, b-1); - gsl_vector_memcpy (v_b, &W_col.vector); - } + if (b == n_cvt + 2) { + gsl_vector_memcpy(v_b, y); + } else if (b == n_cvt + 1) { + gsl_vector_memcpy(v_b, x); + } else { + gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1); + gsl_vector_memcpy(v_b, &W_col.vector); + } - gsl_blas_ddot (x, v_b, &d); - gsl_vector_set(ab, index_ab, d); - } + gsl_blas_ddot(x, v_b, &d); + gsl_vector_set(ab, index_ab, d); + } - gsl_vector_free (v_b); - return; + gsl_vector_free(v_b); + return; } -void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Utx, - const gsl_matrix *W, const gsl_vector *x) { - igzstream infile (file_gene.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading gene expression file:"<<file_gene<<endl; - return; - } - - clock_t time_start=clock(); - - string line; - char *ch_ptr; - - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; - double p_lrt=0, p_score=0; - double logl_H1=0.0, logl_H0=0.0, l_H0; - int c_phen; - string rs; // Gene id. - double d; - - // Calculate basic quantities. - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - gsl_vector *y=gsl_vector_alloc (U->size1); - gsl_vector *Uty=gsl_vector_alloc (U->size2); - gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); - - // Header. 
- getline(infile, line); - - for (size_t t=0; t<ng_total; t++) { - !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==ng_total-1) { - ProgressBar ("Performing Analysis ", t, ng_total-1); - } - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; - - c_phen=0; - for (size_t i=0; i<indicator_idv.size(); ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} - - d=atof(ch_ptr); - gsl_vector_set(y, c_phen, d); - - c_phen++; - } - - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Calculate null. - time_start=clock(); - - gsl_matrix_set_zero (Uab); - - CalcUab (UtW, Uty, Uab); - FUNC_PARAM param0={false, ni_test, n_cvt, eval, Uab, ab, 0}; - - if (a_mode==2 || a_mode==3 || a_mode==4) { - CalcLambda('L', param0, l_min, l_max, n_region, - l_H0, logl_H0); - } - - // Calculate alternative. - CalcUab(UtW, Uty, Utx, Uab); - FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0}; - - //3 is before 1. - if (a_mode==3 || a_mode==4) { - CalcRLScore (l_H0, param1, beta, se, p_score); - } - - if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, - lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, - lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); - } - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. 
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout<<endl; - - gsl_vector_free (y); - gsl_vector_free (Uty); - gsl_matrix_free (Uab); - gsl_vector_free (ab); - - infile.close(); - infile.clear(); +void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Utx, + const gsl_matrix *W, const gsl_vector *x) { + igzstream infile(file_gene.c_str(), igzstream::in); + if (!infile) { + cout << "error reading gene expression file:" << file_gene << endl; + return; + } + + clock_t time_start = clock(); + + string line; + char *ch_ptr; + + double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; + double p_lrt = 0, p_score = 0; + double logl_H1 = 0.0, logl_H0 = 0.0, l_H0; + int c_phen; + string rs; // Gene id. + double d; + + // Calculate basic quantities. + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + gsl_vector *y = gsl_vector_alloc(U->size1); + gsl_vector *Uty = gsl_vector_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); + + // Header. + getline(infile, line); + + for (size_t t = 0; t < ng_total; t++) { + !safeGetline(infile, line).eof(); + if (t % d_pace == 0 || t == ng_total - 1) { + ProgressBar("Performing Analysis ", t, ng_total - 1); + } + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; + + c_phen = 0; + for (size_t i = 0; i < indicator_idv.size(); ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } + + d = atof(ch_ptr); + gsl_vector_set(y, c_phen, d); + + c_phen++; + } + + time_start = clock(); + gsl_blas_dgemv(CblasTrans, 1.0, U, y, 0.0, Uty); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Calculate null. 
+ time_start = clock(); + + gsl_matrix_set_zero(Uab); + + CalcUab(UtW, Uty, Uab); + FUNC_PARAM param0 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; + + if (a_mode == 2 || a_mode == 3 || a_mode == 4) { + CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0); + } + + // Calculate alternative. + CalcUab(UtW, Uty, Utx, Uab); + FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; + + // 3 is before 1. + if (a_mode == 3 || a_mode == 4) { + CalcRLScore(l_H0, param1, beta, se, p_score); + } + + if (a_mode == 1 || a_mode == 4) { + CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcRLWald(lambda_remle, param1, beta, se, p_wald); + } + + if (a_mode == 2 || a_mode == 4) { + CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1); + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + cout << endl; + + gsl_vector_free(y); + gsl_vector_free(Uty); + gsl_matrix_free(Uab); + gsl_vector_free(ab); + + infile.close(); + infile.clear(); - return; + return; } -void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return; - } - - clock_t time_start=clock(); - - string line; - char *ch_ptr; - - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; - double p_lrt=0, p_score=0; - double logl_H1=0.0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate basic quantities. 
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - gsl_vector *x=gsl_vector_alloc (U->size1); - gsl_vector *x_miss=gsl_vector_alloc (U->size1); - gsl_vector *Utx=gsl_vector_alloc (U->size2); - gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); - - // Create a large matrix. - size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - gsl_matrix_set_zero (Uab); - CalcUab (UtW, Uty, Uab); - - //start reading genotypes and analyze - size_t c=0, t_last=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (indicator_snp[t]==0) {continue;} - t_last++; - } - for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); n_miss++; - } - else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) { - gsl_vector_set(x, i, x_mean); - } - } - - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); - gsl_vector_memcpy (&Xlarge_col.vector, x); - c++; - - if (c%msize==0 || c==t_last) { - size_t l=0; - if (c%msize==0) {l=msize;} else {l=c%msize;} - - gsl_matrix_view Xlarge_sub= - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub= - gsl_matrix_submatrix(UtXlarge, 0, 0, 
UtXlarge->size1, l); - - time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, - 0.0, &UtXlarge_sub.matrix); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Xlarge); - - for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col= - gsl_matrix_column (UtXlarge, i); - gsl_vector_memcpy (Utx, &UtXlarge_col.vector); - - CalcUab(UtW, Uty, Utx, Uab); - - time_start=clock(); - FUNC_PARAM param1= - {false, ni_test, n_cvt, eval, Uab, ab, 0}; - - // 3 is before 1. - if (a_mode==3 || a_mode==4) { - CalcRLScore (l_mle_null, param1, beta, se, p_score); - } - - if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, - lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, - lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1); - } - - time_opt+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. 
- SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; - - sumStat.push_back(SNPs); - } - } - } - cout<<endl; - - gsl_vector_free (x); - gsl_vector_free (x_miss); - gsl_vector_free (Utx); - gsl_matrix_free (Uab); - gsl_vector_free (ab); - - gsl_matrix_free (Xlarge); - gsl_matrix_free (UtXlarge); - - infile.close(); - infile.clear(); - - return; +void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return; + } + + clock_t time_start = clock(); + + string line; + char *ch_ptr; + + double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; + double p_lrt = 0, p_score = 0; + double logl_H1 = 0.0; + int n_miss, c_phen; + double geno, x_mean; + + // Calculate basic quantities. + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + gsl_vector *x = gsl_vector_alloc(U->size1); + gsl_vector *x_miss = gsl_vector_alloc(U->size1); + gsl_vector *Utx = gsl_vector_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); + + // Create a large matrix. 
+ size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix_set_zero(Xlarge); + + gsl_matrix_set_zero(Uab); + CalcUab(UtW, Uty, Uab); + + // start reading genotypes and analyze + size_t c = 0, t_last = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (indicator_snp[t] == 0) { + continue; + } + t_last++; + } + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + geno = atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + } + + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); + gsl_vector_memcpy(&Xlarge_col.vector, x); + c++; + + if (c % msize == 0 || c == t_last) { + size_t l = 0; + if (c % msize == 0) { + l = msize; + } else { + l = c % msize; + } + + gsl_matrix_view Xlarge_sub = + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub = + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + + time_start = clock(); + eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + &UtXlarge_sub.matrix); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + 
gsl_matrix_set_zero(Xlarge); + + for (size_t i = 0; i < l; i++) { + gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); + gsl_vector_memcpy(Utx, &UtXlarge_col.vector); + + CalcUab(UtW, Uty, Utx, Uab); + + time_start = clock(); + FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; + + // 3 is before 1. + if (a_mode == 3 || a_mode == 4) { + CalcRLScore(l_mle_null, param1, beta, se, p_score); + } + + if (a_mode == 1 || a_mode == 4) { + CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, + logl_H1); + CalcRLWald(lambda_remle, param1, beta, se, p_wald); + } + + if (a_mode == 2 || a_mode == 4) { + CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; + + sumStat.push_back(SNPs); + } + } + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(x_miss); + gsl_vector_free(Utx); + gsl_matrix_free(Uab); + gsl_vector_free(ab); + + gsl_matrix_free(Xlarge); + gsl_matrix_free(UtXlarge); + + infile.close(); + infile.clear(); + + return; } -void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;} - - clock_t time_start=clock(); - - char ch[1]; - bitset<8> b; - - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; - double p_lrt=0, p_score=0; - double logl_H1=0.0; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; - - // Calculate basic quantities. 
- size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - gsl_vector *x=gsl_vector_alloc (U->size1); - gsl_vector *Utx=gsl_vector_alloc (U->size2); - gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); - - // Create a large matrix. - size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - gsl_matrix_set_zero (Uab); - CalcUab (UtW, Uty, Uab); - - // Calculate n_bit and c, the number of bit for each SNP. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - size_t c=0, t_last=0; - for (size_t t=0; t<snpInfo.size(); ++t) { - if (indicator_snp[t]==0) {continue;} - t_last++; - } - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) { - ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. - x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. 
- for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==(int)ni_total) { - break; - } - if (indicator_idv[ci_total]==0) { - ci_total++; - continue; - } - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(x, ci_test, 2); - x_mean+=2.0; - } - else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } - } - else { - if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } - else {gsl_vector_set(x, ci_test, -9); n_miss++; } - } - - ci_total++; - ci_test++; - } - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - geno=gsl_vector_get(x,i); - if (geno==-9) { - gsl_vector_set(x, i, x_mean); - geno=x_mean; - } - } - - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); - gsl_vector_memcpy (&Xlarge_col.vector, x); - c++; - - if (c%msize==0 || c==t_last) { - size_t l=0; - if (c%msize==0) {l=msize;} else {l=c%msize;} - - gsl_matrix_view Xlarge_sub= - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub= - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, - 0.0, &UtXlarge_sub.matrix); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Xlarge); - - for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col= - gsl_matrix_column (UtXlarge, i); - gsl_vector_memcpy (Utx, &UtXlarge_col.vector); - - CalcUab(UtW, Uty, Utx, Uab); - - time_start=clock(); - FUNC_PARAM param1={false, ni_test, n_cvt, eval, - Uab, ab, 0}; - - // 3 is before 1, for beta. 
- if (a_mode==3 || a_mode==4) { - CalcRLScore (l_mle_null, param1, beta, se, p_score); - } - - if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, - lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, - lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1); - } - - time_opt+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - } - } - cout<<endl; - - gsl_vector_free (x); - gsl_vector_free (Utx); - gsl_matrix_free (Uab); - gsl_vector_free (ab); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - infile.close(); - infile.clear(); - - return; +void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y) { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } + + clock_t time_start = clock(); + + char ch[1]; + bitset<8> b; + + double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; + double p_lrt = 0, p_score = 0; + double logl_H1 = 0.0; + int n_bit, n_miss, ci_total, ci_test; + double geno, x_mean; + + // Calculate basic quantities. + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + gsl_vector *x = gsl_vector_alloc(U->size1); + gsl_vector *Utx = gsl_vector_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); + + // Create a large matrix. 
+ size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix_set_zero(Xlarge); + + gsl_matrix_set_zero(Uab); + CalcUab(UtW, Uty, Uab); + + // Calculate n_bit and c, the number of bit for each SNP. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + size_t c = 0, t_last = 0; + for (size_t t = 0; t < snpInfo.size(); ++t) { + if (indicator_snp[t] == 0) { + continue; + } + t_last++; + } + for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { + if (t % d_pace == 0 || t == snpInfo.size() - 1) { + ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + x_mean = 0.0; + n_miss = 0; + ci_total = 0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. 
+ for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(x, ci_test, 0); + } else { + gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } + + ci_total++; + ci_test++; + } + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + geno = gsl_vector_get(x, i); + if (geno == -9) { + gsl_vector_set(x, i, x_mean); + geno = x_mean; + } + } + + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); + gsl_vector_memcpy(&Xlarge_col.vector, x); + c++; + + if (c % msize == 0 || c == t_last) { + size_t l = 0; + if (c % msize == 0) { + l = msize; + } else { + l = c % msize; + } + + gsl_matrix_view Xlarge_sub = + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub = + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + + time_start = clock(); + eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + &UtXlarge_sub.matrix); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Xlarge); + + for (size_t i = 0; i < l; i++) { + gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); + gsl_vector_memcpy(Utx, &UtXlarge_col.vector); + + CalcUab(UtW, Uty, Utx, Uab); + + time_start = clock(); + FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; + + // 3 is before 1, for beta. 
+ if (a_mode == 3 || a_mode == 4) { + CalcRLScore(l_mle_null, param1, beta, se, p_score); + } + + if (a_mode == 1 || a_mode == 4) { + CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, + logl_H1); + CalcRLWald(lambda_remle, param1, beta, se, p_wald); + } + + if (a_mode == 2 || a_mode == 4) { + CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + } + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(Utx); + gsl_matrix_free(Uab); + gsl_vector_free(ab); + + gsl_matrix_free(Xlarge); + gsl_matrix_free(UtXlarge); + + infile.close(); + infile.clear(); + + return; } // WJA added. -void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { - string file_bgen=file_oxford+".bgen"; - ifstream infile (file_bgen.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bgen file:"<<file_bgen<<endl; - return; - } - - clock_t time_start=clock(); - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; - double p_lrt=0, p_score=0; - double logl_H1=0.0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate basic quantities. - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - gsl_vector *x=gsl_vector_alloc (U->size1); - gsl_vector *x_miss=gsl_vector_alloc (U->size1); - gsl_vector *Utx=gsl_vector_alloc (U->size2); - gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); - - // Create a large matrix. 
- size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - gsl_matrix_set_zero (Uab); - CalcUab (UtW, Uty, Uab); - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4); - infile.read(reinterpret_cast<char*>(&bgen_header_length),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsnps),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsamples),4); - bgen_snp_block_offset-=4; - infile.ignore(4+bgen_header_length-20); - bgen_snp_block_offset-=4+bgen_header_length-20; - infile.read(reinterpret_cast<char*>(&bgen_flags),4); - bgen_snp_block_offset-=4; - bool CompressedSNPBlocks=bgen_flags&0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; - double bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - std::cout << "Warning: WJA hard coded SNP missingness " << - "threshold of 10%"<<std::endl; - - // Start reading genotypes and analyze. - size_t c=0, t_last=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (indicator_snp[t]==0) {continue;} - t_last++; - } - for (size_t t=0; t<indicator_snp.size(); ++t) - { - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - if (indicator_snp[t]==0) {continue;} - - // Read SNP header. 
- id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char*>(&bgen_N),4); - infile.read(reinterpret_cast<char*>(&bgen_LS),2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char*>(&bgen_LR),2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char*>(&bgen_LC),2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4); - - infile.read(reinterpret_cast<char*>(&bgen_LA),4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - - infile.read(reinterpret_cast<char*>(&bgen_LB),4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3*bgen_N]; - - if (indicator_snp[t]==0) { - if(CompressedSNPBlocks) - infile.read(reinterpret_cast<char*>(&bgen_P),4); - else - bgen_P=6*bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if(CompressedSNPBlocks) { - infile.read(reinterpret_cast<char*>(&bgen_P),4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size=6*bgen_N; - - infile.read(reinterpret_cast<char*>(zipped_data), - bgen_P); - - int result= - uncompress(reinterpret_cast<Bytef*>(unzipped_data), - reinterpret_cast<uLongf*>(&unzipped_data_size), - reinterpret_cast<Bytef*>(zipped_data), - static_cast<uLong> (bgen_P)); - assert(result == Z_OK); - - } - else - { - - bgen_P=6*bgen_N; - infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P); - } - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<bgen_N; ++i) { - if (indicator_idv[i]==0) {continue;} - - bgen_geno_prob_AA= - static_cast<double>(unzipped_data[i*3])/32768.0; - bgen_geno_prob_AB= - static_cast<double>(unzipped_data[i*3+1])/32768.0; - bgen_geno_prob_BB= - static_cast<double>(unzipped_data[i*3+2])/32768.0; - - // WJA. 
- bgen_geno_prob_non_miss = bgen_geno_prob_AA + - bgen_geno_prob_AB+bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss<0.9) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } - else { - - bgen_geno_prob_AA/=bgen_geno_prob_non_miss; - bgen_geno_prob_AB/=bgen_geno_prob_non_miss; - bgen_geno_prob_BB/=bgen_geno_prob_non_miss; - - geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=static_cast<double>(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) { - gsl_vector_set(x, i, x_mean); - } - geno=gsl_vector_get(x, i); - } - - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize); - gsl_vector_memcpy (&Xlarge_col.vector, x); - c++; - - if (c%msize==0 || c==t_last ) { - size_t l=0; - if (c%msize==0) {l=msize;} else {l=c%msize;} - - gsl_matrix_view Xlarge_sub= - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub= - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, - 0.0, &UtXlarge_sub.matrix); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Xlarge); - - for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col= - gsl_matrix_column (UtXlarge, i); - gsl_vector_memcpy (Utx, &UtXlarge_col.vector); - - CalcUab(UtW, Uty, Utx, Uab); - - time_start=clock(); - FUNC_PARAM param1={false,ni_test,n_cvt,eval,Uab,ab,0}; - - // 3 is before 1. 
- if (a_mode==3 || a_mode==4) { - CalcRLScore (l_mle_null, param1, beta, se, p_score); - } - - if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, - lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, - lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1); - } - - time_opt+=(clock()-time_start)/ - (double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - } - } - cout<<endl; - - gsl_vector_free (x); - gsl_vector_free (x_miss); - gsl_vector_free (Utx); - gsl_matrix_free (Uab); - gsl_vector_free (ab); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - infile.close(); - infile.clear(); - - return; +void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y) { + string file_bgen = file_oxford + ".bgen"; + ifstream infile(file_bgen.c_str(), ios::binary); + if (!infile) { + cout << "error reading bgen file:" << file_bgen << endl; + return; + } + + clock_t time_start = clock(); + double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; + double p_lrt = 0, p_score = 0; + double logl_H1 = 0.0; + int n_miss, c_phen; + double geno, x_mean; + + // Calculate basic quantities. + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + gsl_vector *x = gsl_vector_alloc(U->size1); + gsl_vector *x_miss = gsl_vector_alloc(U->size1); + gsl_vector *Utx = gsl_vector_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); + + // Create a large matrix. 
+ size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix_set_zero(Xlarge); + + gsl_matrix_set_zero(Uab); + CalcUab(UtW, Uty, Uab); + + // Read in header. + uint32_t bgen_snp_block_offset; + uint32_t bgen_header_length; + uint32_t bgen_nsamples; + uint32_t bgen_nsnps; + uint32_t bgen_flags; + infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); + infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); + bgen_snp_block_offset -= 4; + infile.ignore(4 + bgen_header_length - 20); + bgen_snp_block_offset -= 4 + bgen_header_length - 20; + infile.read(reinterpret_cast<char *>(&bgen_flags), 4); + bgen_snp_block_offset -= 4; + bool CompressedSNPBlocks = bgen_flags & 0x1; + + infile.ignore(bgen_snp_block_offset); + + double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; + double bgen_geno_prob_non_miss; + + uint32_t bgen_N; + uint16_t bgen_LS; + uint16_t bgen_LR; + uint16_t bgen_LC; + uint32_t bgen_SNP_pos; + uint32_t bgen_LA; + std::string bgen_A_allele; + uint32_t bgen_LB; + std::string bgen_B_allele; + uint32_t bgen_P; + size_t unzipped_data_size; + string id; + string rs; + string chr; + std::cout << "Warning: WJA hard coded SNP missingness " + << "threshold of 10%" << std::endl; + + // Start reading genotypes and analyze. + size_t c = 0, t_last = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (indicator_snp[t] == 0) { + continue; + } + t_last++; + } + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // Read SNP header. 
+ id.clear(); + rs.clear(); + chr.clear(); + bgen_A_allele.clear(); + bgen_B_allele.clear(); + + infile.read(reinterpret_cast<char *>(&bgen_N), 4); + infile.read(reinterpret_cast<char *>(&bgen_LS), 2); + + id.resize(bgen_LS); + infile.read(&id[0], bgen_LS); + + infile.read(reinterpret_cast<char *>(&bgen_LR), 2); + rs.resize(bgen_LR); + infile.read(&rs[0], bgen_LR); + + infile.read(reinterpret_cast<char *>(&bgen_LC), 2); + chr.resize(bgen_LC); + infile.read(&chr[0], bgen_LC); + + infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); + + infile.read(reinterpret_cast<char *>(&bgen_LA), 4); + bgen_A_allele.resize(bgen_LA); + infile.read(&bgen_A_allele[0], bgen_LA); + + infile.read(reinterpret_cast<char *>(&bgen_LB), 4); + bgen_B_allele.resize(bgen_LB); + infile.read(&bgen_B_allele[0], bgen_LB); + + uint16_t unzipped_data[3 * bgen_N]; + + if (indicator_snp[t] == 0) { + if (CompressedSNPBlocks) + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + else + bgen_P = 6 * bgen_N; + + infile.ignore(static_cast<size_t>(bgen_P)); + + continue; + } + + if (CompressedSNPBlocks) { + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + uint8_t zipped_data[bgen_P]; + + unzipped_data_size = 6 * bgen_N; + + infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); + + int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), + reinterpret_cast<uLongf *>(&unzipped_data_size), + reinterpret_cast<Bytef *>(zipped_data), + static_cast<uLong>(bgen_P)); + assert(result == Z_OK); + + } else { + + bgen_P = 6 * bgen_N; + infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); + } + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < bgen_N; ++i) { + if (indicator_idv[i] == 0) { + continue; + } + + bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; + bgen_geno_prob_AB = + static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; + bgen_geno_prob_BB = + static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; 
+ + // WJA. + bgen_geno_prob_non_miss = + bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; + if (bgen_geno_prob_non_miss < 0.9) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + + bgen_geno_prob_AA /= bgen_geno_prob_non_miss; + bgen_geno_prob_AB /= bgen_geno_prob_non_miss; + bgen_geno_prob_BB /= bgen_geno_prob_non_miss; + + geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= static_cast<double>(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + geno = gsl_vector_get(x, i); + } + + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); + gsl_vector_memcpy(&Xlarge_col.vector, x); + c++; + + if (c % msize == 0 || c == t_last) { + size_t l = 0; + if (c % msize == 0) { + l = msize; + } else { + l = c % msize; + } + + gsl_matrix_view Xlarge_sub = + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub = + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + + time_start = clock(); + eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + &UtXlarge_sub.matrix); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Xlarge); + + for (size_t i = 0; i < l; i++) { + gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); + gsl_vector_memcpy(Utx, &UtXlarge_col.vector); + + CalcUab(UtW, Uty, Utx, Uab); + + time_start = clock(); + FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; + + // 3 is before 1. 
+ if (a_mode == 3 || a_mode == 4) { + CalcRLScore(l_mle_null, param1, beta, se, p_score); + } + + if (a_mode == 1 || a_mode == 4) { + CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, + logl_H1); + CalcRLWald(lambda_remle, param1, beta, se, p_wald); + } + + if (a_mode == 2 || a_mode == 4) { + CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, + p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + } + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(x_miss); + gsl_vector_free(Utx); + gsl_matrix_free(Uab); + gsl_vector_free(ab); + + gsl_matrix_free(Xlarge); + gsl_matrix_free(UtXlarge); + + infile.close(); + infile.clear(); + + return; } -void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const double l_min, const double l_max, - const size_t n_region, - vector<pair<size_t, double> > &pos_loglr) { - double logl_H0, logl_H1, log_lr, lambda0, lambda1; +void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const double l_min, const double l_max, const size_t n_region, + vector<pair<size_t, double>> &pos_loglr) { + double logl_H0, logl_H1, log_lr, lambda0, lambda1; - gsl_vector *w=gsl_vector_alloc (Uty->size); - gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1); - gsl_matrix *Uab=gsl_matrix_alloc (Uty->size, 6); - gsl_vector *ab=gsl_vector_alloc (6); + gsl_vector *w = gsl_vector_alloc(Uty->size); + gsl_matrix *Utw = gsl_matrix_alloc(Uty->size, 1); + gsl_matrix *Uab = gsl_matrix_alloc(Uty->size, 6); + gsl_vector *ab = gsl_vector_alloc(6); - gsl_vector_set_zero(ab); - gsl_vector_set_all (w, 1.0); - gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0); - gsl_blas_dgemv (CblasTrans, 1.0, U, w, 
0.0, &Utw_col.vector); + gsl_vector_set_zero(ab); + gsl_vector_set_all(w, 1.0); + gsl_vector_view Utw_col = gsl_matrix_column(Utw, 0); + gsl_blas_dgemv(CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector); - CalcUab (Utw, Uty, Uab); - FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0}; + CalcUab(Utw, Uty, Uab); + FUNC_PARAM param0 = {true, Uty->size, 1, K_eval, Uab, ab, 0}; - CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0); + CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0); - for (size_t i=0; i<UtX->size2; ++i) { - gsl_vector_const_view UtX_col=gsl_matrix_const_column (UtX, i); - CalcUab(Utw, Uty, &UtX_col.vector, Uab); - FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0}; + for (size_t i = 0; i < UtX->size2; ++i) { + gsl_vector_const_view UtX_col = gsl_matrix_const_column(UtX, i); + CalcUab(Utw, Uty, &UtX_col.vector, Uab); + FUNC_PARAM param1 = {false, UtX->size1, 1, K_eval, Uab, ab, 0}; - CalcLambda ('L', param1, l_min, l_max, n_region, lambda1, - logl_H1); - log_lr=logl_H1-logl_H0; + CalcLambda('L', param1, l_min, l_max, n_region, lambda1, logl_H1); + log_lr = logl_H1 - logl_H0; - pos_loglr.push_back(make_pair(i,log_lr) ); - } + pos_loglr.push_back(make_pair(i, log_lr)); + } - gsl_vector_free (w); - gsl_matrix_free (Utw); - gsl_matrix_free (Uab); - gsl_vector_free (ab); + gsl_vector_free(w); + gsl_matrix_free(Utw); + gsl_matrix_free(Uab); + gsl_vector_free(ab); - return; + return; } -void CalcLambda (const char func_name, FUNC_PARAM &params, - const double l_min, const double l_max, - const size_t n_region, double &lambda, double &logf) { - if (func_name!='R' && func_name!='L' && func_name!='r' && - func_name!='l') { - cout << "func_name only takes 'R' or 'L': 'R' for " << - "log-restricted likelihood, 'L' for log-likelihood." << endl; - return; - } - - vector<pair<double, double> > lambda_lh; - - // Evaluate first-order derivates in different intervals. 
- double lambda_l, lambda_h, lambda_interval= - log(l_max/l_min)/(double)n_region; - double dev1_l, dev1_h, logf_l, logf_h; - - for (size_t i=0; i<n_region; ++i) { - lambda_l=l_min*exp(lambda_interval*i); - lambda_h=l_min*exp(lambda_interval*(i+1.0)); - - if (func_name=='R' || func_name=='r') { - dev1_l=LogRL_dev1 (lambda_l, &params); - dev1_h=LogRL_dev1 (lambda_h, &params); - } - else { - dev1_l=LogL_dev1 (lambda_l, &params); - dev1_h=LogL_dev1 (lambda_h, &params); - } - - if (dev1_l*dev1_h<=0) { - lambda_lh.push_back(make_pair(lambda_l, lambda_h)); - } - } - - // If derivates do not change signs in any interval. - if (lambda_lh.empty()) { - if (func_name=='R' || func_name=='r') { - logf_l=LogRL_f (l_min, &params); - logf_h=LogRL_f (l_max, &params); - } - else { - logf_l=LogL_f (l_min, &params); - logf_h=LogL_f (l_max, &params); - } - - if (logf_l>=logf_h) { - lambda=l_min; - logf=logf_l; - } else { - lambda=l_max; - logf=logf_h; - } - } - else { - - // If derivates change signs. - int status; - int iter=0, max_iter=100; - double l, l_temp; - - gsl_function F; - gsl_function_fdf FDF; - - F.params=&params; - FDF.params=&params; - - if (func_name=='R' || func_name=='r') { - F.function=&LogRL_dev1; - FDF.f=&LogRL_dev1; - FDF.df=&LogRL_dev2; - FDF.fdf=&LogRL_dev12; - } - else { - F.function=&LogL_dev1; - FDF.f=&LogL_dev1; - FDF.df=&LogL_dev2; - FDF.fdf=&LogL_dev12; - } - - const gsl_root_fsolver_type *T_f; - gsl_root_fsolver *s_f; - T_f=gsl_root_fsolver_brent; - s_f=gsl_root_fsolver_alloc (T_f); - - const gsl_root_fdfsolver_type *T_fdf; - gsl_root_fdfsolver *s_fdf; - T_fdf=gsl_root_fdfsolver_newton; - s_fdf=gsl_root_fdfsolver_alloc(T_fdf); - - for (vector<double>::size_type i=0; i<lambda_lh.size(); ++i) { - lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second; - gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h); - - do { - iter++; - status=gsl_root_fsolver_iterate (s_f); - l=gsl_root_fsolver_root (s_f); - lambda_l=gsl_root_fsolver_x_lower (s_f); - lambda_h=gsl_root_fsolver_x_upper (s_f); - 
status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1); - } - while (status==GSL_CONTINUE && iter<max_iter); - - iter=0; - - gsl_root_fdfsolver_set (s_fdf, &FDF, l); - - do { - iter++; - status=gsl_root_fdfsolver_iterate (s_fdf); - l_temp=l; - l=gsl_root_fdfsolver_root (s_fdf); - status=gsl_root_test_delta (l, l_temp, 0, 1e-5); - } - while (status==GSL_CONTINUE && - iter<max_iter && - l>l_min && l<l_max); - - l=l_temp; - if (l<l_min) {l=l_min;} - if (l>l_max) {l=l_max;} - if (func_name=='R' || func_name=='r') { - logf_l=LogRL_f (l, &params); - } else { - logf_l=LogL_f (l, &params); - } - - if (i==0) {logf=logf_l; lambda=l;} - else if (logf<logf_l) {logf=logf_l; lambda=l;} - else {} - } - gsl_root_fsolver_free (s_f); - gsl_root_fdfsolver_free (s_fdf); - - if (func_name=='R' || func_name=='r') { - logf_l=LogRL_f (l_min, &params); - logf_h=LogRL_f (l_max, &params); - } - else { - logf_l=LogL_f (l_min, &params); - logf_h=LogL_f (l_max, &params); - } - - if (logf_l>logf) {lambda=l_min; logf=logf_l;} - if (logf_h>logf) {lambda=l_max; logf=logf_h;} - } - - return; +void CalcLambda(const char func_name, FUNC_PARAM &params, const double l_min, + const double l_max, const size_t n_region, double &lambda, + double &logf) { + if (func_name != 'R' && func_name != 'L' && func_name != 'r' && + func_name != 'l') { + cout << "func_name only takes 'R' or 'L': 'R' for " + << "log-restricted likelihood, 'L' for log-likelihood." << endl; + return; + } + + vector<pair<double, double>> lambda_lh; + + // Evaluate first-order derivates in different intervals. 
+ double lambda_l, lambda_h, + lambda_interval = log(l_max / l_min) / (double)n_region; + double dev1_l, dev1_h, logf_l, logf_h; + + for (size_t i = 0; i < n_region; ++i) { + lambda_l = l_min * exp(lambda_interval * i); + lambda_h = l_min * exp(lambda_interval * (i + 1.0)); + + if (func_name == 'R' || func_name == 'r') { + dev1_l = LogRL_dev1(lambda_l, &params); + dev1_h = LogRL_dev1(lambda_h, &params); + } else { + dev1_l = LogL_dev1(lambda_l, &params); + dev1_h = LogL_dev1(lambda_h, &params); + } + + if (dev1_l * dev1_h <= 0) { + lambda_lh.push_back(make_pair(lambda_l, lambda_h)); + } + } + + // If derivates do not change signs in any interval. + if (lambda_lh.empty()) { + if (func_name == 'R' || func_name == 'r') { + logf_l = LogRL_f(l_min, &params); + logf_h = LogRL_f(l_max, &params); + } else { + logf_l = LogL_f(l_min, &params); + logf_h = LogL_f(l_max, &params); + } + + if (logf_l >= logf_h) { + lambda = l_min; + logf = logf_l; + } else { + lambda = l_max; + logf = logf_h; + } + } else { + + // If derivates change signs. 
+ int status; + int iter = 0, max_iter = 100; + double l, l_temp; + + gsl_function F; + gsl_function_fdf FDF; + + F.params = &params; + FDF.params = &params; + + if (func_name == 'R' || func_name == 'r') { + F.function = &LogRL_dev1; + FDF.f = &LogRL_dev1; + FDF.df = &LogRL_dev2; + FDF.fdf = &LogRL_dev12; + } else { + F.function = &LogL_dev1; + FDF.f = &LogL_dev1; + FDF.df = &LogL_dev2; + FDF.fdf = &LogL_dev12; + } + + const gsl_root_fsolver_type *T_f; + gsl_root_fsolver *s_f; + T_f = gsl_root_fsolver_brent; + s_f = gsl_root_fsolver_alloc(T_f); + + const gsl_root_fdfsolver_type *T_fdf; + gsl_root_fdfsolver *s_fdf; + T_fdf = gsl_root_fdfsolver_newton; + s_fdf = gsl_root_fdfsolver_alloc(T_fdf); + + for (vector<double>::size_type i = 0; i < lambda_lh.size(); ++i) { + lambda_l = lambda_lh[i].first; + lambda_h = lambda_lh[i].second; + gsl_root_fsolver_set(s_f, &F, lambda_l, lambda_h); + + do { + iter++; + status = gsl_root_fsolver_iterate(s_f); + l = gsl_root_fsolver_root(s_f); + lambda_l = gsl_root_fsolver_x_lower(s_f); + lambda_h = gsl_root_fsolver_x_upper(s_f); + status = gsl_root_test_interval(lambda_l, lambda_h, 0, 1e-1); + } while (status == GSL_CONTINUE && iter < max_iter); + + iter = 0; + + gsl_root_fdfsolver_set(s_fdf, &FDF, l); + + do { + iter++; + status = gsl_root_fdfsolver_iterate(s_fdf); + l_temp = l; + l = gsl_root_fdfsolver_root(s_fdf); + status = gsl_root_test_delta(l, l_temp, 0, 1e-5); + } while (status == GSL_CONTINUE && iter < max_iter && l > l_min && + l < l_max); + + l = l_temp; + if (l < l_min) { + l = l_min; + } + if (l > l_max) { + l = l_max; + } + if (func_name == 'R' || func_name == 'r') { + logf_l = LogRL_f(l, &params); + } else { + logf_l = LogL_f(l, &params); + } + + if (i == 0) { + logf = logf_l; + lambda = l; + } else if (logf < logf_l) { + logf = logf_l; + lambda = l; + } else { + } + } + gsl_root_fsolver_free(s_f); + gsl_root_fdfsolver_free(s_fdf); + + if (func_name == 'R' || func_name == 'r') { + logf_l = LogRL_f(l_min, &params); + logf_h = LogRL_f(l_max, 
&params); + } else { + logf_l = LogL_f(l_min, &params); + logf_h = LogL_f(l_max, &params); + } + + if (logf_l > logf) { + lambda = l_min; + logf = logf_l; + } + if (logf_h > logf) { + lambda = l_max; + logf = logf_h; + } + } + + return; } // Calculate lambda in the null model. -void CalcLambda (const char func_name, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const double l_min, const double l_max, - const size_t n_region, double &lambda, double &logl_H0) { - if (func_name!='R' && func_name!='L' && func_name!='r' && - func_name!='l') { - cout<<"func_name only takes 'R' or 'L': 'R' for " << - "log-restricted likelihood, 'L' for log-likelihood." << endl; - return; - } +void CalcLambda(const char func_name, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const double l_min, const double l_max, const size_t n_region, + double &lambda, double &logl_H0) { + if (func_name != 'R' && func_name != 'L' && func_name != 'r' && + func_name != 'l') { + cout << "func_name only takes 'R' or 'L': 'R' for " + << "log-restricted likelihood, 'L' for log-likelihood." 
<< endl; + return; + } - size_t n_cvt=UtW->size2, ni_test=UtW->size1; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; + size_t n_cvt = UtW->size2, ni_test = UtW->size1; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); + gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); - gsl_matrix_set_zero (Uab); - CalcUab (UtW, Uty, Uab); + gsl_matrix_set_zero(Uab); + CalcUab(UtW, Uty, Uab); - FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0}; + FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0}; - CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0); + CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0); - gsl_matrix_free(Uab); - gsl_vector_free(ab); + gsl_matrix_free(Uab); + gsl_vector_free(ab); - return; + return; } // Obtain REMLE estimate for PVE using lambda_remle. -void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, - const gsl_vector *Uty, const double lambda, - const double trace_G, double &pve, double &pve_se) { - size_t n_cvt=UtW->size2, ni_test=UtW->size1; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; +void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, const double trace_G, + double &pve, double &pve_se) { + size_t n_cvt = UtW->size2, ni_test = UtW->size1; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); + gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); - gsl_matrix_set_zero (Uab); - CalcUab (UtW, Uty, Uab); + gsl_matrix_set_zero(Uab); + CalcUab(UtW, Uty, Uab); - FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0}; + FUNC_PARAM param0 = {true, ni_test, n_cvt, eval, Uab, ab, 0}; - double se=sqrt(-1.0/LogRL_dev2 (lambda, &param0)); + double se = 
sqrt(-1.0 / LogRL_dev2(lambda, &param0)); + - pve=trace_G*lambda/(trace_G*lambda+1.0); - pve_se=trace_G/((trace_G*lambda+1.0)*(trace_G*lambda+1.0))*se; + pve = trace_G * lambda / (trace_G * lambda + 1.0); + pve_se = trace_G / ((trace_G * lambda + 1.0) * (trace_G * lambda + 1.0)) * se; - gsl_matrix_free (Uab); - gsl_vector_free (ab); - return; + gsl_matrix_free(Uab); + gsl_vector_free(ab); + return; } // Obtain REML estimate for Vg and Ve using lambda_remle. // Obtain beta and se(beta) for coefficients. // ab is not used when e_mode==0. -void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, - const gsl_vector *Uty, const double lambda, - double &vg, double &ve, gsl_vector *beta, - gsl_vector *se_beta) { - size_t n_cvt=UtW->size2, ni_test=UtW->size1; - size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2; - - gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); - gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index); - gsl_vector *Hi_eval=gsl_vector_alloc(eval->size); - gsl_vector *v_temp=gsl_vector_alloc(eval->size); - gsl_matrix *HiW=gsl_matrix_alloc(eval->size, UtW->size2); - gsl_matrix *WHiW=gsl_matrix_alloc(UtW->size2, UtW->size2); - gsl_vector *WHiy=gsl_vector_alloc(UtW->size2); - gsl_matrix *Vbeta=gsl_matrix_alloc(UtW->size2, UtW->size2); - - gsl_matrix_set_zero (Uab); - CalcUab (UtW, Uty, Uab); - - gsl_vector_memcpy (v_temp, eval); - gsl_vector_scale (v_temp, lambda); - gsl_vector_set_all (Hi_eval, 1.0); - gsl_vector_add_constant (v_temp, 1.0); - gsl_vector_div (Hi_eval, v_temp); - - // Calculate beta. 
- gsl_matrix_memcpy (HiW, UtW); - for (size_t i=0; i<UtW->size2; i++) { - gsl_vector_view HiW_col=gsl_matrix_column(HiW, i); - gsl_vector_mul(&HiW_col.vector, Hi_eval); - } - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW); - gsl_blas_dgemv (CblasTrans, 1.0, HiW, Uty, 0.0, WHiy); - - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (UtW->size2); - LUDecomp (WHiW, pmt, &sig); - LUSolve (WHiW, pmt, WHiy, beta); - LUInvert (WHiW, pmt, Vbeta); - - // Calculate vg and ve. - CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab); - - size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt); - double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy); - - ve=P_yy/(double)(ni_test-n_cvt); - vg=ve*lambda; - - // With ve, calculate se(beta). - gsl_matrix_scale(Vbeta, ve); - - // Obtain se_beta. - for (size_t i=0; i<Vbeta->size1; i++) { - gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta,i,i))); - } - - gsl_matrix_free(Uab); - gsl_matrix_free(Pab); - gsl_vector_free(ab); - gsl_vector_free(Hi_eval); - gsl_vector_free(v_temp); - gsl_matrix_free(HiW); - gsl_matrix_free(WHiW); - gsl_vector_free(WHiy); - gsl_matrix_free(Vbeta); - - gsl_permutation_free(pmt); - return; +void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, double &vg, + double &ve, gsl_vector *beta, gsl_vector *se_beta) { + size_t n_cvt = UtW->size2, ni_test = UtW->size1; + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; + + gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); + gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index); + gsl_vector *Hi_eval = gsl_vector_alloc(eval->size); + gsl_vector *v_temp = gsl_vector_alloc(eval->size); + gsl_matrix *HiW = gsl_matrix_alloc(eval->size, UtW->size2); + gsl_matrix *WHiW = gsl_matrix_alloc(UtW->size2, UtW->size2); + gsl_vector *WHiy = gsl_vector_alloc(UtW->size2); + gsl_matrix *Vbeta = gsl_matrix_alloc(UtW->size2, UtW->size2); + + 
gsl_matrix_set_zero(Uab); + CalcUab(UtW, Uty, Uab); + + gsl_vector_memcpy(v_temp, eval); + gsl_vector_scale(v_temp, lambda); + gsl_vector_set_all(Hi_eval, 1.0); + gsl_vector_add_constant(v_temp, 1.0); + gsl_vector_div(Hi_eval, v_temp); + + // Calculate beta. + gsl_matrix_memcpy(HiW, UtW); + for (size_t i = 0; i < UtW->size2; i++) { + gsl_vector_view HiW_col = gsl_matrix_column(HiW, i); + gsl_vector_mul(&HiW_col.vector, Hi_eval); + } + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW); + gsl_blas_dgemv(CblasTrans, 1.0, HiW, Uty, 0.0, WHiy); + + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(UtW->size2); + LUDecomp(WHiW, pmt, &sig); + LUSolve(WHiW, pmt, WHiy, beta); + LUInvert(WHiW, pmt, Vbeta); + + // Calculate vg and ve. + CalcPab(n_cvt, 0, Hi_eval, Uab, ab, Pab); + + size_t index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); + double P_yy = gsl_matrix_get(Pab, n_cvt, index_yy); + + ve = P_yy / (double)(ni_test - n_cvt); + vg = ve * lambda; + + // With ve, calculate se(beta). + gsl_matrix_scale(Vbeta, ve); + + // Obtain se_beta. 
+ for (size_t i = 0; i < Vbeta->size1; i++) { + gsl_vector_set(se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i))); + } + + gsl_matrix_free(Uab); + gsl_matrix_free(Pab); + gsl_vector_free(ab); + gsl_vector_free(Hi_eval); + gsl_vector_free(v_temp); + gsl_matrix_free(HiW); + gsl_matrix_free(WHiW); + gsl_vector_free(WHiy); + gsl_matrix_free(Vbeta); + + gsl_permutation_free(pmt); + return; } -void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y, - const gsl_vector *env) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return; - } - - clock_t time_start=clock(); - - string line; - char *ch_ptr; - - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; - double p_lrt=0, p_score=0; - double logl_H1=0.0, logl_H0=0.0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate basic quantities. - size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2; - - gsl_vector *x=gsl_vector_alloc (U->size1); - gsl_vector *x_miss=gsl_vector_alloc (U->size1); - gsl_vector *Utx=gsl_vector_alloc (U->size2); - gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); - - gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2); - gsl_matrix_view UtW_expand_mat= - gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); - gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW); - gsl_vector_view UtW_expand_env= - gsl_matrix_column(UtW_expand, UtW->size2); - gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); - gsl_vector_view UtW_expand_x= - gsl_matrix_column(UtW_expand, UtW->size2+1); - - // Start reading genotypes and analyze. 
- for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } - else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) { - gsl_vector_set(x, i, x_mean); - } - geno=gsl_vector_get(x, i); - if (x_mean>1) { - gsl_vector_set(x, i, 2-geno); - } - } - - // Calculate statistics. - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, - &UtW_expand_x.vector); - gsl_vector_mul (x, env); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Uab); - CalcUab (UtW_expand, Uty, Uab); - - if (a_mode==2 || a_mode==4) { - FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0}; - CalcLambda ('L', param0, l_min, l_max, n_region, - lambda_mle, logl_H0); - } - - CalcUab(UtW_expand, Uty, Utx, Uab); - - time_start=clock(); - FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0}; - - // 3 is before 1. 
- if (a_mode==3 || a_mode==4) { - CalcRLScore (l_mle_null, param1, beta, se, p_score); - } - - if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, - lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, - lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); - } - - if (x_mean>1) {beta*=-1;} - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout<<endl; - - gsl_vector_free (x); - gsl_vector_free (x_miss); - gsl_vector_free (Utx); - gsl_matrix_free (Uab); - gsl_vector_free (ab); - - gsl_matrix_free (UtW_expand); - - infile.close(); - infile.clear(); - - return; +void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return; + } + + clock_t time_start = clock(); + + string line; + char *ch_ptr; + + double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; + double p_lrt = 0, p_score = 0; + double logl_H1 = 0.0, logl_H0 = 0.0; + int n_miss, c_phen; + double geno, x_mean; + + // Calculate basic quantities. 
+ size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2; + + gsl_vector *x = gsl_vector_alloc(U->size1); + gsl_vector *x_miss = gsl_vector_alloc(U->size1); + gsl_vector *Utx = gsl_vector_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); + + gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2); + gsl_matrix_view UtW_expand_mat = + gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); + gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW); + gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2); + gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); + gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1); + + // Start reading genotypes and analyze. + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + geno = atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + geno = gsl_vector_get(x, i); + if (x_mean > 1) { + gsl_vector_set(x, i, 2 - geno); + } + } + + // Calculate statistics. 
+ time_start = clock(); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector); + gsl_vector_mul(x, env); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Uab); + CalcUab(UtW_expand, Uty, Uab); + + if (a_mode == 2 || a_mode == 4) { + FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0}; + CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0); + } + + CalcUab(UtW_expand, Uty, Utx, Uab); + + time_start = clock(); + FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0}; + + // 3 is before 1. + if (a_mode == 3 || a_mode == 4) { + CalcRLScore(l_mle_null, param1, beta, se, p_score); + } + + if (a_mode == 1 || a_mode == 4) { + CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcRLWald(lambda_remle, param1, beta, se, p_wald); + } + + if (a_mode == 2 || a_mode == 4) { + CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1); + } + + if (x_mean > 1) { + beta *= -1; + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. 
+ SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(x_miss); + gsl_vector_free(Utx); + gsl_matrix_free(Uab); + gsl_vector_free(ab); + + gsl_matrix_free(UtW_expand); + + infile.close(); + infile.clear(); + + return; } -void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y, - const gsl_vector *env) { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;} - - clock_t time_start=clock(); - - char ch[1]; - bitset<8> b; - - double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0; - double p_lrt=0, p_score=0; - double logl_H1=0.0, logl_H0=0.0; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; - - // Calculate basic quantities. - size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2; - - gsl_vector *x=gsl_vector_alloc (U->size1); - gsl_vector *Utx=gsl_vector_alloc (U->size2); - gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index); - gsl_vector *ab=gsl_vector_alloc (n_index); - - gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2); - gsl_matrix_view UtW_expand_mat= - gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); - gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW); - gsl_vector_view UtW_expand_env= - gsl_matrix_column(UtW_expand, UtW->size2); - gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); - gsl_vector_view UtW_expand_x= - gsl_matrix_column(UtW_expand, UtW->size2+1); - - // Calculate n_bit and c, the number of bit for each SNP. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. 
- for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) { - ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers - infile.seekg(t*n_bit+3); - - // Read genotypes. - x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==(int)ni_total) { - break; - } - if (indicator_idv[ci_total]==0) { - ci_total++; - continue; - } - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(x, ci_test, 2); - x_mean+=2.0; - } - else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } - } - else { - if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } - else {gsl_vector_set(x, ci_test, -9); n_miss++; } - } - - ci_total++; - ci_test++; - } - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - geno=gsl_vector_get(x,i); - if (geno==-9) { - gsl_vector_set(x, i, x_mean); - geno=x_mean; - } - if (x_mean>1) { - gsl_vector_set(x, i, 2-geno); - } - } - - // Calculate statistics. - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, - &UtW_expand_x.vector); - gsl_vector_mul (x, env); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Uab); - CalcUab (UtW_expand, Uty, Uab); - - if (a_mode==2 || a_mode==4) { - FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0}; - CalcLambda ('L', param0, l_min, l_max, n_region, - lambda_mle, logl_H0); - } - - CalcUab(UtW_expand, Uty, Utx, Uab); - - time_start=clock(); - FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0}; - - // 3 is before 1, for beta. 
- if (a_mode==3 || a_mode==4) { - CalcRLScore (l_mle_null, param1, beta, se, p_score); - } - - if (a_mode==1 || a_mode==4) { - CalcLambda ('R', param1, l_min, l_max, n_region, - lambda_remle, logl_H1); - CalcRLWald (lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode==2 || a_mode==4) { - CalcLambda ('L', param1, l_min, l_max, n_region, - lambda_mle, logl_H1); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1); - } - - if (x_mean>1) {beta*=-1;} - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. - SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, - p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout<<endl; - - gsl_vector_free (x); - gsl_vector_free (Utx); - gsl_matrix_free (Uab); - gsl_vector_free (ab); - - gsl_matrix_free (UtW_expand); - - infile.close(); - infile.clear(); - - return; +void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env) { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } + + clock_t time_start = clock(); + + char ch[1]; + bitset<8> b; + + double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; + double p_lrt = 0, p_score = 0; + double logl_H1 = 0.0, logl_H0 = 0.0; + int n_bit, n_miss, ci_total, ci_test; + double geno, x_mean; + + // Calculate basic quantities. 
+ size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2; + + gsl_vector *x = gsl_vector_alloc(U->size1); + gsl_vector *Utx = gsl_vector_alloc(U->size2); + gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); + gsl_vector *ab = gsl_vector_alloc(n_index); + + gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2); + gsl_matrix_view UtW_expand_mat = + gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2); + gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW); + gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2); + gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); + gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1); + + // Calculate n_bit and c, the number of bit for each SNP. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { + if (t % d_pace == 0 || t == snpInfo.size() - 1) { + ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers + infile.seekg(t * n_bit + 3); + + // Read genotypes. + x_mean = 0.0; + n_miss = 0; + ci_total = 0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. 
+ for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(x, ci_test, 0); + } else { + gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } + + ci_total++; + ci_test++; + } + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + geno = gsl_vector_get(x, i); + if (geno == -9) { + gsl_vector_set(x, i, x_mean); + geno = x_mean; + } + if (x_mean > 1) { + gsl_vector_set(x, i, 2 - geno); + } + } + + // Calculate statistics. + time_start = clock(); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector); + gsl_vector_mul(x, env); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Uab); + CalcUab(UtW_expand, Uty, Uab); + + if (a_mode == 2 || a_mode == 4) { + FUNC_PARAM param0 = {true, ni_test, n_cvt + 2, eval, Uab, ab, 0}; + CalcLambda('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0); + } + + CalcUab(UtW_expand, Uty, Utx, Uab); + + time_start = clock(); + FUNC_PARAM param1 = {false, ni_test, n_cvt + 2, eval, Uab, ab, 0}; + + // 3 is before 1, for beta. 
+ if (a_mode == 3 || a_mode == 4) { + CalcRLScore(l_mle_null, param1, beta, se, p_score); + } + + if (a_mode == 1 || a_mode == 4) { + CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1); + CalcRLWald(lambda_remle, param1, beta, se, p_wald); + } + + if (a_mode == 2 || a_mode == 4) { + CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), 1); + } + + if (x_mean > 1) { + beta *= -1; + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score}; + sumStat.push_back(SNPs); + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(Utx); + gsl_matrix_free(Uab); + gsl_vector_free(ab); + + gsl_matrix_free(UtW_expand); + + infile.close(); + infile.clear(); + + return; } @@ -19,120 +19,117 @@ #ifndef __LMM_H__ #define __LMM_H__ -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" -#include "param.h" +#include "gsl/gsl_vector.h" #include "io.h" +#include "param.h" using namespace std; class FUNC_PARAM { public: - bool calc_null; - size_t ni_test; - size_t n_cvt; - const gsl_vector *eval; - const gsl_matrix *Uab; - const gsl_vector *ab; - size_t e_mode; + bool calc_null; + size_t ni_test; + size_t n_cvt; + const gsl_vector *eval; + const gsl_matrix *Uab; + const gsl_vector *ab; + size_t e_mode; }; class LMM { public: - // IO-related parameters - int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests. - size_t d_pace; // Display pace. - - string file_bfile; - string file_geno; - string file_out; - string path_out; - - string file_gene; - // WJA added - string file_oxford; - - // LMM related parameters - double l_min; - double l_max; - size_t n_region; - double l_mle_null; - double logl_mle_H0; - - // Summary statistics - size_t ni_total, ni_test; // Number of individuals. - size_t ns_total, ns_test; // Number of SNPs. 
- size_t ng_total, ng_test; // Number of genes. - size_t n_cvt; - double time_UtX; // Time spent on optimization iterations. - double time_opt; // Time spent on optimization iterations. - - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis. - vector<int> indicator_idv; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; - - vector<SNPINFO> snpInfo; // Record SNP information. - - // Not included in PARAM. - vector<SUMSTAT> sumStat; // Output SNPSummary Data. - - // Main functions. - void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - void AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Utx, - const gsl_matrix *W, const gsl_vector *x); - void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y); - // WJA added. 
- void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y); - void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y); - void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y, - const gsl_vector *env); - void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y, - const gsl_vector *env); - void WriteFiles (); - - void CalcRLWald (const double &lambda, const FUNC_PARAM ¶ms, - double &beta, double &se, double &p_wald); - void CalcRLScore (const double &l, const FUNC_PARAM ¶ms, - double &beta, double &se, double &p_score); + // IO-related parameters + int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests. + size_t d_pace; // Display pace. + + string file_bfile; + string file_geno; + string file_out; + string path_out; + + string file_gene; + // WJA added + string file_oxford; + + // LMM related parameters + double l_min; + double l_max; + size_t n_region; + double l_mle_null; + double logl_mle_H0; + + // Summary statistics + size_t ni_total, ni_test; // Number of individuals. + size_t ns_total, ns_test; // Number of SNPs. + size_t ng_total, ng_test; // Number of genes. + size_t n_cvt; + double time_UtX; // Time spent on optimization iterations. + double time_opt; // Time spent on optimization iterations. + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + vector<SNPINFO> snpInfo; // Record SNP information. + + // Not included in PARAM. 
+ vector<SUMSTAT> sumStat; // Output SNPSummary Data. + + // Main functions. + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + void AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Utx, + const gsl_matrix *W, const gsl_vector *x); + void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y); + // WJA added. + void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y); + void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y); + void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env); + void AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env); + void WriteFiles(); + + void CalcRLWald(const double &lambda, const FUNC_PARAM ¶ms, double &beta, + double &se, double &p_wald); + void CalcRLScore(const double &l, const FUNC_PARAM ¶ms, double &beta, + double &se, double &p_score); }; -void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const double l_min, const double l_max, - const size_t n_region, - vector<pair<size_t, double> > &pos_loglr); -void CalcLambda (const char func_name, FUNC_PARAM ¶ms, - const double l_min, const double l_max, - const size_t n_region, double &lambda, double &logf); -void CalcLambda (const char func_name, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const double l_min, const double l_max, - const size_t n_region, double &lambda, double &logl_H0); 
-void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, - const gsl_vector *Uty, const double lambda, - const double trace_G, double &pve, double &pve_se); -void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, - const gsl_vector *Uty, const double lambda, double &vg, - double &ve, gsl_vector *beta, gsl_vector *se_beta); +void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const double l_min, const double l_max, const size_t n_region, + vector<pair<size_t, double>> &pos_loglr); +void CalcLambda(const char func_name, FUNC_PARAM ¶ms, const double l_min, + const double l_max, const size_t n_region, double &lambda, + double &logf); +void CalcLambda(const char func_name, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const double l_min, const double l_max, const size_t n_region, + double &lambda, double &logl_H0); +void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, const double trace_G, + double &pve, double &pve_se); +void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, double &vg, + double &ve, gsl_vector *beta, gsl_vector *se_beta); #endif - - diff --git a/src/logistic.cpp b/src/logistic.cpp index f9edc68..2308de7 100644 --- a/src/logistic.cpp +++ b/src/logistic.cpp @@ -1,15 +1,15 @@ -#include <stdio.h> -#include <math.h> +#include "logistic.h" +#include <gsl/gsl_linalg.h> #include <gsl/gsl_matrix.h> -#include <gsl/gsl_rng.h> #include <gsl/gsl_multimin.h> +#include <gsl/gsl_rng.h> #include <gsl/gsl_sf.h> -#include <gsl/gsl_linalg.h> -#include "logistic.h" +#include <math.h> +#include <stdio.h> // I need to bundle all the data that goes to the function to optimze // together. 
-typedef struct{ +typedef struct { gsl_matrix_int *X; gsl_vector_int *nlev; gsl_vector *y; @@ -18,13 +18,9 @@ typedef struct{ double lambdaL2; } fix_parm_mixed_T; -double fLogit_mixed(gsl_vector *beta, - gsl_matrix_int *X, - gsl_vector_int *nlev, - gsl_matrix *Xc, - gsl_vector *y, - double lambdaL1, - double lambdaL2) { +double fLogit_mixed(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev, + gsl_matrix *Xc, gsl_vector *y, double lambdaL1, + double lambdaL2) { int n = y->size; int npar = beta->size; double total = 0; @@ -33,57 +29,56 @@ double fLogit_mixed(gsl_vector *beta, // Changed loop start at 1 instead of 0 to avoid regularization of // beta_0*\/ // #pragma omp parallel for reduction (+:total) - for(int i = 1; i < npar; ++i) - total += beta->data[i]*beta->data[i]; - total = (-total*lambdaL2/2); + for (int i = 1; i < npar; ++i) + total += beta->data[i] * beta->data[i]; + total = (-total * lambdaL2 / 2); // #pragma omp parallel for reduction (+:aux) - for(int i = 1; i < npar; ++i) - aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]); - total = total-aux*lambdaL1; + for (int i = 1; i < npar; ++i) + aux += (beta->data[i] > 0 ? 
beta->data[i] : -beta->data[i]); + total = total - aux * lambdaL1; // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) // #reduction (+:total) - for(int i = 0; i < n; ++i) { - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < X->size2; ++k) { - if(gsl_matrix_int_get(X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm]; - iParm+=nlev->data[k]-1; + for (int i = 0; i < n; ++i) { + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < X->size2; ++k) { + if (gsl_matrix_int_get(X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm]; + iParm += nlev->data[k] - 1; } - for(int k = 0; k < (Xc->size2); ++k) - Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++]; - total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai)); + for (int k = 0; k < (Xc->size2); ++k) + Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++]; + total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai)); } return -total; } void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters - // length = 1 + Sum_k(C_k -1) - gsl_matrix_int *X, // Matrix Nobs x K - gsl_vector_int *nlev, // Vector with number categories - gsl_matrix *Xc, // Continuous covariates matrix: - // obs x Kc (NULL if not used). - gsl_vector *yhat){ // Vector of prob. predicted by - // the logistic - for(int i = 0; i < X->size1; ++i) { - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < X->size2; ++k) { - if(gsl_matrix_int_get(X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm]; - iParm+=nlev->data[k]-1; + // length = 1 + Sum_k(C_k -1) + gsl_matrix_int *X, // Matrix Nobs x K + gsl_vector_int *nlev, // Vector with number categories + gsl_matrix *Xc, // Continuous covariates matrix: + // obs x Kc (NULL if not used). + gsl_vector *yhat) { // Vector of prob. 
predicted by + // the logistic + for (int i = 0; i < X->size1; ++i) { + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < X->size2; ++k) { + if (gsl_matrix_int_get(X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm]; + iParm += nlev->data[k] - 1; } // Adding the continuous. - for(int k = 0; k < (Xc->size2); ++k) - Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++]; - yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai)); + for (int k = 0; k < (Xc->size2); ++k) + Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++]; + yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai)); } } - // The gradient of f, df = (df/dx, df/dy). -void wgsl_mixed_optim_df (const gsl_vector *beta, void *params, - gsl_vector *out) { +void wgsl_mixed_optim_df(const gsl_vector *beta, void *params, + gsl_vector *out) { fix_parm_mixed_T *p = (fix_parm_mixed_T *)params; int n = p->y->size; int K = p->X->size2; @@ -91,50 +86,49 @@ void wgsl_mixed_optim_df (const gsl_vector *beta, void *params, int npar = beta->size; // Intitialize gradient out necessary? - for(int i = 0; i < npar; ++i) - out->data[i]= 0; + for (int i = 0; i < npar; ++i) + out->data[i] = 0; // Changed loop start at 1 instead of 0 to avoid regularization of beta 0. 
- for(int i = 1; i < npar; ++i) - out->data[i]= p->lambdaL2*beta->data[i]; - for(int i = 1; i < npar; ++i) - out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0)); - - for(int i = 0; i < n; ++i) { - double pn=0; - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < K; ++k) { - if(gsl_matrix_int_get(p->X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]; - iParm+=p->nlev->data[k]-1; + for (int i = 1; i < npar; ++i) + out->data[i] = p->lambdaL2 * beta->data[i]; + for (int i = 1; i < npar; ++i) + out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0)); + + for (int i = 0; i < n; ++i) { + double pn = 0; + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < K; ++k) { + if (gsl_matrix_int_get(p->X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm]; + iParm += p->nlev->data[k] - 1; } // Adding the continuous. - for(int k = 0; k < Kc; ++k) - Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++]; + for (int k = 0; k < Kc; ++k) + Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm++]; - pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) ); + pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai))); - out->data[0]+= pn; - iParm=1; - for(int k = 0; k < K; ++k) { - if(gsl_matrix_int_get(p->X,i,k)>0) - out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn; - iParm+=p->nlev->data[k]-1; + out->data[0] += pn; + iParm = 1; + for (int k = 0; k < K; ++k) { + if (gsl_matrix_int_get(p->X, i, k) > 0) + out->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm] += pn; + iParm += p->nlev->data[k] - 1; } // Adding the continuous. - for(int k = 0; k < Kc; ++k) { - out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn; + for (int k = 0; k < Kc; ++k) { + out->data[iParm++] += gsl_matrix_get(p->Xc, i, k) * pn; } } - } // The Hessian of f. 
-void wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params, - gsl_matrix *out) { +void wgsl_mixed_optim_hessian(const gsl_vector *beta, void *params, + gsl_matrix *out) { fix_parm_mixed_T *p = (fix_parm_mixed_T *)params; int n = p->y->size; int K = p->X->size2; @@ -146,120 +140,121 @@ void wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params, gsl_matrix_set_zero(out); /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*/ - for(int i = 1; i < npar; ++i) - gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this. + for (int i = 1; i < npar; ++i) + gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this. // L1 penalty not working yet, as not differentiable, I may need to // do coordinate descent (as in glm_net) - for(int i = 0; i < n; ++i) { - double pn=0; - double aux=0; - double Xbetai=beta->data[0]; - int iParm1=1; - for(int k = 0; k < K; ++k) { - if(gsl_matrix_int_get(p->X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1]; - iParm1+=p->nlev->data[k]-1; //-1? + for (int i = 0; i < n; ++i) { + double pn = 0; + double aux = 0; + double Xbetai = beta->data[0]; + int iParm1 = 1; + for (int k = 0; k < K; ++k) { + if (gsl_matrix_int_get(p->X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1]; + iParm1 += p->nlev->data[k] - 1; //-1? } // Adding the continuous. - for(int k = 0; k < Kc; ++k) - Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++]; + for (int k = 0; k < Kc; ++k) + Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm1++]; - pn= 1/(1 + gsl_sf_exp(-Xbetai)); + pn = 1 / (1 + gsl_sf_exp(-Xbetai)); // Add a protection for pn very close to 0 or 1? - aux=pn*(1-pn); + aux = pn * (1 - pn); // Calculate sub-gradient vector gn. 
gsl_vector_set_zero(gn); - gn->data[0]= 1; - iParm1=1; - for(int k = 0; k < K; ++k) { - if(gsl_matrix_int_get(p->X,i,k)>0) - gn->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1]=1; - iParm1+=p->nlev->data[k]-1; + gn->data[0] = 1; + iParm1 = 1; + for (int k = 0; k < K; ++k) { + if (gsl_matrix_int_get(p->X, i, k) > 0) + gn->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1] = 1; + iParm1 += p->nlev->data[k] - 1; } // Adding the continuous. - for(int k = 0; k < Kc; ++k) { - gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k); + for (int k = 0; k < Kc; ++k) { + gn->data[iParm1++] = gsl_matrix_get(p->Xc, i, k); } - for(int k1=0;k1<npar; ++k1) - if(gn->data[k1]!=0) - for(int k2=0;k2<npar; ++k2) - if(gn->data[k2]!=0) - *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]); + for (int k1 = 0; k1 < npar; ++k1) + if (gn->data[k1] != 0) + for (int k2 = 0; k2 < npar; ++k2) + if (gn->data[k2] != 0) + *gsl_matrix_ptr(out, k1, k2) += (aux * gn->data[k1] * gn->data[k2]); } gsl_vector_free(gn); } double wgsl_mixed_optim_f(gsl_vector *v, void *params) { - double mLogLik=0; + double mLogLik = 0; fix_parm_mixed_T *p = (fix_parm_mixed_T *)params; - mLogLik = fLogit_mixed(v,p->X,p->nlev,p->Xc,p->y,p->lambdaL1,p->lambdaL2); + mLogLik = + fLogit_mixed(v, p->X, p->nlev, p->Xc, p->y, p->lambdaL1, p->lambdaL2); return mLogLik; } // Compute both f and df together. -void -wgsl_mixed_optim_fdf (gsl_vector *x, void *params, double *f, gsl_vector *df) { +void wgsl_mixed_optim_fdf(gsl_vector *x, void *params, double *f, + gsl_vector *df) { *f = wgsl_mixed_optim_f(x, params); wgsl_mixed_optim_df(x, params, df); } // Xc is the matrix of continuous covariates, Nobs x Kc (NULL if not used). 
int logistic_mixed_fit(gsl_vector *beta, gsl_matrix_int *X, - gsl_vector_int *nlev, gsl_matrix *Xc, - gsl_vector *y, double lambdaL1, double lambdaL2) { - double mLogLik=0; + gsl_vector_int *nlev, gsl_matrix *Xc, gsl_vector *y, + double lambdaL1, double lambdaL2) { + double mLogLik = 0; fix_parm_mixed_T p; int npar = beta->size; - int iter=0; - double maxchange=0; + int iter = 0; + double maxchange = 0; // Intializing fix parameters. - p.X=X; - p.Xc=Xc; - p.nlev=nlev; - p.y=y; - p.lambdaL1=lambdaL1; - p.lambdaL2=lambdaL2; + p.X = X; + p.Xc = Xc; + p.nlev = nlev; + p.y = y; + p.lambdaL1 = lambdaL1; + p.lambdaL2 = lambdaL2; // Initial fit. - mLogLik = wgsl_mixed_optim_f(beta,&p); + mLogLik = wgsl_mixed_optim_f(beta, &p); - gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix. - gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move. + gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix. + gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move. - gsl_vector *myG = gsl_vector_alloc(npar); // Gradient. - gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR. + gsl_vector *myG = gsl_vector_alloc(npar); // Gradient. + gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR. - for(iter=0;iter<100;iter++){ - wgsl_mixed_optim_hessian(beta,&p,myH); // Calculate Hessian. - wgsl_mixed_optim_df(beta,&p,myG); // Calculate Gradient. - gsl_linalg_QR_decomp(myH,tau); // Calculate next beta. - gsl_linalg_QR_solve(myH,tau,myG,stBeta); - gsl_vector_sub(beta,stBeta); + for (iter = 0; iter < 100; iter++) { + wgsl_mixed_optim_hessian(beta, &p, myH); // Calculate Hessian. + wgsl_mixed_optim_df(beta, &p, myG); // Calculate Gradient. + gsl_linalg_QR_decomp(myH, tau); // Calculate next beta. + gsl_linalg_QR_solve(myH, tau, myG, stBeta); + gsl_vector_sub(beta, stBeta); // Monitor convergence. 
- maxchange=0; - for(int i=0;i<npar; i++) - if(maxchange<fabs(stBeta->data[i])) - maxchange=fabs(stBeta->data[i]); + maxchange = 0; + for (int i = 0; i < npar; i++) + if (maxchange < fabs(stBeta->data[i])) + maxchange = fabs(stBeta->data[i]); - if(maxchange<1E-4) + if (maxchange < 1E-4) break; } // Final fit. - mLogLik = wgsl_mixed_optim_f(beta,&p); + mLogLik = wgsl_mixed_optim_f(beta, &p); - gsl_vector_free (tau); - gsl_vector_free (stBeta); - gsl_vector_free (myG); - gsl_matrix_free (myH); + gsl_vector_free(tau); + gsl_vector_free(stBeta); + gsl_vector_free(myG); + gsl_matrix_free(myH); return 0; } @@ -278,8 +273,8 @@ typedef struct { double lambdaL2; } fix_parm_cat_T; -double fLogit_cat (gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev, - gsl_vector *y, double lambdaL1, double lambdaL2) { +double fLogit_cat(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev, + gsl_vector *y, double lambdaL1, double lambdaL2) { int n = y->size; int npar = beta->size; double total = 0; @@ -288,91 +283,90 @@ double fLogit_cat (gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev, // omp_set_num_threads(ompthr); /\* Changed loop start at 1 instead // of 0 to avoid regularization of beta 0*\/ /\*#pragma omp parallel // for reduction (+:total)*\/ - for(int i = 1; i < npar; ++i) - total += beta->data[i]*beta->data[i]; - total = (-total*lambdaL2/2); + for (int i = 1; i < npar; ++i) + total += beta->data[i] * beta->data[i]; + total = (-total * lambdaL2 / 2); // /\*#pragma omp parallel for reduction (+:aux)*\/ - for(int i = 1; i < npar; ++i) - aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]); - total = total-aux*lambdaL1; + for (int i = 1; i < npar; ++i) + aux += (beta->data[i] > 0 ? 
beta->data[i] : -beta->data[i]); + total = total - aux * lambdaL1; // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) // #reduction (+:total) - for(int i = 0; i < n; ++i) { - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < X->size2; ++k) { - if(gsl_matrix_int_get(X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm]; - iParm+=nlev->data[k]-1; + for (int i = 0; i < n; ++i) { + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < X->size2; ++k) { + if (gsl_matrix_int_get(X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm]; + iParm += nlev->data[k] - 1; } - total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai)); + total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai)); } return -total; } -void logistic_cat_pred (gsl_vector *beta, // Vector of parameters - // length = 1 + Sum_k(C_k-1). - gsl_matrix_int *X, // Matrix Nobs x K - gsl_vector_int *nlev, // Vector with #categories - gsl_vector *yhat){ // Vector of prob. predicted by - // the logistic. - for(int i = 0; i < X->size1; ++i) { - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < X->size2; ++k) { - if(gsl_matrix_int_get(X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(X,i,k)-1+iParm]; - iParm+=nlev->data[k]-1; +void logistic_cat_pred(gsl_vector *beta, // Vector of parameters + // length = 1 + Sum_k(C_k-1). + gsl_matrix_int *X, // Matrix Nobs x K + gsl_vector_int *nlev, // Vector with #categories + gsl_vector *yhat) { // Vector of prob. predicted by + // the logistic. + for (int i = 0; i < X->size1; ++i) { + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < X->size2; ++k) { + if (gsl_matrix_int_get(X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm]; + iParm += nlev->data[k] - 1; } - yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai)); + yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai)); } } // The gradient of f, df = (df/dx, df/dy). 
-void wgsl_cat_optim_df (const gsl_vector *beta, void *params, - gsl_vector *out) { +void wgsl_cat_optim_df(const gsl_vector *beta, void *params, gsl_vector *out) { fix_parm_cat_T *p = (fix_parm_cat_T *)params; int n = p->y->size; int K = p->X->size2; int npar = beta->size; // Intitialize gradient out necessary? - for(int i = 0; i < npar; ++i) - out->data[i]= 0; + for (int i = 0; i < npar; ++i) + out->data[i] = 0; // Changed loop start at 1 instead of 0 to avoid regularization of beta 0. - for(int i = 1; i < npar; ++i) - out->data[i]= p->lambdaL2*beta->data[i]; - for(int i = 1; i < npar; ++i) - out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0)); - - for(int i = 0; i < n; ++i) { - double pn=0; - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < K; ++k) { - if(gsl_matrix_int_get(p->X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]; - iParm+=p->nlev->data[k]-1; + for (int i = 1; i < npar; ++i) + out->data[i] = p->lambdaL2 * beta->data[i]; + for (int i = 1; i < npar; ++i) + out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0)); + + for (int i = 0; i < n; ++i) { + double pn = 0; + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < K; ++k) { + if (gsl_matrix_int_get(p->X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm]; + iParm += p->nlev->data[k] - 1; } - pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) ); + pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai))); - out->data[0]+= pn; - iParm=1; - for(int k = 0; k < K; ++k) { - if(gsl_matrix_int_get(p->X,i,k)>0) - out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn; - iParm+=p->nlev->data[k]-1; + out->data[0] += pn; + iParm = 1; + for (int k = 0; k < K; ++k) { + if (gsl_matrix_int_get(p->X, i, k) > 0) + out->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm] += pn; + iParm += p->nlev->data[k] - 1; } } } // The Hessian of f. 
-void wgsl_cat_optim_hessian (const gsl_vector *beta, void *params, - gsl_matrix *out) { +void wgsl_cat_optim_hessian(const gsl_vector *beta, void *params, + gsl_matrix *out) { fix_parm_cat_T *p = (fix_parm_cat_T *)params; int n = p->y->size; int K = p->X->size2; @@ -382,123 +376,119 @@ void wgsl_cat_optim_hessian (const gsl_vector *beta, void *params, gsl_matrix_set_zero(out); // Changed loop start at 1 instead of 0 to avoid regularization of beta. - for(int i = 1; i < npar; ++i) - gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this. + for (int i = 1; i < npar; ++i) + gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this. // L1 penalty not working yet, as not differentiable, I may need to // do coordinate descent (as in glm_net). - for(int i = 0; i < n; ++i) { - double pn=0; - double aux=0; - double Xbetai=beta->data[0]; - int iParm2=1; - int iParm1=1; - for(int k = 0; k < K; ++k) { - if(gsl_matrix_int_get(p->X,i,k)>0) - Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm1]; - iParm1+=p->nlev->data[k]-1; //-1? + for (int i = 0; i < n; ++i) { + double pn = 0; + double aux = 0; + double Xbetai = beta->data[0]; + int iParm2 = 1; + int iParm1 = 1; + for (int k = 0; k < K; ++k) { + if (gsl_matrix_int_get(p->X, i, k) > 0) + Xbetai += beta->data[gsl_matrix_int_get(p->X, i, k) - 1 + iParm1]; + iParm1 += p->nlev->data[k] - 1; //-1? } - pn= 1/(1 + gsl_sf_exp(-Xbetai)); + pn = 1 / (1 + gsl_sf_exp(-Xbetai)); // Add a protection for pn very close to 0 or 1? - aux=pn*(1-pn); - *gsl_matrix_ptr(out,0,0)+=aux; - iParm2=1; - for(int k2 = 0; k2 < K; ++k2) { - if(gsl_matrix_int_get(p->X,i,k2)>0) - *gsl_matrix_ptr(out,0,gsl_matrix_int_get(p->X,i,k2)-1+iParm2)+=aux; - iParm2+=p->nlev->data[k2]-1; //-1? 
+ aux = pn * (1 - pn); + *gsl_matrix_ptr(out, 0, 0) += aux; + iParm2 = 1; + for (int k2 = 0; k2 < K; ++k2) { + if (gsl_matrix_int_get(p->X, i, k2) > 0) + *gsl_matrix_ptr(out, 0, gsl_matrix_int_get(p->X, i, k2) - 1 + iParm2) += + aux; + iParm2 += p->nlev->data[k2] - 1; //-1? } - iParm1=1; - for(int k1 = 0; k1 < K; ++k1) { - if(gsl_matrix_int_get(p->X,i,k1)>0) - *gsl_matrix_ptr(out,gsl_matrix_int_get(p->X,i,k1)-1+iParm1,0)+=aux; - iParm2=1; - for(int k2 = 0; k2 < K; ++k2) { - if((gsl_matrix_int_get(p->X,i,k1)>0) && - (gsl_matrix_int_get(p->X,i,k2)>0)) - *gsl_matrix_ptr(out - ,gsl_matrix_int_get(p->X,i,k1)-1+iParm1 - ,gsl_matrix_int_get(p->X,i,k2)-1+iParm2 - )+=aux; - iParm2+=p->nlev->data[k2]-1; //-1? + iParm1 = 1; + for (int k1 = 0; k1 < K; ++k1) { + if (gsl_matrix_int_get(p->X, i, k1) > 0) + *gsl_matrix_ptr(out, gsl_matrix_int_get(p->X, i, k1) - 1 + iParm1, 0) += + aux; + iParm2 = 1; + for (int k2 = 0; k2 < K; ++k2) { + if ((gsl_matrix_int_get(p->X, i, k1) > 0) && + (gsl_matrix_int_get(p->X, i, k2) > 0)) + *gsl_matrix_ptr(out, gsl_matrix_int_get(p->X, i, k1) - 1 + iParm1, + gsl_matrix_int_get(p->X, i, k2) - 1 + iParm2) += aux; + iParm2 += p->nlev->data[k2] - 1; //-1? } - iParm1+=p->nlev->data[k1]-1; //-1? + iParm1 += p->nlev->data[k1] - 1; //-1? } } } double wgsl_cat_optim_f(gsl_vector *v, void *params) { - double mLogLik=0; + double mLogLik = 0; fix_parm_cat_T *p = (fix_parm_cat_T *)params; - mLogLik = fLogit_cat(v,p->X,p->nlev,p->y,p->lambdaL1,p->lambdaL2); + mLogLik = fLogit_cat(v, p->X, p->nlev, p->y, p->lambdaL1, p->lambdaL2); return mLogLik; } // Compute both f and df together. 
-void wgsl_cat_optim_fdf (gsl_vector *x, void *params, double *f, - gsl_vector *df) { +void wgsl_cat_optim_fdf(gsl_vector *x, void *params, double *f, + gsl_vector *df) { *f = wgsl_cat_optim_f(x, params); wgsl_cat_optim_df(x, params, df); } -int logistic_cat_fit(gsl_vector *beta, - gsl_matrix_int *X, - gsl_vector_int *nlev, - gsl_vector *y, - double lambdaL1, - double lambdaL2) { - double mLogLik=0; +int logistic_cat_fit(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev, + gsl_vector *y, double lambdaL1, double lambdaL2) { + double mLogLik = 0; fix_parm_cat_T p; int npar = beta->size; - int iter=0; - double maxchange=0; + int iter = 0; + double maxchange = 0; // Intializing fix parameters. - p.X=X; - p.nlev=nlev; - p.y=y; - p.lambdaL1=lambdaL1; - p.lambdaL2=lambdaL2; + p.X = X; + p.nlev = nlev; + p.y = y; + p.lambdaL1 = lambdaL1; + p.lambdaL2 = lambdaL2; // Initial fit. - mLogLik = wgsl_cat_optim_f(beta,&p); + mLogLik = wgsl_cat_optim_f(beta, &p); - gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix. - gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move. + gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix. + gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move. - gsl_vector *myG = gsl_vector_alloc(npar); // Gradient. - gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR. + gsl_vector *myG = gsl_vector_alloc(npar); // Gradient. + gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR. - for(iter=0;iter<100;iter++){ - wgsl_cat_optim_hessian(beta,&p,myH); // Calculate Hessian. - wgsl_cat_optim_df(beta,&p,myG); // Calculate Gradient. - gsl_linalg_QR_decomp(myH,tau); // Calculate next beta. - gsl_linalg_QR_solve(myH,tau,myG,stBeta); - gsl_vector_sub(beta,stBeta); + for (iter = 0; iter < 100; iter++) { + wgsl_cat_optim_hessian(beta, &p, myH); // Calculate Hessian. + wgsl_cat_optim_df(beta, &p, myG); // Calculate Gradient. + gsl_linalg_QR_decomp(myH, tau); // Calculate next beta. 
+ gsl_linalg_QR_solve(myH, tau, myG, stBeta); + gsl_vector_sub(beta, stBeta); // Monitor convergence. - maxchange=0; - for(int i=0;i<npar; i++) - if(maxchange<fabs(stBeta->data[i])) - maxchange=fabs(stBeta->data[i]); + maxchange = 0; + for (int i = 0; i < npar; i++) + if (maxchange < fabs(stBeta->data[i])) + maxchange = fabs(stBeta->data[i]); #ifdef _RPR_DEBUG_ - mLogLik = wgsl_cat_optim_f(beta,&p); + mLogLik = wgsl_cat_optim_f(beta, &p); #endif - if(maxchange<1E-4) + if (maxchange < 1E-4) break; } // Final fit. - mLogLik = wgsl_cat_optim_f(beta,&p); + mLogLik = wgsl_cat_optim_f(beta, &p); - gsl_vector_free (tau); - gsl_vector_free (stBeta); - gsl_vector_free (myG); - gsl_matrix_free (myH); + gsl_vector_free(tau); + gsl_vector_free(stBeta); + gsl_vector_free(myG); + gsl_matrix_free(myH); return 0; } @@ -509,15 +499,15 @@ int logistic_cat_fit(gsl_vector *beta, // I need to bundle all the data that goes to the function to optimze // together. -typedef struct{ - gsl_matrix *Xc; // continuous covariates; Matrix Nobs x Kc +typedef struct { + gsl_matrix *Xc; // continuous covariates; Matrix Nobs x Kc gsl_vector *y; double lambdaL1; double lambdaL2; -}fix_parm_cont_T; +} fix_parm_cont_T; double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y, - double lambdaL1, double lambdaL2) { + double lambdaL1, double lambdaL2) { int n = y->size; int npar = beta->size; double total = 0; @@ -526,82 +516,81 @@ double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y, // omp_set_num_threads(ompthr); /\* Changed loop start at 1 instead // of 0 to avoid regularization of beta_0*\/ /\*#pragma omp parallel // for reduction (+:total)*\/ - for(int i = 1; i < npar; ++i) - total += beta->data[i]*beta->data[i]; - total = (-total*lambdaL2/2); + for (int i = 1; i < npar; ++i) + total += beta->data[i] * beta->data[i]; + total = (-total * lambdaL2 / 2); // /\*#pragma omp parallel for reduction (+:aux)*\/ - for(int i = 1; i < npar; ++i) - aux += (beta->data[i]>0 ? 
beta->data[i] : -beta->data[i]); - total = total-aux*lambdaL1; + for (int i = 1; i < npar; ++i) + aux += (beta->data[i] > 0 ? beta->data[i] : -beta->data[i]); + total = total - aux * lambdaL1; // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) // #reduction (+:total) - for(int i = 0; i < n; ++i) { - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < (Xc->size2); ++k) - Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++]; - total += y->data[i]*Xbetai-gsl_sf_log_1plusx(gsl_sf_exp(Xbetai)); + for (int i = 0; i < n; ++i) { + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < (Xc->size2); ++k) + Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++]; + total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai)); } return -total; } -void logistic_cont_pred(gsl_vector *beta, // Vector of parameters - // length = 1 + Sum_k(C_k-1). - gsl_matrix *Xc, // Continuous covariates matrix, - // Nobs x Kc (NULL if not used). - gsl_vector *yhat) { // Vector of prob. predicted by - // the logistic. - for(int i = 0; i < Xc->size1; ++i) { - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < (Xc->size2); ++k) - Xbetai+= gsl_matrix_get(Xc,i,k)*beta->data[iParm++]; - yhat->data[i]=1/(1 + gsl_sf_exp(-Xbetai)); +void logistic_cont_pred(gsl_vector *beta, // Vector of parameters + // length = 1 + Sum_k(C_k-1). + gsl_matrix *Xc, // Continuous covariates matrix, + // Nobs x Kc (NULL if not used). + gsl_vector *yhat) { // Vector of prob. predicted by + // the logistic. + for (int i = 0; i < Xc->size1; ++i) { + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < (Xc->size2); ++k) + Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++]; + yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai)); } } // The gradient of f, df = (df/dx, df/dy). 
-void wgsl_cont_optim_df (const gsl_vector *beta, void *params, - gsl_vector *out) { +void wgsl_cont_optim_df(const gsl_vector *beta, void *params, gsl_vector *out) { fix_parm_cont_T *p = (fix_parm_cont_T *)params; int n = p->y->size; int Kc = p->Xc->size2; int npar = beta->size; // Intitialize gradient out necessary? - for(int i = 0; i < npar; ++i) - out->data[i]= 0; + for (int i = 0; i < npar; ++i) + out->data[i] = 0; // Changed loop start at 1 instead of 0 to avoid regularization of beta 0. - for(int i = 1; i < npar; ++i) - out->data[i]= p->lambdaL2*beta->data[i]; - for(int i = 1; i < npar; ++i) - out->data[i]+= p->lambdaL1*((beta->data[i]>0)-(beta->data[i]<0)); + for (int i = 1; i < npar; ++i) + out->data[i] = p->lambdaL2 * beta->data[i]; + for (int i = 1; i < npar; ++i) + out->data[i] += p->lambdaL1 * ((beta->data[i] > 0) - (beta->data[i] < 0)); - for(int i = 0; i < n; ++i) { - double pn=0; - double Xbetai=beta->data[0]; - int iParm=1; - for(int k = 0; k < Kc; ++k) - Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++]; + for (int i = 0; i < n; ++i) { + double pn = 0; + double Xbetai = beta->data[0]; + int iParm = 1; + for (int k = 0; k < Kc; ++k) + Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm++]; - pn= -( p->y->data[i] - 1/(1 + gsl_sf_exp(-Xbetai)) ); + pn = -(p->y->data[i] - 1 / (1 + gsl_sf_exp(-Xbetai))); - out->data[0]+= pn; - iParm=1; + out->data[0] += pn; + iParm = 1; // Adding the continuous. - for(int k = 0; k < Kc; ++k) { - out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn; + for (int k = 0; k < Kc; ++k) { + out->data[iParm++] += gsl_matrix_get(p->Xc, i, k) * pn; } } } // The Hessian of f. 
-void wgsl_cont_optim_hessian (const gsl_vector *beta, void *params, - gsl_matrix *out) { +void wgsl_cont_optim_hessian(const gsl_vector *beta, void *params, + gsl_matrix *out) { fix_parm_cont_T *p = (fix_parm_cont_T *)params; int n = p->y->size; int Kc = p->Xc->size2; @@ -614,111 +603,109 @@ void wgsl_cont_optim_hessian (const gsl_vector *beta, void *params, // Changed loop start at 1 instead of 0 to avoid regularization of // beta 0. - for(int i = 1; i < npar; ++i) - gsl_matrix_set(out,i,i,(p->lambdaL2)); // Double-check this. + for (int i = 1; i < npar; ++i) + gsl_matrix_set(out, i, i, (p->lambdaL2)); // Double-check this. // L1 penalty not working yet, as not differentiable, I may need to // do coordinate descent (as in glm_net). - for(int i = 0; i < n; ++i) { - double pn=0; - double aux=0; - double Xbetai=beta->data[0]; - int iParm1=1; - for(int k = 0; k < Kc; ++k) - Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm1++]; + for (int i = 0; i < n; ++i) { + double pn = 0; + double aux = 0; + double Xbetai = beta->data[0]; + int iParm1 = 1; + for (int k = 0; k < Kc; ++k) + Xbetai += gsl_matrix_get(p->Xc, i, k) * beta->data[iParm1++]; - pn= 1/(1 + gsl_sf_exp(-Xbetai)); + pn = 1 / (1 + gsl_sf_exp(-Xbetai)); // Add a protection for pn very close to 0 or 1? - aux=pn*(1-pn); + aux = pn * (1 - pn); // Calculate sub-gradient vector gn. 
gsl_vector_set_zero(gn); - gn->data[0]= 1; - iParm1=1; - for(int k = 0; k < Kc; ++k) { - gn->data[iParm1++] = gsl_matrix_get(p->Xc,i,k); + gn->data[0] = 1; + iParm1 = 1; + for (int k = 0; k < Kc; ++k) { + gn->data[iParm1++] = gsl_matrix_get(p->Xc, i, k); } - for(int k1=0;k1<npar; ++k1) - if(gn->data[k1]!=0) - for(int k2=0;k2<npar; ++k2) - if(gn->data[k2]!=0) - *gsl_matrix_ptr(out,k1,k2) += (aux * gn->data[k1] * gn->data[k2]); + for (int k1 = 0; k1 < npar; ++k1) + if (gn->data[k1] != 0) + for (int k2 = 0; k2 < npar; ++k2) + if (gn->data[k2] != 0) + *gsl_matrix_ptr(out, k1, k2) += (aux * gn->data[k1] * gn->data[k2]); } gsl_vector_free(gn); } double wgsl_cont_optim_f(gsl_vector *v, void *params) { - double mLogLik=0; + double mLogLik = 0; fix_parm_cont_T *p = (fix_parm_cont_T *)params; - mLogLik = fLogit_cont(v,p->Xc,p->y,p->lambdaL1,p->lambdaL2); + mLogLik = fLogit_cont(v, p->Xc, p->y, p->lambdaL1, p->lambdaL2); return mLogLik; } // Compute both f and df together. -void wgsl_cont_optim_fdf (gsl_vector *x, void *params, - double *f, gsl_vector *df) { +void wgsl_cont_optim_fdf(gsl_vector *x, void *params, double *f, + gsl_vector *df) { *f = wgsl_cont_optim_f(x, params); wgsl_cont_optim_df(x, params, df); } -int logistic_cont_fit (gsl_vector *beta, - gsl_matrix *Xc, // Continuous covariates matrix, - // Nobs x Kc (NULL if not used). - gsl_vector *y, - double lambdaL1, - double lambdaL2) { +int logistic_cont_fit(gsl_vector *beta, + gsl_matrix *Xc, // Continuous covariates matrix, + // Nobs x Kc (NULL if not used). + gsl_vector *y, double lambdaL1, double lambdaL2) { - double mLogLik=0; + double mLogLik = 0; fix_parm_cont_T p; int npar = beta->size; - int iter=0; - double maxchange=0; + int iter = 0; + double maxchange = 0; // Initializing fix parameters. - p.Xc=Xc; - p.y=y; - p.lambdaL1=lambdaL1; - p.lambdaL2=lambdaL2; + p.Xc = Xc; + p.y = y; + p.lambdaL1 = lambdaL1; + p.lambdaL2 = lambdaL2; // Initial fit. 
- mLogLik = wgsl_cont_optim_f(beta,&p); + mLogLik = wgsl_cont_optim_f(beta, &p); - gsl_matrix *myH = gsl_matrix_alloc(npar,npar); // Hessian matrix. - gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move. + gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix. + gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move. - gsl_vector *myG = gsl_vector_alloc(npar); // Gradient. - gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR. + gsl_vector *myG = gsl_vector_alloc(npar); // Gradient. + gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR. - for(iter=0;iter<100;iter++){ - wgsl_cont_optim_hessian(beta,&p,myH); // Calculate Hessian. - wgsl_cont_optim_df(beta,&p,myG); // Calculate Gradient. - gsl_linalg_QR_decomp(myH,tau); // Calculate next beta. - gsl_linalg_QR_solve(myH,tau,myG,stBeta); - gsl_vector_sub(beta,stBeta); + for (iter = 0; iter < 100; iter++) { + wgsl_cont_optim_hessian(beta, &p, myH); // Calculate Hessian. + wgsl_cont_optim_df(beta, &p, myG); // Calculate Gradient. + gsl_linalg_QR_decomp(myH, tau); // Calculate next beta. + gsl_linalg_QR_solve(myH, tau, myG, stBeta); + gsl_vector_sub(beta, stBeta); // Monitor convergence. - maxchange=0; - for(int i=0;i<npar; i++) - if(maxchange<fabs(stBeta->data[i])) - maxchange=fabs(stBeta->data[i]); + maxchange = 0; + for (int i = 0; i < npar; i++) + if (maxchange < fabs(stBeta->data[i])) + maxchange = fabs(stBeta->data[i]); #ifdef _RPR_DEBUG_ - mLogLik = wgsl_cont_optim_f(beta,&p); + mLogLik = wgsl_cont_optim_f(beta, &p); #endif - if(maxchange<1E-4) + if (maxchange < 1E-4) break; } // Final fit. 
- mLogLik = wgsl_cont_optim_f(beta,&p); + mLogLik = wgsl_cont_optim_f(beta, &p); - gsl_vector_free (tau); - gsl_vector_free (stBeta); - gsl_vector_free (myG); - gsl_matrix_free (myH); + gsl_vector_free(tau); + gsl_vector_free(stBeta); + gsl_vector_free(myG); + gsl_matrix_free(myH); return 0; } diff --git a/src/logistic.h b/src/logistic.h index b61ab14..bebcbf6 100644 --- a/src/logistic.h +++ b/src/logistic.h @@ -3,73 +3,63 @@ // Mixed interface. void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters - // length = 1+Sum_k(C_k-1)+Kc. - gsl_matrix_int *X, // Matrix Nobs x K. - gsl_vector_int *nlev, // Vector with num. categories. - gsl_matrix *Xc, // Continuous covariates matrix - // Nobs x Kc - gsl_vector *yhat); // Vector of prob. predicted by - // the logistic. + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix_int *X, // Matrix Nobs x K. + gsl_vector_int *nlev, // Vector with num. categories. + gsl_matrix *Xc, // Continuous covariates matrix + // Nobs x Kc + gsl_vector *yhat); // Vector of prob. predicted by + // the logistic. int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters - // length = 1+Sum_k(C_k-1)+Kc - gsl_matrix_int *X, // Matrix Nobs x K. - gsl_vector_int *nlev, // Vector with number categories. - gsl_matrix *Xc, // Continuous covariates - // matrix Nobs x Kc - gsl_vector *y, // Vector of prob. to predict. - double lambdaL1, // Reg. L1 0.0 if not used. - double lambdaL2); // Reg. L2 0.0 if not used. + // length = 1+Sum_k(C_k-1)+Kc + gsl_matrix_int *X, // Matrix Nobs x K. + gsl_vector_int *nlev, // Vector with number categories. + gsl_matrix *Xc, // Continuous covariates + // matrix Nobs x Kc + gsl_vector *y, // Vector of prob. to predict. + double lambdaL1, // Reg. L1 0.0 if not used. + double lambdaL2); // Reg. L2 0.0 if not used. 
-double fLogit_mixed(gsl_vector *beta, - gsl_matrix_int *X, - gsl_vector_int *nlev, - gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc - gsl_vector *y, - double lambdaL1, - double lambdaL2); +double fLogit_mixed(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev, + gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc + gsl_vector *y, double lambdaL1, double lambdaL2); // Categorical-only interface. void logistic_cat_pred(gsl_vector *beta, // Vector of parameters - // length = 1+Sum_k(C_k-1)+Kc. - gsl_matrix_int *X, // Matrix Nobs x K. - gsl_vector_int *nlev, // Vector with number categories. - gsl_vector *yhat); // Vector of prob. predicted by - // the logistic. + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix_int *X, // Matrix Nobs x K. + gsl_vector_int *nlev, // Vector with number categories. + gsl_vector *yhat); // Vector of prob. predicted by + // the logistic. int logistic_cat_fit(gsl_vector *beta, // Vector of parameters - // length = 1+Sum_k(C_k-1)+Kc. - gsl_matrix_int *X, // Matrix Nobs x K . - gsl_vector_int *nlev, // Vector with number categories. - gsl_vector *y, // Vector of prob. to predict. - double lambdaL1, // Regularization L1, 0 if not used - double lambdaL2); // Regularization L2, 0 if not used + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix_int *X, // Matrix Nobs x K . + gsl_vector_int *nlev, // Vector with number categories. + gsl_vector *y, // Vector of prob. to predict. + double lambdaL1, // Regularization L1, 0 if not used + double lambdaL2); // Regularization L2, 0 if not used -double fLogit_cat(gsl_vector *beta, - gsl_matrix_int *X, - gsl_vector_int *nlev, - gsl_vector *y, - double lambdaL1, - double lambdaL2); +double fLogit_cat(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev, + gsl_vector *y, double lambdaL1, double lambdaL2); // Continuous-only interface. -void logistic_cont_pred(gsl_vector *beta, // Vector of parameters - // length = 1 + Sum_k(C_k-1) + Kc. 
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc. - gsl_vector *yhat);// Vector of prob. predicted - // by the logistic. +void logistic_cont_pred(gsl_vector *beta, // Vector of parameters + // length = 1 + Sum_k(C_k-1) + Kc. + gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc. + gsl_vector *yhat); // Vector of prob. predicted + // by the logistic. int logistic_cont_fit(gsl_vector *beta, // Vector of parameters - // length = 1+Sum_k(C_k-1)+Kc. - gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc. - gsl_vector *y, // Vector of prob. to predict. - double lambdaL1, // Regularization L1, 0 if not used. - double lambdaL2); // Regularization L2, 0 if not used. + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc. + gsl_vector *y, // Vector of prob. to predict. + double lambdaL1, // Regularization L1, 0 if not used. + double lambdaL2); // Regularization L2, 0 if not used. double fLogit_cont(gsl_vector *beta, - gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc. - gsl_vector *y, - double lambdaL1, - double lambdaL2); + gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc. + gsl_vector *y, double lambdaL1, double lambdaL2); #endif diff --git a/src/main.cpp b/src/main.cpp index c7f0573..833136c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -16,57 +16,67 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> +#include "gemma.h" #include <fstream> +#include <iostream> #include <sstream> #include <sys/stat.h> #include <sys/types.h> -#include "gemma.h" using namespace std; -int main(int argc, char * argv[]) { - GEMMA cGemma; - PARAM cPar; - - if (argc <= 1) { - cGemma.PrintHeader(); - return EXIT_SUCCESS; - } - if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'h') { - cGemma.PrintHelp(0); - return EXIT_SUCCESS; - } - if (argc==3 && argv[1][0] == '-' && argv[1][1] == 'h') { - string str; - str.assign(argv[2]); - cGemma.PrintHelp(atoi(str.c_str())); - return EXIT_SUCCESS; - } - if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'l') { - cGemma.PrintLicense(); - return EXIT_SUCCESS; - } - - cGemma.Assign(argc, argv, cPar); - - ifstream check_dir((cPar.path_out).c_str()); - if (!check_dir) { - mkdir((cPar.path_out).c_str(), S_IRWXU|S_IRGRP|S_IROTH); - } - - if (cPar.error==true) {return EXIT_FAILURE;} - - if (cPar.mode_silence) {stringstream ss; cout.rdbuf (ss.rdbuf());} - - cPar.CheckParam(); - - if (cPar.error==true) {return EXIT_FAILURE;} - - cGemma.BatchRun(cPar); - - if (cPar.error==true) {return EXIT_FAILURE;} - - cGemma.WriteLog(argc, argv, cPar); - - return EXIT_SUCCESS; } +int main(int argc, char *argv[]) { + GEMMA cGemma; + PARAM cPar; + + if (argc <= 1) { + cGemma.PrintHeader(); + return EXIT_SUCCESS; + } + if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'h') { + cGemma.PrintHelp(0); + return EXIT_SUCCESS; + } + if (argc == 3 && argv[1][0] == '-' && argv[1][1] == 'h') { + string str; + str.assign(argv[2]); + cGemma.PrintHelp(atoi(str.c_str())); + return EXIT_SUCCESS; + } + if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'l') { + cGemma.PrintLicense(); + return EXIT_SUCCESS; + } + + cGemma.Assign(argc, argv, cPar); + + ifstream check_dir((cPar.path_out).c_str()); + if (!check_dir) { + mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH); + } + + if (cPar.error == true) { + return EXIT_FAILURE; + } + + if (cPar.mode_silence) { + 
stringstream ss; + cout.rdbuf(ss.rdbuf()); + } + + cPar.CheckParam(); + + if (cPar.error == true) { + return EXIT_FAILURE; + } + + cGemma.BatchRun(cPar); + + if (cPar.error == true) { + return EXIT_FAILURE; + } + + cGemma.WriteLog(argc, argv, cPar); + + return EXIT_SUCCESS; +} diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index 709bdde..9e19bf1 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -16,394 +16,381 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> +#include <bitset> +#include <cmath> +#include <cstring> #include <fstream> -#include <sstream> -#include <string> #include <iomanip> -#include <bitset> -#include <vector> +#include <iostream> +#include <limits.h> #include <map> #include <set> -#include <cstring> -#include <cmath> +#include <sstream> #include <stdio.h> #include <stdlib.h> -#include <limits.h> +#include <string> +#include <vector> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" +#include "Eigen/Dense" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" -#include "Eigen/Dense" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" -#include "lapack.h" #include "eigenlib.h" +#include "lapack.h" #include "mathfunc.h" using namespace std; using namespace Eigen; -//calculate variance of a vector -double VectorVar (const gsl_vector *v) { - double d, m=0.0, m2=0.0; - for (size_t i=0; i<v->size; ++i) { - d=gsl_vector_get (v, i); - m+=d; - m2+=d*d; - } - m/=(double)v->size; - m2/=(double)v->size; - return m2-m*m; +// calculate variance of a vector +double VectorVar(const gsl_vector *v) { + double d, m = 0.0, m2 = 0.0; + for (size_t i = 0; i < v->size; ++i) { + d = gsl_vector_get(v, i); + m += d; + m2 += d * d; + } + m /= (double)v->size; + m2 /= (double)v->size; + return m2 - m * m; } // Center the matrix G. 
-void CenterMatrix (gsl_matrix *G) { - double d; - gsl_vector *w=gsl_vector_alloc (G->size1); - gsl_vector *Gw=gsl_vector_alloc (G->size1); - gsl_vector_set_all (w, 1.0); - - gsl_blas_dgemv (CblasNoTrans, 1.0, G, w, 0.0, Gw); - gsl_blas_dsyr2 (CblasUpper, -1.0/(double)G->size1, Gw, w, G); - gsl_blas_ddot (w, Gw, &d); - gsl_blas_dsyr (CblasUpper, d/((double)G->size1*(double)G->size1), - w, G); - - for (size_t i=0; i<G->size1; ++i) { - for (size_t j=0; j<i; ++j) { - d=gsl_matrix_get (G, j, i); - gsl_matrix_set (G, i, j, d); - } - } - - gsl_vector_free(w); - gsl_vector_free(Gw); - - return; +void CenterMatrix(gsl_matrix *G) { + double d; + gsl_vector *w = gsl_vector_alloc(G->size1); + gsl_vector *Gw = gsl_vector_alloc(G->size1); + gsl_vector_set_all(w, 1.0); + + gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw); + gsl_blas_dsyr2(CblasUpper, -1.0 / (double)G->size1, Gw, w, G); + gsl_blas_ddot(w, Gw, &d); + gsl_blas_dsyr(CblasUpper, d / ((double)G->size1 * (double)G->size1), w, G); + + for (size_t i = 0; i < G->size1; ++i) { + for (size_t j = 0; j < i; ++j) { + d = gsl_matrix_get(G, j, i); + gsl_matrix_set(G, i, j, d); + } + } + + gsl_vector_free(w); + gsl_vector_free(Gw); + + return; } // Center the matrix G. 
-void CenterMatrix (gsl_matrix *G, const gsl_vector *w) { - double d, wtw; - gsl_vector *Gw=gsl_vector_alloc (G->size1); - - gsl_blas_ddot (w, w, &wtw); - gsl_blas_dgemv (CblasNoTrans, 1.0, G, w, 0.0, Gw); - gsl_blas_dsyr2 (CblasUpper, -1.0/wtw, Gw, w, G); - gsl_blas_ddot (w, Gw, &d); - gsl_blas_dsyr (CblasUpper, d/(wtw*wtw), w, G); - - for (size_t i=0; i<G->size1; ++i) { - for (size_t j=0; j<i; ++j) { - d=gsl_matrix_get (G, j, i); - gsl_matrix_set (G, i, j, d); - } - } - - gsl_vector_free(Gw); - - return; +void CenterMatrix(gsl_matrix *G, const gsl_vector *w) { + double d, wtw; + gsl_vector *Gw = gsl_vector_alloc(G->size1); + + gsl_blas_ddot(w, w, &wtw); + gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw); + gsl_blas_dsyr2(CblasUpper, -1.0 / wtw, Gw, w, G); + gsl_blas_ddot(w, Gw, &d); + gsl_blas_dsyr(CblasUpper, d / (wtw * wtw), w, G); + + for (size_t i = 0; i < G->size1; ++i) { + for (size_t j = 0; j < i; ++j) { + d = gsl_matrix_get(G, j, i); + gsl_matrix_set(G, i, j, d); + } + } + + gsl_vector_free(Gw); + + return; } // Center the matrix G. 
-void CenterMatrix (gsl_matrix *G, const gsl_matrix *W) { - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWiWt=gsl_matrix_alloc (W->size2, G->size1); - gsl_matrix *GW=gsl_matrix_alloc (G->size1, W->size2); - gsl_matrix *WtGW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *Gtmp=gsl_matrix_alloc (G->size1, G->size1); - - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, WtWi, W, 0.0, WtWiWt); - gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW); - gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, - Gtmp); - - gsl_matrix_sub (G, Gtmp); - gsl_matrix_transpose (Gtmp); - gsl_matrix_sub (G, Gtmp); - - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW); - //GW is destroyed. - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW); - gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, - Gtmp); - - gsl_matrix_add (G, Gtmp); - - gsl_matrix_free(WtW); - gsl_matrix_free(WtWi); - gsl_matrix_free(WtWiWt); - gsl_matrix_free(GW); - gsl_matrix_free(WtGW); - gsl_matrix_free(Gtmp); - - return; +void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) { + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1); + gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2); + gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2); + gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + LUDecomp(WtW, pmt, &sig); + LUInvert(WtW, pmt, WtWi); + + gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, 
WtWi, W, 0.0, WtWiWt); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, G, W, 0.0, GW); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp); + + gsl_matrix_sub(G, Gtmp); + gsl_matrix_transpose(Gtmp); + gsl_matrix_sub(G, Gtmp); + + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, GW, 0.0, WtGW); + // GW is destroyed. + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, WtWiWt, WtGW, 0.0, GW); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, GW, WtWiWt, 0.0, Gtmp); + + gsl_matrix_add(G, Gtmp); + + gsl_matrix_free(WtW); + gsl_matrix_free(WtWi); + gsl_matrix_free(WtWiWt); + gsl_matrix_free(GW); + gsl_matrix_free(WtGW); + gsl_matrix_free(Gtmp); + + return; } // "Standardize" the matrix G such that all diagonal elements = 1. -void StandardizeMatrix (gsl_matrix *G) { - double d=0.0; - vector<double> vec_d; - - for (size_t i=0; i<G->size1; ++i) { - vec_d.push_back(gsl_matrix_get(G, i, i)); - } - for (size_t i=0; i<G->size1; ++i) { - for (size_t j=i; j<G->size2; ++j) { - if (j==i) { - gsl_matrix_set(G, i, j, 1); - } else { - d=gsl_matrix_get(G, i, j); - d/=sqrt(vec_d[i]*vec_d[j]); - gsl_matrix_set(G, i, j, d); - gsl_matrix_set(G, j, i, d); - } - } - } - - return; +void StandardizeMatrix(gsl_matrix *G) { + double d = 0.0; + vector<double> vec_d; + + for (size_t i = 0; i < G->size1; ++i) { + vec_d.push_back(gsl_matrix_get(G, i, i)); + } + for (size_t i = 0; i < G->size1; ++i) { + for (size_t j = i; j < G->size2; ++j) { + if (j == i) { + gsl_matrix_set(G, i, j, 1); + } else { + d = gsl_matrix_get(G, i, j); + d /= sqrt(vec_d[i] * vec_d[j]); + gsl_matrix_set(G, i, j, d); + gsl_matrix_set(G, j, i, d); + } + } + } + + return; } // Scale the matrix G such that the mean diagonal = 1. 
-double ScaleMatrix (gsl_matrix *G) { - double d=0.0; +double ScaleMatrix(gsl_matrix *G) { + double d = 0.0; - for (size_t i=0; i<G->size1; ++i) { - d+=gsl_matrix_get(G, i, i); - } - d/=(double)G->size1; + for (size_t i = 0; i < G->size1; ++i) { + d += gsl_matrix_get(G, i, i); + } + d /= (double)G->size1; - if (d!=0) { - gsl_matrix_scale (G, 1.0/d); - } + if (d != 0) { + gsl_matrix_scale(G, 1.0 / d); + } - return d; + return d; } // Center the vector y. -double CenterVector (gsl_vector *y) { - double d=0.0; +double CenterVector(gsl_vector *y) { + double d = 0.0; - for (size_t i=0; i<y->size; ++i) { - d+=gsl_vector_get (y, i); - } - d/=(double)y->size; + for (size_t i = 0; i < y->size; ++i) { + d += gsl_vector_get(y, i); + } + d /= (double)y->size; - gsl_vector_add_constant (y, -1.0*d); + gsl_vector_add_constant(y, -1.0 * d); - return d; + return d; } // Center the vector y. -void CenterVector (gsl_vector *y, const gsl_matrix *W) { - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *WtWiWty=gsl_vector_alloc (W->size2); +void CenterVector(gsl_vector *y, const gsl_matrix *W) { + gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); + gsl_vector *Wty = gsl_vector_alloc(W->size2); + gsl_vector *WtWiWty = gsl_vector_alloc(W->size2); - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); + gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - LUDecomp (WtW, pmt, &sig); - LUSolve (WtW, pmt, Wty, WtWiWty); + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(W->size2); + LUDecomp(WtW, pmt, &sig); + LUSolve(WtW, pmt, Wty, WtWiWty); - gsl_blas_dgemv (CblasNoTrans, -1.0, W, WtWiWty, 1.0, y); + gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y); - gsl_matrix_free(WtW); - gsl_vector_free(Wty); - 
gsl_vector_free(WtWiWty); + gsl_matrix_free(WtW); + gsl_vector_free(Wty); + gsl_vector_free(WtWiWty); - return; + return; } // "Standardize" vector y to have mean 0 and y^ty/n=1. -void StandardizeVector (gsl_vector *y) { - double d=0.0, m=0.0, v=0.0; +void StandardizeVector(gsl_vector *y) { + double d = 0.0, m = 0.0, v = 0.0; - for (size_t i=0; i<y->size; ++i) { - d=gsl_vector_get (y, i); - m+=d; - v+=d*d; + for (size_t i = 0; i < y->size; ++i) { + d = gsl_vector_get(y, i); + m += d; + v += d * d; } - m/=(double)y->size; - v/=(double)y->size; - v-=m*m; + m /= (double)y->size; + v /= (double)y->size; + v -= m * m; - gsl_vector_add_constant (y, -1.0*m); - gsl_vector_scale (y, 1.0/sqrt(v)); + gsl_vector_add_constant(y, -1.0 * m); + gsl_vector_scale(y, 1.0 / sqrt(v)); return; } // Calculate UtX. -void CalcUtX (const gsl_matrix *U, gsl_matrix *UtX) { - gsl_matrix *X=gsl_matrix_alloc (UtX->size1, UtX->size2); - gsl_matrix_memcpy (X, UtX); - eigenlib_dgemm ("T", "N", 1.0, U, X, 0.0, UtX); - gsl_matrix_free (X); +void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) { + gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2); + gsl_matrix_memcpy(X, UtX); + eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX); + gsl_matrix_free(X); - return; + return; } -void CalcUtX (const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) { - eigenlib_dgemm ("T", "N", 1.0, U, X, 0.0, UtX); - return; +void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) { + eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX); + return; } -void CalcUtX (const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) { - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx); - return; +void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) { + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx); + return; } // Kronecker product. 
void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) { - for (size_t i=0; i<K->size1; i++) { - for (size_t j=0; j<K->size2; j++) { - gsl_matrix_view H_sub= - gsl_matrix_submatrix (H, i*V->size1, j*V->size2, - V->size1, V->size2); - gsl_matrix_memcpy (&H_sub.matrix, V); - gsl_matrix_scale (&H_sub.matrix, - gsl_matrix_get (K, i, j)); - } - } - return; + for (size_t i = 0; i < K->size1; i++) { + for (size_t j = 0; j < K->size2; j++) { + gsl_matrix_view H_sub = gsl_matrix_submatrix( + H, i * V->size1, j * V->size2, V->size1, V->size2); + gsl_matrix_memcpy(&H_sub.matrix, V); + gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j)); + } + } + return; } // Symmetric K matrix. void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) { - for (size_t i=0; i<K->size1; i++) { - for (size_t j=i; j<K->size2; j++) { - gsl_matrix_view H_sub= - gsl_matrix_submatrix (H, i*V->size1, j*V->size2, - V->size1, V->size2); - gsl_matrix_memcpy (&H_sub.matrix, V); - gsl_matrix_scale (&H_sub.matrix, - gsl_matrix_get (K, i, j)); - - if (i!=j) { - gsl_matrix_view H_sub_sym= - gsl_matrix_submatrix (H, j*V->size1, - i*V->size2, V->size1, - V->size2); - gsl_matrix_memcpy (&H_sub_sym.matrix, - &H_sub.matrix); - } - } - } - return; + for (size_t i = 0; i < K->size1; i++) { + for (size_t j = i; j < K->size2; j++) { + gsl_matrix_view H_sub = gsl_matrix_submatrix( + H, i * V->size1, j * V->size2, V->size1, V->size2); + gsl_matrix_memcpy(&H_sub.matrix, V); + gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j)); + + if (i != j) { + gsl_matrix_view H_sub_sym = gsl_matrix_submatrix( + H, j * V->size1, i * V->size2, V->size1, V->size2); + gsl_matrix_memcpy(&H_sub_sym.matrix, &H_sub.matrix); + } + } + } + return; } // This function calculates HWE p value with methods described in // Wigginton et al. (2005) AJHG; it is based on the code in plink 1.07. 
-double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab) { - if ( (n_hom1+n_hom2+n_ab)==0 ) {return 1;} - - // "AA" is the rare allele. - int n_aa=n_hom1 < n_hom2 ? n_hom1 : n_hom2; - int n_bb=n_hom1 < n_hom2 ? n_hom2 : n_hom1; - - int rare_copies = 2 * n_aa + n_ab; - int genotypes = n_ab + n_bb + n_aa; - - double * het_probs = (double *) malloc( (rare_copies + 1) * - sizeof(double)); - if (het_probs == NULL) - cout << "Internal error: SNP-HWE: Unable to allocate array" << - endl; - - int i; - for (i = 0; i <= rare_copies; i++) - het_probs[i] = 0.0; - - // Start at midpoint. - // XZ modified to add (long int) - int mid = ((long int)rare_copies * - (2 * (long int)genotypes - (long int)rare_copies)) / - (2 * (long int)genotypes); - - // Check to ensure that midpoint and rare alleles have same - // parity. - if ((rare_copies & 1) ^ (mid & 1)) - mid++; - - int curr_hets = mid; - int curr_homr = (rare_copies - mid) / 2; - int curr_homc = genotypes - curr_hets - curr_homr; - - het_probs[mid] = 1.0; - double sum = het_probs[mid]; - for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) { - het_probs[curr_hets - 2] = het_probs[curr_hets] * - curr_hets * (curr_hets - 1.0) - / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0)); - sum += het_probs[curr_hets - 2]; - - // Two fewer heterozygotes for next iteration; add one - // rare, one common homozygote. - curr_homr++; - curr_homc++; - } +double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab) { + if ((n_hom1 + n_hom2 + n_ab) == 0) { + return 1; + } - curr_hets = mid; - curr_homr = (rare_copies - mid) / 2; - curr_homc = genotypes - curr_hets - curr_homr; - for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) { - het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * - curr_homr * curr_homc / - ((curr_hets + 2.0) * (curr_hets + 1.0)); - sum += het_probs[curr_hets + 2]; - - // Add 2 heterozygotes for next iteration; subtract - // one rare, one common homozygote. 
- curr_homr--; - curr_homc--; - } + // "AA" is the rare allele. + int n_aa = n_hom1 < n_hom2 ? n_hom1 : n_hom2; + int n_bb = n_hom1 < n_hom2 ? n_hom2 : n_hom1; + + int rare_copies = 2 * n_aa + n_ab; + int genotypes = n_ab + n_bb + n_aa; + + double *het_probs = (double *)malloc((rare_copies + 1) * sizeof(double)); + if (het_probs == NULL) + cout << "Internal error: SNP-HWE: Unable to allocate array" << endl; + + int i; + for (i = 0; i <= rare_copies; i++) + het_probs[i] = 0.0; + + // Start at midpoint. + // XZ modified to add (long int) + int mid = ((long int)rare_copies * + (2 * (long int)genotypes - (long int)rare_copies)) / + (2 * (long int)genotypes); + + // Check to ensure that midpoint and rare alleles have same + // parity. + if ((rare_copies & 1) ^ (mid & 1)) + mid++; + + int curr_hets = mid; + int curr_homr = (rare_copies - mid) / 2; + int curr_homc = genotypes - curr_hets - curr_homr; + + het_probs[mid] = 1.0; + double sum = het_probs[mid]; + for (curr_hets = mid; curr_hets > 1; curr_hets -= 2) { + het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets * + (curr_hets - 1.0) / + (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0)); + sum += het_probs[curr_hets - 2]; + + // Two fewer heterozygotes for next iteration; add one + // rare, one common homozygote. + curr_homr++; + curr_homc++; + } - for (i = 0; i <= rare_copies; i++) - het_probs[i] /= sum; + curr_hets = mid; + curr_homr = (rare_copies - mid) / 2; + curr_homc = genotypes - curr_hets - curr_homr; + for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2) { + het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr * + curr_homc / + ((curr_hets + 2.0) * (curr_hets + 1.0)); + sum += het_probs[curr_hets + 2]; + + // Add 2 heterozygotes for next iteration; subtract + // one rare, one common homozygote. + curr_homr--; + curr_homc--; + } + + for (i = 0; i <= rare_copies; i++) + het_probs[i] /= sum; - double p_hwe = 0.0; + double p_hwe = 0.0; - // p-value calculation for p_hwe. 
- for (i = 0; i <= rare_copies; i++) - { - if (het_probs[i] > het_probs[n_ab]) - continue; - p_hwe += het_probs[i]; - } + // p-value calculation for p_hwe. + for (i = 0; i <= rare_copies; i++) { + if (het_probs[i] > het_probs[n_ab]) + continue; + p_hwe += het_probs[i]; + } - p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe; + p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe; - free(het_probs); + free(het_probs); - return p_hwe; + return p_hwe; } -double UcharToDouble02(const unsigned char c) { - return (double)c*0.01; -} +double UcharToDouble02(const unsigned char c) { return (double)c * 0.01; } unsigned char Double02ToUchar(const double dosage) { - return (int) (dosage*100); + return (int)(dosage * 100); } -void uchar_matrix_get_row (const vector<vector<unsigned char> > &X, - const size_t i_row, VectorXd &x_row) { - if (i_row<X.size()) { - for (size_t j=0; j<x_row.size(); j++) { - x_row(j)=UcharToDouble02(X[i_row][j]); +void uchar_matrix_get_row(const vector<vector<unsigned char>> &X, + const size_t i_row, VectorXd &x_row) { + if (i_row < X.size()) { + for (size_t j = 0; j < x_row.size(); j++) { + x_row(j) = UcharToDouble02(X[i_row][j]); } } else { std::cerr << "Error return genotype vector...\n"; diff --git a/src/mathfunc.h b/src/mathfunc.h index b24364b..29eafe4 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -19,32 +19,32 @@ #ifndef __MATHFUNC_H__ #define __MATHFUNC_H__ -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" #include "Eigen/Dense" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" using namespace std; using namespace Eigen; -double VectorVar (const gsl_vector *v); -void CenterMatrix (gsl_matrix *G); -void CenterMatrix (gsl_matrix *G, const gsl_vector *w); -void CenterMatrix (gsl_matrix *G, const gsl_matrix *W); -void StandardizeMatrix (gsl_matrix *G); -double ScaleMatrix (gsl_matrix *G); -double CenterVector (gsl_vector *y); -void CenterVector (gsl_vector *y, const gsl_matrix *W); -void StandardizeVector (gsl_vector *y); -void CalcUtX (const gsl_matrix *U, 
gsl_matrix *UtX); -void CalcUtX (const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX); -void CalcUtX (const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx); -double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab); +double VectorVar(const gsl_vector *v); +void CenterMatrix(gsl_matrix *G); +void CenterMatrix(gsl_matrix *G, const gsl_vector *w); +void CenterMatrix(gsl_matrix *G, const gsl_matrix *W); +void StandardizeMatrix(gsl_matrix *G); +double ScaleMatrix(gsl_matrix *G); +double CenterVector(gsl_vector *y); +void CenterVector(gsl_vector *y, const gsl_matrix *W); +void StandardizeVector(gsl_vector *y); +void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX); +void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX); +void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx); +double CalcHWE(const size_t n_hom1, const size_t n_hom2, const size_t n_ab); void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H); void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H); double UcharToDouble02(const unsigned char c); unsigned char Double02ToUchar(const double dosage); -void uchar_matrix_get_row (const vector<vector<unsigned char> > &X, - const size_t i_row, VectorXd &x_row); +void uchar_matrix_get_row(const vector<vector<unsigned char>> &X, + const size_t i_row, VectorXd &x_row); #endif diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index 78cd926..f1ab3fc 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -16,895 +16,914 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> #include <fstream> +#include <iostream> #include <sstream> -#include <iomanip> +#include <assert.h> +#include <bitset> #include <cmath> +#include <cstring> +#include <iomanip> #include <iostream> #include <stdio.h> #include <stdlib.h> -#include <bitset> -#include <cstring> -#include <assert.h> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" -#include "gsl/gsl_roots.h" -#include "gsl/gsl_min.h" #include "gsl/gsl_integration.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_min.h" +#include "gsl/gsl_roots.h" +#include "gsl/gsl_vector.h" -#include "io.h" -#include "lapack.h" #include "eigenlib.h" #include "gzstream.h" +#include "io.h" +#include "lapack.h" #include "lmm.h" #include "mvlmm.h" using namespace std; // In this file, X, Y are already transformed (i.e. UtX and UtY). -void MVLMM::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; - - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_oxford=cPar.file_oxford; - file_out=cPar.file_out; - path_out=cPar.path_out; - - l_min=cPar.l_min; - l_max=cPar.l_max; - n_region=cPar.n_region; - p_nr=cPar.p_nr; - em_iter=cPar.em_iter; - nr_iter=cPar.nr_iter; - em_prec=cPar.em_prec; - nr_prec=cPar.nr_prec; - crt=cPar.crt; - - Vg_remle_null=cPar.Vg_remle_null; - Ve_remle_null=cPar.Ve_remle_null; - Vg_mle_null=cPar.Vg_mle_null; - Ve_mle_null=cPar.Ve_mle_null; - - time_UtX=0.0; - time_opt=0.0; - - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; - n_cvt=cPar.n_cvt; - - n_ph=cPar.n_ph; - - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; - - return; +void MVLMM::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; + + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_oxford = cPar.file_oxford; + file_out = cPar.file_out; + 
path_out = cPar.path_out; + + l_min = cPar.l_min; + l_max = cPar.l_max; + n_region = cPar.n_region; + p_nr = cPar.p_nr; + em_iter = cPar.em_iter; + nr_iter = cPar.nr_iter; + em_prec = cPar.em_prec; + nr_prec = cPar.nr_prec; + crt = cPar.crt; + + Vg_remle_null = cPar.Vg_remle_null; + Ve_remle_null = cPar.Ve_remle_null; + Vg_mle_null = cPar.Vg_mle_null; + Ve_mle_null = cPar.Ve_mle_null; + + time_UtX = 0.0; + time_opt = 0.0; + + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ni_test = cPar.ni_test; + ns_test = cPar.ns_test; + n_cvt = cPar.n_cvt; + + n_ph = cPar.n_ph; + + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; + + return; } -void MVLMM::CopyToParam (PARAM &cPar) { - cPar.time_UtX=time_UtX; - cPar.time_opt=time_opt; +void MVLMM::CopyToParam(PARAM &cPar) { + cPar.time_UtX = time_UtX; + cPar.time_opt = time_opt; - cPar.Vg_remle_null=Vg_remle_null; - cPar.Ve_remle_null=Ve_remle_null; - cPar.Vg_mle_null=Vg_mle_null; - cPar.Ve_mle_null=Ve_mle_null; + cPar.Vg_remle_null = Vg_remle_null; + cPar.Ve_remle_null = Ve_remle_null; + cPar.Vg_mle_null = Vg_mle_null; + cPar.Ve_mle_null = Ve_mle_null; - cPar.VVg_remle_null=VVg_remle_null; - cPar.VVe_remle_null=VVe_remle_null; - cPar.VVg_mle_null=VVg_mle_null; - cPar.VVe_mle_null=VVe_mle_null; + cPar.VVg_remle_null = VVg_remle_null; + cPar.VVe_remle_null = VVe_remle_null; + cPar.VVg_mle_null = VVg_mle_null; + cPar.VVe_mle_null = VVe_mle_null; - cPar.beta_remle_null=beta_remle_null; - cPar.se_beta_remle_null=se_beta_remle_null; - cPar.beta_mle_null=beta_mle_null; - cPar.se_beta_mle_null=se_beta_mle_null; + cPar.beta_remle_null = beta_remle_null; + cPar.se_beta_remle_null = se_beta_remle_null; + cPar.beta_mle_null = beta_mle_null; + cPar.se_beta_mle_null = se_beta_mle_null; - cPar.logl_remle_H0=logl_remle_H0; - cPar.logl_mle_H0=logl_mle_H0; - return; + cPar.logl_remle_H0 = logl_remle_H0; + cPar.logl_mle_H0 = logl_mle_H0; + return; } -void MVLMM::WriteFiles () { - string 
file_str; - file_str=path_out+"/"+file_out; - file_str+=".assoc.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t" - <<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t"; - - for (size_t i=0; i<n_ph; i++) { - outfile<<"beta_"<<i+1<<"\t"; - } - for (size_t i=0; i<n_ph; i++) { - for (size_t j=i; j<n_ph; j++) { - outfile<<"Vbeta_"<<i+1<<"_"<<j+1<<"\t"; - } - } - - if (a_mode==1) { - outfile<<"p_wald"<<endl; - } else if (a_mode==2) { - outfile<<"p_lrt"<<endl; - } else if (a_mode==3) { - outfile<<"p_score"<<endl; - } else if (a_mode==4) { - outfile<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl; - } else {} - - - size_t t=0, c=0; - for (size_t i=0; i<snpInfo.size(); ++i) { - if (indicator_snp[i]==0) {continue;} - - outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t" - <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<< - "\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<< - fixed<<setprecision(3)<<snpInfo[i].maf<<"\t"; - - outfile<<scientific<<setprecision(6); - - for (size_t i=0; i<n_ph; i++) { - outfile<<sumStat[t].v_beta[i]<<"\t"; - } - - c=0; - for (size_t i=0; i<n_ph; i++) { - for (size_t j=i; j<n_ph; j++) { - outfile<<sumStat[t].v_Vbeta[c]<<"\t"; - c++; - } - } - - if (a_mode==1) { - outfile<<sumStat[t].p_wald <<endl; - } else if (a_mode==2) { - outfile<<sumStat[t].p_lrt<<endl; - } else if (a_mode==3) { - outfile<<sumStat[t].p_score<<endl; - } else if (a_mode==4) { - outfile<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<< - "\t"<<sumStat[t].p_score<<endl; - } else {} - - t++; - } - - outfile.close(); - outfile.clear(); - return; +void MVLMM::WriteFiles() { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".assoc.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + 
outfile << "chr" + << "\t" + << "rs" + << "\t" + << "ps" + << "\t" + << "n_miss" + << "\t" + << "allele1" + << "\t" + << "allele0" + << "\t" + << "af" + << "\t"; + + for (size_t i = 0; i < n_ph; i++) { + outfile << "beta_" << i + 1 << "\t"; + } + for (size_t i = 0; i < n_ph; i++) { + for (size_t j = i; j < n_ph; j++) { + outfile << "Vbeta_" << i + 1 << "_" << j + 1 << "\t"; + } + } + + if (a_mode == 1) { + outfile << "p_wald" << endl; + } else if (a_mode == 2) { + outfile << "p_lrt" << endl; + } else if (a_mode == 3) { + outfile << "p_score" << endl; + } else if (a_mode == 4) { + outfile << "p_wald" + << "\t" + << "p_lrt" + << "\t" + << "p_score" << endl; + } else { + } + + size_t t = 0, c = 0; + for (size_t i = 0; i < snpInfo.size(); ++i) { + if (indicator_snp[i] == 0) { + continue; + } + + outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t" + << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t" << fixed + << setprecision(3) << snpInfo[i].maf << "\t"; + + outfile << scientific << setprecision(6); + + for (size_t i = 0; i < n_ph; i++) { + outfile << sumStat[t].v_beta[i] << "\t"; + } + + c = 0; + for (size_t i = 0; i < n_ph; i++) { + for (size_t j = i; j < n_ph; j++) { + outfile << sumStat[t].v_Vbeta[c] << "\t"; + c++; + } + } + + if (a_mode == 1) { + outfile << sumStat[t].p_wald << endl; + } else if (a_mode == 2) { + outfile << sumStat[t].p_lrt << endl; + } else if (a_mode == 3) { + outfile << sumStat[t].p_score << endl; + } else if (a_mode == 4) { + outfile << sumStat[t].p_wald << "\t" << sumStat[t].p_lrt << "\t" + << sumStat[t].p_score << endl; + } else { + } + + t++; + } + + outfile.close(); + outfile.clear(); + return; } // Below are functions for EM algorithm. 
-double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, - gsl_vector *D_l, gsl_matrix *UltVeh, - gsl_matrix *UltVehi) { - size_t d_size=V_g->size1; - double d, logdet_Ve=0.0; - - // Eigen decomposition of V_e. - gsl_matrix *Lambda=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_temp=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_h=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_hi=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *VgVehi=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *U_l=gsl_matrix_alloc (d_size, d_size); - - gsl_matrix_memcpy(V_e_temp, V_e); - EigenDecomp(V_e_temp, U_l, D_l, 0); - - // Calculate V_e_h and V_e_hi. - gsl_matrix_set_zero(V_e_h); - gsl_matrix_set_zero(V_e_hi); - for (size_t i=0; i<d_size; i++) { - d=gsl_vector_get (D_l, i); - if (d<=0) {continue;} - logdet_Ve+=log(d); - - gsl_vector_view U_col=gsl_matrix_column(U_l, i); - d=sqrt(d); - gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_h); - d=1.0/d; - gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_hi); - } - - // Copy the upper part to lower part. - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<i; j++) { - gsl_matrix_set (V_e_h, i, j, gsl_matrix_get(V_e_h, j, i)); - gsl_matrix_set (V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i)); - } - } - - // Calculate Lambda=V_ehi V_g V_ehi. - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_g,V_e_hi,0.0,VgVehi); - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,V_e_hi,VgVehi,0.0,Lambda); - - // Eigen decomposition of Lambda. - EigenDecomp(Lambda, U_l, D_l, 0); - - for (size_t i=0; i<d_size; i++) { - d=gsl_vector_get (D_l, i); - if (d<0) {gsl_vector_set (D_l, i, 0);} - } - - // Calculate UltVeh and UltVehi. 
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_h,0.0,UltVeh); - gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,U_l,V_e_hi,0.0,UltVehi); - - //free memory - gsl_matrix_free (Lambda); - gsl_matrix_free (V_e_temp); - gsl_matrix_free (V_e_h); - gsl_matrix_free (V_e_hi); - gsl_matrix_free (VgVehi); - gsl_matrix_free (U_l); - - return logdet_Ve; +double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l, + gsl_matrix *UltVeh, gsl_matrix *UltVehi) { + size_t d_size = V_g->size1; + double d, logdet_Ve = 0.0; + + // Eigen decomposition of V_e. + gsl_matrix *Lambda = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_temp = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_h = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_hi = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *VgVehi = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *U_l = gsl_matrix_alloc(d_size, d_size); + + gsl_matrix_memcpy(V_e_temp, V_e); + EigenDecomp(V_e_temp, U_l, D_l, 0); + + // Calculate V_e_h and V_e_hi. + gsl_matrix_set_zero(V_e_h); + gsl_matrix_set_zero(V_e_hi); + for (size_t i = 0; i < d_size; i++) { + d = gsl_vector_get(D_l, i); + if (d <= 0) { + continue; + } + logdet_Ve += log(d); + + gsl_vector_view U_col = gsl_matrix_column(U_l, i); + d = sqrt(d); + gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_h); + d = 1.0 / d; + gsl_blas_dsyr(CblasUpper, d, &U_col.vector, V_e_hi); + } + + // Copy the upper part to lower part. + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j < i; j++) { + gsl_matrix_set(V_e_h, i, j, gsl_matrix_get(V_e_h, j, i)); + gsl_matrix_set(V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i)); + } + } + + // Calculate Lambda=V_ehi V_g V_ehi. + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_g, V_e_hi, 0.0, VgVehi); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda); + + // Eigen decomposition of Lambda. 
+ EigenDecomp(Lambda, U_l, D_l, 0); + + for (size_t i = 0; i < d_size; i++) { + d = gsl_vector_get(D_l, i); + if (d < 0) { + gsl_vector_set(D_l, i, 0); + } + } + + // Calculate UltVeh and UltVehi. + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_hi, 0.0, UltVehi); + + // free memory + gsl_matrix_free(Lambda); + gsl_matrix_free(V_e_temp); + gsl_matrix_free(V_e_h); + gsl_matrix_free(V_e_hi); + gsl_matrix_free(VgVehi); + gsl_matrix_free(U_l); + + return logdet_Ve; } -//Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}. -double CalcQi (const gsl_vector *eval, const gsl_vector *D_l, - const gsl_matrix *X, gsl_matrix *Qi) { - size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1; - size_t c_size=dc_size/d_size; - - double delta, dl, d1, d2, d, logdet_Q; - - gsl_matrix *Q=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix_set_zero (Q); - - for (size_t i=0; i<c_size; i++) { - for (size_t j=0; j<c_size; j++) { - for (size_t l=0; l<d_size; l++) { - dl=gsl_vector_get(D_l, l); - - if (j<i) { - d=gsl_matrix_get (Q, j*d_size+l, i*d_size+l); - } else { - d=0.0; - for (size_t k=0; k<n_size; k++) { - d1=gsl_matrix_get(X, i, k); - d2=gsl_matrix_get(X, j, k); - delta=gsl_vector_get(eval, k); - d+=d1*d2/(dl*delta+1.0); - } - } - - gsl_matrix_set (Q, i*d_size+l, j*d_size+l, d); - } - } - } - - // Calculate LU decomposition of Q, and invert Q and calculate |Q|. - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (dc_size); - LUDecomp (Q, pmt, &sig); - LUInvert (Q, pmt, Qi); - - logdet_Q=LULndet (Q); - - gsl_matrix_free (Q); - gsl_permutation_free (pmt); - - return logdet_Q; +// Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}. 
+double CalcQi(const gsl_vector *eval, const gsl_vector *D_l, + const gsl_matrix *X, gsl_matrix *Qi) { + size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1; + size_t c_size = dc_size / d_size; + + double delta, dl, d1, d2, d, logdet_Q; + + gsl_matrix *Q = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix_set_zero(Q); + + for (size_t i = 0; i < c_size; i++) { + for (size_t j = 0; j < c_size; j++) { + for (size_t l = 0; l < d_size; l++) { + dl = gsl_vector_get(D_l, l); + + if (j < i) { + d = gsl_matrix_get(Q, j * d_size + l, i * d_size + l); + } else { + d = 0.0; + for (size_t k = 0; k < n_size; k++) { + d1 = gsl_matrix_get(X, i, k); + d2 = gsl_matrix_get(X, j, k); + delta = gsl_vector_get(eval, k); + d += d1 * d2 / (dl * delta + 1.0); + } + } + + gsl_matrix_set(Q, i * d_size + l, j * d_size + l, d); + } + } + } + + // Calculate LU decomposition of Q, and invert Q and calculate |Q|. + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(dc_size); + LUDecomp(Q, pmt, &sig); + LUInvert(Q, pmt, Qi); + + logdet_Q = LULndet(Q); + + gsl_matrix_free(Q); + gsl_permutation_free(pmt); + + return logdet_Q; } // xHiy=\sum_{k=1}^n x_k\otimes ((delta_k*Dl+I)^{-1}Ul^TVe^{-1/2}y. 
-void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l, - const gsl_matrix *X, const gsl_matrix *UltVehiY, - gsl_vector *xHiy) { - size_t n_size=eval->size, c_size=X->size1, d_size=D_l->size; - - gsl_vector_set_zero (xHiy); - - double x, delta, dl, y, d; - for (size_t i=0; i<d_size; i++) { - dl=gsl_vector_get(D_l, i); - for (size_t j=0; j<c_size; j++) { - d=0.0; - for (size_t k=0; k<n_size; k++) { - x=gsl_matrix_get(X, j, k); - y=gsl_matrix_get(UltVehiY, i, k); - delta=gsl_vector_get(eval, k); - d+=x*y/(delta*dl+1.0); - } - gsl_vector_set(xHiy, j*d_size+i, d); - } - } - - return; -} +void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l, + const gsl_matrix *X, const gsl_matrix *UltVehiY, + gsl_vector *xHiy) { + size_t n_size = eval->size, c_size = X->size1, d_size = D_l->size; + + gsl_vector_set_zero(xHiy); + + double x, delta, dl, y, d; + for (size_t i = 0; i < d_size; i++) { + dl = gsl_vector_get(D_l, i); + for (size_t j = 0; j < c_size; j++) { + d = 0.0; + for (size_t k = 0; k < n_size; k++) { + x = gsl_matrix_get(X, j, k); + y = gsl_matrix_get(UltVehiY, i, k); + delta = gsl_vector_get(eval, k); + d += x * y / (delta * dl + 1.0); + } + gsl_vector_set(xHiy, j * d_size + i, d); + } + } + return; +} // OmegaU=D_l/(delta Dl+I)^{-1} // OmegaE=delta D_l/(delta Dl+I)^{-1} -void CalcOmega (const gsl_vector *eval, const gsl_vector *D_l, -gsl_matrix *OmegaU, gsl_matrix *OmegaE) { - size_t n_size=eval->size, d_size=D_l->size; - double delta, dl, d_u, d_e; +void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l, + gsl_matrix *OmegaU, gsl_matrix *OmegaE) { + size_t n_size = eval->size, d_size = D_l->size; + double delta, dl, d_u, d_e; - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get(eval, k); - for (size_t i=0; i<d_size; i++) { - dl=gsl_vector_get(D_l, i); + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + for (size_t i = 0; i < d_size; i++) { + dl = gsl_vector_get(D_l, i); - d_u=dl/(delta*dl+1.0); - d_e=delta*d_u; + d_u 
= dl / (delta * dl + 1.0); + d_e = delta * d_u; - gsl_matrix_set(OmegaU, i, k, d_u); - gsl_matrix_set(OmegaE, i, k, d_e); - } - } + gsl_matrix_set(OmegaU, i, k, d_u); + gsl_matrix_set(OmegaE, i, k, d_e); + } + } - return; + return; } -void UpdateU (const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY, - const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) { - gsl_matrix_memcpy (UltVehiU, UltVehiY); - gsl_matrix_sub (UltVehiU, UltVehiBX); +void UpdateU(const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY, + const gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU) { + gsl_matrix_memcpy(UltVehiU, UltVehiY); + gsl_matrix_sub(UltVehiU, UltVehiBX); - gsl_matrix_mul_elements (UltVehiU, OmegaE); - return; + gsl_matrix_mul_elements(UltVehiU, OmegaE); + return; } -void UpdateE (const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX, - const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) { - gsl_matrix_memcpy (UltVehiE, UltVehiY); - gsl_matrix_sub (UltVehiE, UltVehiBX); - gsl_matrix_sub (UltVehiE, UltVehiU); +void UpdateE(const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX, + const gsl_matrix *UltVehiU, gsl_matrix *UltVehiE) { + gsl_matrix_memcpy(UltVehiE, UltVehiY); + gsl_matrix_sub(UltVehiE, UltVehiBX); + gsl_matrix_sub(UltVehiE, UltVehiU); - return; + return; } -void UpdateL_B (const gsl_matrix *X, const gsl_matrix *XXti, - const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU, - gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) { - size_t c_size=X->size1, d_size=UltVehiY->size1; +void UpdateL_B(const gsl_matrix *X, const gsl_matrix *XXti, + const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU, + gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB) { + size_t c_size = X->size1, d_size = UltVehiY->size1; - gsl_matrix *YUX=gsl_matrix_alloc (d_size, c_size); + gsl_matrix *YUX = gsl_matrix_alloc(d_size, c_size); - gsl_matrix_memcpy (UltVehiBX, UltVehiY); - gsl_matrix_sub (UltVehiBX, UltVehiU); + gsl_matrix_memcpy(UltVehiBX, UltVehiY); + gsl_matrix_sub(UltVehiBX, UltVehiU); - 
gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,UltVehiBX,X,0.0,YUX); - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,YUX,XXti,0.0,UltVehiB); + gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UltVehiBX, X, 0.0, YUX); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, YUX, XXti, 0.0, UltVehiB); - gsl_matrix_free(YUX); + gsl_matrix_free(YUX); - return; + return; } -void UpdateRL_B (const gsl_vector *xHiy, const gsl_matrix *Qi, - gsl_matrix *UltVehiB) { - size_t d_size=UltVehiB->size1, c_size=UltVehiB->size2, - dc_size=Qi->size1; +void UpdateRL_B(const gsl_vector *xHiy, const gsl_matrix *Qi, + gsl_matrix *UltVehiB) { + size_t d_size = UltVehiB->size1, c_size = UltVehiB->size2, + dc_size = Qi->size1; - gsl_vector *b=gsl_vector_alloc (dc_size); + gsl_vector *b = gsl_vector_alloc(dc_size); - // Calculate b=Qiv. - gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b); + // Calculate b=Qiv. + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b); - // Copy b to UltVehiB. - for (size_t i=0; i<c_size; i++) { - gsl_vector_view UltVehiB_col=gsl_matrix_column (UltVehiB, i); - gsl_vector_const_view b_subcol= - gsl_vector_const_subvector (b, i*d_size, d_size); - gsl_vector_memcpy (&UltVehiB_col.vector, &b_subcol.vector); - } + // Copy b to UltVehiB. 
+ for (size_t i = 0; i < c_size; i++) { + gsl_vector_view UltVehiB_col = gsl_matrix_column(UltVehiB, i); + gsl_vector_const_view b_subcol = + gsl_vector_const_subvector(b, i * d_size, d_size); + gsl_vector_memcpy(&UltVehiB_col.vector, &b_subcol.vector); + } - gsl_vector_free(b); + gsl_vector_free(b); - return; + return; } -void UpdateV (const gsl_vector *eval, const gsl_matrix *U, - const gsl_matrix *E, const gsl_matrix *Sigma_uu, - const gsl_matrix *Sigma_ee, gsl_matrix *V_g, gsl_matrix *V_e) { - size_t n_size=eval->size, d_size=U->size1; +void UpdateV(const gsl_vector *eval, const gsl_matrix *U, const gsl_matrix *E, + const gsl_matrix *Sigma_uu, const gsl_matrix *Sigma_ee, + gsl_matrix *V_g, gsl_matrix *V_e) { + size_t n_size = eval->size, d_size = U->size1; - gsl_matrix_set_zero (V_g); - gsl_matrix_set_zero (V_e); + gsl_matrix_set_zero(V_g); + gsl_matrix_set_zero(V_e); - double delta; + double delta; - // Calculate the first part: UD^{-1}U^T and EE^T. - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - if (delta==0) {continue;} + // Calculate the first part: UD^{-1}U^T and EE^T. + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + if (delta == 0) { + continue; + } - gsl_vector_const_view U_col=gsl_matrix_const_column (U, k); - gsl_blas_dsyr (CblasUpper, 1.0/delta, &U_col.vector, V_g); - } + gsl_vector_const_view U_col = gsl_matrix_const_column(U, k); + gsl_blas_dsyr(CblasUpper, 1.0 / delta, &U_col.vector, V_g); + } - gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e); + gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e); - // Copy the upper part to lower part. - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<i; j++) { - gsl_matrix_set (V_g, i, j, gsl_matrix_get(V_g, j, i)); - gsl_matrix_set (V_e, i, j, gsl_matrix_get(V_e, j, i)); - } - } + // Copy the upper part to lower part. 
+ for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j < i; j++) { + gsl_matrix_set(V_g, i, j, gsl_matrix_get(V_g, j, i)); + gsl_matrix_set(V_e, i, j, gsl_matrix_get(V_e, j, i)); + } + } - // Add Sigma. - gsl_matrix_add (V_g, Sigma_uu); - gsl_matrix_add (V_e, Sigma_ee); + // Add Sigma. + gsl_matrix_add(V_g, Sigma_uu); + gsl_matrix_add(V_e, Sigma_ee); - // Scale by 1/n. - gsl_matrix_scale (V_g, 1.0/(double)n_size); - gsl_matrix_scale (V_e, 1.0/(double)n_size); + // Scale by 1/n. + gsl_matrix_scale(V_g, 1.0 / (double)n_size); + gsl_matrix_scale(V_e, 1.0 / (double)n_size); - return; + return; } -void CalcSigma (const char func_name, const gsl_vector *eval, - const gsl_vector *D_l, const gsl_matrix *X, - const gsl_matrix *OmegaU, const gsl_matrix *OmegaE, - const gsl_matrix *UltVeh, const gsl_matrix *Qi, - gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) { - if (func_name!='R' && func_name!='L' && func_name!='r' && - func_name!='l') { - cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<< - "likelihood, 'L' for log-likelihood."<<endl; - return; - } - - size_t n_size=eval->size, c_size=X->size1; - size_t d_size=D_l->size, dc_size=Qi->size1; - - gsl_matrix_set_zero(Sigma_uu); - gsl_matrix_set_zero(Sigma_ee); - - double delta, dl, x, d; - - // Calculate the first diagonal term. - gsl_vector_view Suu_diag=gsl_matrix_diagonal (Sigma_uu); - gsl_vector_view See_diag=gsl_matrix_diagonal (Sigma_ee); - - for (size_t k=0; k<n_size; k++) { - gsl_vector_const_view OmegaU_col=gsl_matrix_const_column (OmegaU, k); - gsl_vector_const_view OmegaE_col=gsl_matrix_const_column (OmegaE, k); - - gsl_vector_add (&Suu_diag.vector, &OmegaU_col.vector); - gsl_vector_add (&See_diag.vector, &OmegaE_col.vector); - } - - // Calculate the second term for REML. 
- if (func_name=='R' || func_name=='r') { - gsl_matrix *M_u=gsl_matrix_alloc(dc_size, d_size); - gsl_matrix *M_e=gsl_matrix_alloc(dc_size, d_size); - gsl_matrix *QiM=gsl_matrix_alloc(dc_size, d_size); - - gsl_matrix_set_zero(M_u); - gsl_matrix_set_zero(M_e); - - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get(eval, k); - - for (size_t i=0; i<d_size; i++) { - dl=gsl_vector_get(D_l, i); - for (size_t j=0; j<c_size; j++) { - x=gsl_matrix_get(X, j, k); - d=x/(delta*dl+1.0); - gsl_matrix_set(M_e, j*d_size+i, i, d); - gsl_matrix_set(M_u, j*d_size+i, i, d*dl); - } - } - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_u,0.0,QiM); - gsl_blas_dgemm(CblasTrans,CblasNoTrans,delta,M_u,QiM,1.0, - Sigma_uu); - - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Qi,M_e,0.0,QiM); - gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,M_e,QiM,1.0, - Sigma_ee); - } - - gsl_matrix_free(M_u); - gsl_matrix_free(M_e); - gsl_matrix_free(QiM); - } - - // Multiply both sides by VehUl. - gsl_matrix *M=gsl_matrix_alloc (d_size, d_size); - - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_uu,UltVeh,0.0,M); - gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_uu); - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Sigma_ee,UltVeh,0.0,M); - gsl_blas_dgemm(CblasTrans, CblasNoTrans,1.0,UltVeh,M,0.0,Sigma_ee); - - gsl_matrix_free(M); - return; +void CalcSigma(const char func_name, const gsl_vector *eval, + const gsl_vector *D_l, const gsl_matrix *X, + const gsl_matrix *OmegaU, const gsl_matrix *OmegaE, + const gsl_matrix *UltVeh, const gsl_matrix *Qi, + gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee) { + if (func_name != 'R' && func_name != 'L' && func_name != 'r' && + func_name != 'l') { + cout << "func_name only takes 'R' or 'L': 'R' for log-restricted " + << "likelihood, 'L' for log-likelihood." 
<< endl; + return; + } + + size_t n_size = eval->size, c_size = X->size1; + size_t d_size = D_l->size, dc_size = Qi->size1; + + gsl_matrix_set_zero(Sigma_uu); + gsl_matrix_set_zero(Sigma_ee); + + double delta, dl, x, d; + + // Calculate the first diagonal term. + gsl_vector_view Suu_diag = gsl_matrix_diagonal(Sigma_uu); + gsl_vector_view See_diag = gsl_matrix_diagonal(Sigma_ee); + + for (size_t k = 0; k < n_size; k++) { + gsl_vector_const_view OmegaU_col = gsl_matrix_const_column(OmegaU, k); + gsl_vector_const_view OmegaE_col = gsl_matrix_const_column(OmegaE, k); + + gsl_vector_add(&Suu_diag.vector, &OmegaU_col.vector); + gsl_vector_add(&See_diag.vector, &OmegaE_col.vector); + } + + // Calculate the second term for REML. + if (func_name == 'R' || func_name == 'r') { + gsl_matrix *M_u = gsl_matrix_alloc(dc_size, d_size); + gsl_matrix *M_e = gsl_matrix_alloc(dc_size, d_size); + gsl_matrix *QiM = gsl_matrix_alloc(dc_size, d_size); + + gsl_matrix_set_zero(M_u); + gsl_matrix_set_zero(M_e); + + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + + for (size_t i = 0; i < d_size; i++) { + dl = gsl_vector_get(D_l, i); + for (size_t j = 0; j < c_size; j++) { + x = gsl_matrix_get(X, j, k); + d = x / (delta * dl + 1.0); + gsl_matrix_set(M_e, j * d_size + i, i, d); + gsl_matrix_set(M_u, j * d_size + i, i, d * dl); + } + } + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_u, 0.0, QiM); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, delta, M_u, QiM, 1.0, Sigma_uu); + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_e, 0.0, QiM); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, M_e, QiM, 1.0, Sigma_ee); + } + + gsl_matrix_free(M_u); + gsl_matrix_free(M_e); + gsl_matrix_free(QiM); + } + + // Multiply both sides by VehUl. 
+ gsl_matrix *M = gsl_matrix_alloc(d_size, d_size); + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_uu, UltVeh, 0.0, M); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_uu); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_ee, UltVeh, 0.0, M); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_ee); + + gsl_matrix_free(M); + return; } // 'R' for restricted likelihood and 'L' for likelihood. // 'R' update B and 'L' don't. // only calculate -0.5*\sum_{k=1}^n|H_k|-0.5yPxy. -double MphCalcLogL (const gsl_vector *eval, const gsl_vector *xHiy, - const gsl_vector *D_l, const gsl_matrix *UltVehiY, - const gsl_matrix *Qi) { - size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1; - double logl=0.0, delta, dl, y, d; - - // Calculate yHiy+log|H_k|. - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get(eval, k); - for (size_t i=0; i<d_size; i++) { - y=gsl_matrix_get(UltVehiY, i, k); - dl=gsl_vector_get(D_l, i); - d=delta*dl+1.0; - - logl+=y*y/d+log(d); - } - } +double MphCalcLogL(const gsl_vector *eval, const gsl_vector *xHiy, + const gsl_vector *D_l, const gsl_matrix *UltVehiY, + const gsl_matrix *Qi) { + size_t n_size = eval->size, d_size = D_l->size, dc_size = Qi->size1; + double logl = 0.0, delta, dl, y, d; + + // Calculate yHiy+log|H_k|. + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + for (size_t i = 0; i < d_size; i++) { + y = gsl_matrix_get(UltVehiY, i, k); + dl = gsl_vector_get(D_l, i); + d = delta * dl + 1.0; + + logl += y * y / d + log(d); + } + } - // Calculate the rest of yPxy. - gsl_vector *Qiv=gsl_vector_alloc(dc_size); + // Calculate the rest of yPxy. 
+ gsl_vector *Qiv = gsl_vector_alloc(dc_size); - gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv); - gsl_blas_ddot(xHiy, Qiv, &d); + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv); + gsl_blas_ddot(xHiy, Qiv, &d); - logl-=d; + logl -= d; - gsl_vector_free(Qiv); + gsl_vector_free(Qiv); - return -0.5*logl; + return -0.5 * logl; } // Y is a dxn matrix, X is a cxn matrix, B is a dxc matrix, V_g is a // dxd matrix, V_e is a dxd matrix, eval is a size n vector //'R' for restricted likelihood and 'L' for likelihood. -double MphEM (const char func_name, const size_t max_iter, - const double max_prec, const gsl_vector *eval, - const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *U_hat, - gsl_matrix *E_hat, gsl_matrix *OmegaU, gsl_matrix *OmegaE, - gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX, - gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, - gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B) { - if (func_name!='R' && func_name!='L' && - func_name!='r' && func_name!='l') { - cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<< - "likelihood, 'L' for log-likelihood."<<endl; - return 0.0; - } - - size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1; - size_t dc_size=d_size*c_size; - - gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size); - gsl_matrix *XXti=gsl_matrix_alloc (c_size, c_size); - gsl_vector *D_l=gsl_vector_alloc (d_size); - gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *UltVehiB=gsl_matrix_alloc (d_size, c_size); - gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *Sigma_uu=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *Sigma_ee=gsl_matrix_alloc (d_size, d_size); - gsl_vector *xHiy=gsl_vector_alloc (dc_size); - gsl_permutation * pmt=gsl_permutation_alloc (c_size); - - double logl_const=0.0, logl_old=0.0, logl_new=0.0; - double logdet_Q, logdet_Ve; - int sig; - - // Calculate |XXt| and (XXt)^{-1}. 
- gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt); - for (size_t i=0; i<c_size; ++i) { - for (size_t j=0; j<i; ++j) { - gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i)); - } - } - - LUDecomp (XXt, pmt, &sig); - LUInvert (XXt, pmt, XXti); - - // Calculate the constant for logl. - if (func_name=='R' || func_name=='r') { - logl_const=-0.5*(double)(n_size-c_size)* - (double)d_size*log(2.0*M_PI)+0.5*(double)d_size*LULndet (XXt); - } else { - logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI); - } - - // Start EM. - for (size_t t=0; t<max_iter; t++) { - logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); - - logdet_Q=CalcQi (eval, D_l, X, Qi); - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, - Y, 0.0, UltVehiY); - CalcXHiY(eval, D_l, X, UltVehiY, xHiy); - - // Calculate log likelihood/restricted likelihood value, and - // terminate if change is small. - logl_new=logl_const+MphCalcLogL (eval, xHiy, D_l, UltVehiY, Qi) - - 0.5*(double)n_size*logdet_Ve; - if (func_name=='R' || func_name=='r') { - logl_new+=-0.5*(logdet_Q-(double)c_size*logdet_Ve); - } - if (t!=0 && abs(logl_new-logl_old)<max_prec) {break;} - logl_old=logl_new; - - CalcOmega (eval, D_l, OmegaU, OmegaE); - - // Update UltVehiB, UltVehiU. - if (func_name=='R' || func_name=='r') { - UpdateRL_B(xHiy, Qi, UltVehiB); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, - 0.0, UltVehiBX); - } else if (t==0) { - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, - 0.0, UltVehiB); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, - 0.0, UltVehiBX); - } - - UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU); - - if (func_name=='L' || func_name=='l') { - - // UltVehiBX is destroyed here. - UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, - 0.0, UltVehiBX); - } - - UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE); - - // Calculate U_hat, E_hat and B. 
- gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiU, - 0.0,U_hat); - gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiE, - 0.0,E_hat); - gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,UltVeh,UltVehiB,0.0,B); - - // Calculate Sigma_uu and Sigma_ee. - CalcSigma (func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, - Qi, Sigma_uu, Sigma_ee); - - // Update V_g and V_e. - UpdateV (eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e); - } - - gsl_matrix_free(XXt); - gsl_matrix_free(XXti); - gsl_vector_free(D_l); - gsl_matrix_free(UltVeh); - gsl_matrix_free(UltVehi); - gsl_matrix_free(UltVehiB); - gsl_matrix_free(Qi); - gsl_matrix_free(Sigma_uu); - gsl_matrix_free(Sigma_ee); - gsl_vector_free(xHiy); - gsl_permutation_free(pmt); - - return logl_new; +double MphEM(const char func_name, const size_t max_iter, const double max_prec, + const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y, + gsl_matrix *U_hat, gsl_matrix *E_hat, gsl_matrix *OmegaU, + gsl_matrix *OmegaE, gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX, + gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, gsl_matrix *V_g, + gsl_matrix *V_e, gsl_matrix *B) { + if (func_name != 'R' && func_name != 'L' && func_name != 'r' && + func_name != 'l') { + cout << "func_name only takes 'R' or 'L': 'R' for log-restricted " + << "likelihood, 'L' for log-likelihood." 
<< endl; + return 0.0; + } + + size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1; + size_t dc_size = d_size * c_size; + + gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size); + gsl_matrix *XXti = gsl_matrix_alloc(c_size, c_size); + gsl_vector *D_l = gsl_vector_alloc(d_size); + gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *UltVehiB = gsl_matrix_alloc(d_size, c_size); + gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *Sigma_uu = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *Sigma_ee = gsl_matrix_alloc(d_size, d_size); + gsl_vector *xHiy = gsl_vector_alloc(dc_size); + gsl_permutation *pmt = gsl_permutation_alloc(c_size); + + double logl_const = 0.0, logl_old = 0.0, logl_new = 0.0; + double logdet_Q, logdet_Ve; + int sig; + + // Calculate |XXt| and (XXt)^{-1}. + gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt); + for (size_t i = 0; i < c_size; ++i) { + for (size_t j = 0; j < i; ++j) { + gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i)); + } + } + + LUDecomp(XXt, pmt, &sig); + LUInvert(XXt, pmt, XXti); + + // Calculate the constant for logl. + if (func_name == 'R' || func_name == 'r') { + logl_const = + -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) + + 0.5 * (double)d_size * LULndet(XXt); + } else { + logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI); + } + + // Start EM. + for (size_t t = 0; t < max_iter; t++) { + logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi); + + logdet_Q = CalcQi(eval, D_l, X, Qi); + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY); + CalcXHiY(eval, D_l, X, UltVehiY, xHiy); + + // Calculate log likelihood/restricted likelihood value, and + // terminate if change is small. 
+ logl_new = logl_const + MphCalcLogL(eval, xHiy, D_l, UltVehiY, Qi) - + 0.5 * (double)n_size * logdet_Ve; + if (func_name == 'R' || func_name == 'r') { + logl_new += -0.5 * (logdet_Q - (double)c_size * logdet_Ve); + } + if (t != 0 && abs(logl_new - logl_old) < max_prec) { + break; + } + logl_old = logl_new; + + CalcOmega(eval, D_l, OmegaU, OmegaE); + + // Update UltVehiB, UltVehiU. + if (func_name == 'R' || func_name == 'r') { + UpdateRL_B(xHiy, Qi, UltVehiB); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0, + UltVehiBX); + } else if (t == 0) { + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 0.0, + UltVehiB); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0, + UltVehiBX); + } + + UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU); + + if (func_name == 'L' || func_name == 'l') { + + // UltVehiBX is destroyed here. + UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0, + UltVehiBX); + } + + UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE); + + // Calculate U_hat, E_hat and B. + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiU, 0.0, U_hat); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiE, 0.0, E_hat); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiB, 0.0, B); + + // Calculate Sigma_uu and Sigma_ee. + CalcSigma(func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, Qi, Sigma_uu, + Sigma_ee); + + // Update V_g and V_e. + UpdateV(eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e); + } + + gsl_matrix_free(XXt); + gsl_matrix_free(XXti); + gsl_vector_free(D_l); + gsl_matrix_free(UltVeh); + gsl_matrix_free(UltVehi); + gsl_matrix_free(UltVehiB); + gsl_matrix_free(Qi); + gsl_matrix_free(Sigma_uu); + gsl_matrix_free(Sigma_ee); + gsl_vector_free(xHiy); + gsl_permutation_free(pmt); + + return logl_new; } // Calculate p-value, beta (d by 1 vector) and V(beta). 
-double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec, - const gsl_matrix *W, const gsl_matrix *Y, - const gsl_matrix *V_g, const gsl_matrix *V_e, - gsl_matrix *UltVehiY, gsl_vector *beta, gsl_matrix *Vbeta) { - size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1; - size_t dc_size=d_size*c_size; - double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value; - - gsl_vector *D_l=gsl_vector_alloc (d_size); - gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *WHix=gsl_matrix_alloc (dc_size, d_size); - gsl_matrix *QiWHix=gsl_matrix_alloc(dc_size, d_size); - - gsl_matrix *xPx=gsl_matrix_alloc (d_size, d_size); - gsl_vector *xPy=gsl_vector_alloc (d_size); - gsl_vector *WHiy=gsl_vector_alloc (dc_size); - - gsl_matrix_set_zero (xPx); - gsl_matrix_set_zero (WHix); - gsl_vector_set_zero (xPy); - gsl_vector_set_zero (WHiy); - - // Eigen decomposition and calculate log|Ve|. - logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); - - // Calculate Qi and log|Q|. - logdet_Q=CalcQi (eval, D_l, W, Qi); - - // Calculate UltVehiY. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, - 0.0, UltVehiY); - - // Calculate WHix, WHiy, xHiy, xHix. 
- for (size_t i=0; i<d_size; i++) { - dl=gsl_vector_get(D_l, i); - - d1=0.0; d2=0.0; - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get(eval, k); - dx=gsl_vector_get(x_vec, k); - dy=gsl_matrix_get(UltVehiY, i, k); - - d1+=dx*dy/(delta*dl+1.0); - d2+=dx*dx/(delta*dl+1.0); - } - gsl_vector_set (xPy, i, d1); - gsl_matrix_set (xPx, i, i, d2); - - for (size_t j=0; j<c_size; j++) { - d1=0.0; d2=0.0; - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get(eval, k); - dx=gsl_vector_get(x_vec, k); - dw=gsl_matrix_get(W, j, k); - dy=gsl_matrix_get(UltVehiY, i, k); - - d1+=dx*dw/(delta*dl+1.0); - d2+=dy*dw/(delta*dl+1.0); - } - gsl_matrix_set(WHix, j*d_size+i, i, d1); - gsl_vector_set(WHiy, j*d_size+i, d2); - } - } - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix); - gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx); - gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy); - - // Calculate V(beta) and beta. - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (d_size); - LUDecomp (xPx, pmt, &sig); - LUSolve (xPx, pmt, xPy, D_l); - LUInvert (xPx, pmt, Vbeta); - - // Need to multiply UltVehi on both sides or one side. - gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, - 0.0, xPx); - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta); - - // Calculate test statistic and p value. 
- gsl_blas_ddot(D_l, xPy, &d); - - p_value=gsl_cdf_chisq_Q (d, (double)d_size); - - gsl_vector_free(D_l); - gsl_matrix_free(UltVeh); - gsl_matrix_free(UltVehi); - gsl_matrix_free(Qi); - gsl_matrix_free(WHix); - gsl_matrix_free(QiWHix); - - gsl_matrix_free(xPx); - gsl_vector_free(xPy); - gsl_vector_free(WHiy); - - gsl_permutation_free(pmt); - - return p_value; +double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec, + const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g, + const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_vector *beta, + gsl_matrix *Vbeta) { + size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1; + size_t dc_size = d_size * c_size; + double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value; + + gsl_vector *D_l = gsl_vector_alloc(d_size); + gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *WHix = gsl_matrix_alloc(dc_size, d_size); + gsl_matrix *QiWHix = gsl_matrix_alloc(dc_size, d_size); + + gsl_matrix *xPx = gsl_matrix_alloc(d_size, d_size); + gsl_vector *xPy = gsl_vector_alloc(d_size); + gsl_vector *WHiy = gsl_vector_alloc(dc_size); + + gsl_matrix_set_zero(xPx); + gsl_matrix_set_zero(WHix); + gsl_vector_set_zero(xPy); + gsl_vector_set_zero(WHiy); + + // Eigen decomposition and calculate log|Ve|. + logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi); + + // Calculate Qi and log|Q|. + logdet_Q = CalcQi(eval, D_l, W, Qi); + + // Calculate UltVehiY. + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY); + + // Calculate WHix, WHiy, xHiy, xHix. 
+ for (size_t i = 0; i < d_size; i++) { + dl = gsl_vector_get(D_l, i); + + d1 = 0.0; + d2 = 0.0; + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + dx = gsl_vector_get(x_vec, k); + dy = gsl_matrix_get(UltVehiY, i, k); + + d1 += dx * dy / (delta * dl + 1.0); + d2 += dx * dx / (delta * dl + 1.0); + } + gsl_vector_set(xPy, i, d1); + gsl_matrix_set(xPx, i, i, d2); + + for (size_t j = 0; j < c_size; j++) { + d1 = 0.0; + d2 = 0.0; + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + dx = gsl_vector_get(x_vec, k); + dw = gsl_matrix_get(W, j, k); + dy = gsl_matrix_get(UltVehiY, i, k); + + d1 += dx * dw / (delta * dl + 1.0); + d2 += dy * dw / (delta * dl + 1.0); + } + gsl_matrix_set(WHix, j * d_size + i, i, d1); + gsl_vector_set(WHiy, j * d_size + i, d2); + } + } + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx); + gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy); + + // Calculate V(beta) and beta. + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(d_size); + LUDecomp(xPx, pmt, &sig); + LUSolve(xPx, pmt, xPy, D_l); + LUInvert(xPx, pmt, Vbeta); + + // Need to multiply UltVehi on both sides or one side. + gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 0.0, xPx); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta); + + // Calculate test statistic and p value. + gsl_blas_ddot(D_l, xPy, &d); + + p_value = gsl_cdf_chisq_Q(d, (double)d_size); + + gsl_vector_free(D_l); + gsl_matrix_free(UltVeh); + gsl_matrix_free(UltVehi); + gsl_matrix_free(Qi); + gsl_matrix_free(WHix); + gsl_matrix_free(QiWHix); + + gsl_matrix_free(xPx); + gsl_vector_free(xPy); + gsl_vector_free(WHiy); + + gsl_permutation_free(pmt); + + return p_value; } // Calculate B and its standard error (which is a matrix of the same // dimension as B). 
-void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, - const gsl_matrix *Y, const gsl_matrix *V_g, - const gsl_matrix *V_e, gsl_matrix *UltVehiY, - gsl_matrix *B, gsl_matrix *se_B) { - size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1; - size_t dc_size=d_size*c_size; - double delta, dl, d, dy, dw, logdet_Ve, logdet_Q; - - gsl_vector *D_l=gsl_vector_alloc (d_size); - gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *Qi_temp=gsl_matrix_alloc (dc_size, dc_size); - gsl_vector *WHiy=gsl_vector_alloc (dc_size); - gsl_vector *QiWHiy=gsl_vector_alloc (dc_size); - gsl_vector *beta=gsl_vector_alloc (dc_size); - gsl_matrix *Vbeta=gsl_matrix_alloc (dc_size, dc_size); - - gsl_vector_set_zero (WHiy); - - // Eigen decomposition and calculate log|Ve|. - logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); - - // Calculate Qi and log|Q|. - logdet_Q=CalcQi (eval, D_l, W, Qi); - - // Calculate UltVehiY. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, - 0.0, UltVehiY); - - // Calculate WHiy. - for (size_t i=0; i<d_size; i++) { - dl=gsl_vector_get(D_l, i); - - for (size_t j=0; j<c_size; j++) { - d=0.0; - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get(eval, k); - dw=gsl_matrix_get(W, j, k); - dy=gsl_matrix_get(UltVehiY, i, k); - - d+=dy*dw/(delta*dl+1.0); - } - gsl_vector_set(WHiy, j*d_size+i, d); - } - } - - gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy); - - // Need to multiply I_c\otimes UltVehi on both sides or one side. 
- for (size_t i=0; i<c_size; i++) { - gsl_vector_view QiWHiy_sub= - gsl_vector_subvector(QiWHiy, i*d_size, d_size); - gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size); - gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0, - &beta_sub.vector); - - for (size_t j=0; j<c_size; j++) { - gsl_matrix_view Qi_sub= - gsl_matrix_submatrix (Qi, i*d_size, j*d_size, d_size, d_size); - gsl_matrix_view Qitemp_sub= - gsl_matrix_submatrix (Qi_temp,i*d_size,j*d_size,d_size,d_size); - gsl_matrix_view Vbeta_sub= - gsl_matrix_submatrix (Vbeta, i*d_size, j*d_size, d_size, d_size); - - if (j<i) { - gsl_matrix_view Vbeta_sym= - gsl_matrix_submatrix(Vbeta,j*d_size,i*d_size,d_size,d_size); - gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix,&Vbeta_sym.matrix); - } else { - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, - UltVeh, 0.0, &Qitemp_sub.matrix); - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, - &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix); - } - } - } - - // Copy beta to B, and Vbeta to se_B. - for (size_t j=0; j<B->size2; j++) { - for (size_t i=0; i<B->size1; i++) { - gsl_matrix_set(B, i, j, gsl_vector_get(beta, j*d_size+i)); - gsl_matrix_set(se_B, i, j, - sqrt(gsl_matrix_get(Vbeta,j*d_size+i,j*d_size+i))); - } - } - - // Free matrices. 
- gsl_vector_free(D_l); - gsl_matrix_free(UltVeh); - gsl_matrix_free(UltVehi); - gsl_matrix_free(Qi); - gsl_matrix_free(Qi_temp); - gsl_vector_free(WHiy); - gsl_vector_free(QiWHiy); - gsl_vector_free(beta); - gsl_matrix_free(Vbeta); - - return; +void MphCalcBeta(const gsl_vector *eval, const gsl_matrix *W, + const gsl_matrix *Y, const gsl_matrix *V_g, + const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_matrix *B, + gsl_matrix *se_B) { + size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1; + size_t dc_size = d_size * c_size; + double delta, dl, d, dy, dw, logdet_Ve, logdet_Q; + + gsl_vector *D_l = gsl_vector_alloc(d_size); + gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *Qi_temp = gsl_matrix_alloc(dc_size, dc_size); + gsl_vector *WHiy = gsl_vector_alloc(dc_size); + gsl_vector *QiWHiy = gsl_vector_alloc(dc_size); + gsl_vector *beta = gsl_vector_alloc(dc_size); + gsl_matrix *Vbeta = gsl_matrix_alloc(dc_size, dc_size); + + gsl_vector_set_zero(WHiy); + + // Eigen decomposition and calculate log|Ve|. + logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi); + + // Calculate Qi and log|Q|. + logdet_Q = CalcQi(eval, D_l, W, Qi); + + // Calculate UltVehiY. + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY); + + // Calculate WHiy. + for (size_t i = 0; i < d_size; i++) { + dl = gsl_vector_get(D_l, i); + + for (size_t j = 0; j < c_size; j++) { + d = 0.0; + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + dw = gsl_matrix_get(W, j, k); + dy = gsl_matrix_get(UltVehiY, i, k); + + d += dy * dw / (delta * dl + 1.0); + } + gsl_vector_set(WHiy, j * d_size + i, d); + } + } + + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy); + + // Need to multiply I_c\otimes UltVehi on both sides or one side. 
+ for (size_t i = 0; i < c_size; i++) { + gsl_vector_view QiWHiy_sub = + gsl_vector_subvector(QiWHiy, i * d_size, d_size); + gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size); + gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0, + &beta_sub.vector); + + for (size_t j = 0; j < c_size; j++) { + gsl_matrix_view Qi_sub = + gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size); + gsl_matrix_view Qitemp_sub = + gsl_matrix_submatrix(Qi_temp, i * d_size, j * d_size, d_size, d_size); + gsl_matrix_view Vbeta_sub = + gsl_matrix_submatrix(Vbeta, i * d_size, j * d_size, d_size, d_size); + + if (j < i) { + gsl_matrix_view Vbeta_sym = + gsl_matrix_submatrix(Vbeta, j * d_size, i * d_size, d_size, d_size); + gsl_matrix_transpose_memcpy(&Vbeta_sub.matrix, &Vbeta_sym.matrix); + } else { + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh, + 0.0, &Qitemp_sub.matrix); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, + &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix); + } + } + } + + // Copy beta to B, and Vbeta to se_B. + for (size_t j = 0; j < B->size2; j++) { + for (size_t i = 0; i < B->size1; i++) { + gsl_matrix_set(B, i, j, gsl_vector_get(beta, j * d_size + i)); + gsl_matrix_set(se_B, i, j, sqrt(gsl_matrix_get(Vbeta, j * d_size + i, + j * d_size + i))); + } + } + + // Free matrices. + gsl_vector_free(D_l); + gsl_matrix_free(UltVeh); + gsl_matrix_free(UltVehi); + gsl_matrix_free(Qi); + gsl_matrix_free(Qi_temp); + gsl_vector_free(WHiy); + gsl_vector_free(QiWHiy); + gsl_vector_free(beta); + gsl_matrix_free(Vbeta); + + return; } // Below are functions for Newton-Raphson's algorithm. @@ -912,996 +931,962 @@ void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, // Calculate all Hi and return logdet_H=\sum_{k=1}^{n}log|H_k| // and calculate Qi and return logdet_Q // and calculate yPy. 
-void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X, - const gsl_matrix *V_g, const gsl_matrix *V_e, - gsl_matrix *Hi_all, gsl_matrix *Qi, double &logdet_H, - double &logdet_Q) { - gsl_matrix_set_zero (Hi_all); - gsl_matrix_set_zero (Qi); - logdet_H=0.0; logdet_Q=0.0; - - size_t n_size=eval->size, c_size=X->size1, d_size=V_g->size1; - double logdet_Ve=0.0, delta, dl, d; - - gsl_matrix *mat_dd=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); - gsl_vector *D_l=gsl_vector_alloc (d_size); - - // Calculate D_l, UltVeh and UltVehi. - logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); - - // Calculate each Hi and log|H_k|. - logdet_H=(double)n_size*logdet_Ve; - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - - gsl_matrix_memcpy (mat_dd, UltVehi); - for (size_t i=0; i<d_size; i++) { - dl=gsl_vector_get(D_l, i); - d=delta*dl+1.0; - - gsl_vector_view mat_row=gsl_matrix_row (mat_dd, i); - gsl_vector_scale (&mat_row.vector, 1.0/d); - - logdet_H+=log(d); - } - - gsl_matrix_view Hi_k= - gsl_matrix_submatrix(Hi_all, 0, k*d_size, d_size, d_size); - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi, - mat_dd, 0.0, &Hi_k.matrix); - } - - // Calculate Qi, and multiply I\o times UtVeh on both side and - // calculate logdet_Q, don't forget to substract - // c_size*logdet_Ve. 
- logdet_Q=CalcQi (eval, D_l, X, Qi)-(double)c_size*logdet_Ve; - - for (size_t i=0; i<c_size; i++) { - for (size_t j=0; j<c_size; j++) { - gsl_matrix_view Qi_sub= - gsl_matrix_submatrix(Qi,i*d_size,j*d_size,d_size,d_size); - if (j<i) { - gsl_matrix_view Qi_sym= - gsl_matrix_submatrix(Qi,j*d_size,i*d_size,d_size,d_size); - gsl_matrix_transpose_memcpy(&Qi_sub.matrix,&Qi_sym.matrix); - } else { - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &Qi_sub.matrix, UltVeh, 0.0, mat_dd); - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, - mat_dd, 0.0, &Qi_sub.matrix); - } - } - } - - // Free memory. - gsl_matrix_free(mat_dd); - gsl_matrix_free(UltVeh); - gsl_matrix_free(UltVehi); - gsl_vector_free(D_l); - - return; +void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X, + const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *Hi_all, + gsl_matrix *Qi, double &logdet_H, double &logdet_Q) { + gsl_matrix_set_zero(Hi_all); + gsl_matrix_set_zero(Qi); + logdet_H = 0.0; + logdet_Q = 0.0; + + size_t n_size = eval->size, c_size = X->size1, d_size = V_g->size1; + double logdet_Ve = 0.0, delta, dl, d; + + gsl_matrix *mat_dd = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size); + gsl_vector *D_l = gsl_vector_alloc(d_size); + + // Calculate D_l, UltVeh and UltVehi. + logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi); + + // Calculate each Hi and log|H_k|. 
+ logdet_H = (double)n_size * logdet_Ve; + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + + gsl_matrix_memcpy(mat_dd, UltVehi); + for (size_t i = 0; i < d_size; i++) { + dl = gsl_vector_get(D_l, i); + d = delta * dl + 1.0; + + gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i); + gsl_vector_scale(&mat_row.vector, 1.0 / d); + + logdet_H += log(d); + } + + gsl_matrix_view Hi_k = + gsl_matrix_submatrix(Hi_all, 0, k * d_size, d_size, d_size); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi, mat_dd, 0.0, + &Hi_k.matrix); + } + + // Calculate Qi, and multiply I\o times UtVeh on both side and + // calculate logdet_Q, don't forget to substract + // c_size*logdet_Ve. + logdet_Q = CalcQi(eval, D_l, X, Qi) - (double)c_size * logdet_Ve; + + for (size_t i = 0; i < c_size; i++) { + for (size_t j = 0; j < c_size; j++) { + gsl_matrix_view Qi_sub = + gsl_matrix_submatrix(Qi, i * d_size, j * d_size, d_size, d_size); + if (j < i) { + gsl_matrix_view Qi_sym = + gsl_matrix_submatrix(Qi, j * d_size, i * d_size, d_size, d_size); + gsl_matrix_transpose_memcpy(&Qi_sub.matrix, &Qi_sym.matrix); + } else { + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh, + 0.0, mat_dd); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, mat_dd, 0.0, + &Qi_sub.matrix); + } + } + } + + // Free memory. + gsl_matrix_free(mat_dd); + gsl_matrix_free(UltVeh); + gsl_matrix_free(UltVehi); + gsl_vector_free(D_l); + + return; } // Calculate all Hiy. 
-void Calc_Hiy_all (const gsl_matrix *Y, const gsl_matrix *Hi_all, - gsl_matrix *Hiy_all) { - gsl_matrix_set_zero (Hiy_all); +void Calc_Hiy_all(const gsl_matrix *Y, const gsl_matrix *Hi_all, + gsl_matrix *Hiy_all) { + gsl_matrix_set_zero(Hiy_all); - size_t n_size=Y->size2, d_size=Y->size1; + size_t n_size = Y->size2, d_size = Y->size1; - for (size_t k=0; k<n_size; k++) { - gsl_matrix_const_view Hi_k= - gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size); - gsl_vector_const_view y_k=gsl_matrix_const_column(Y,k); - gsl_vector_view Hiy_k=gsl_matrix_column(Hiy_all, k); + for (size_t k = 0; k < n_size; k++) { + gsl_matrix_const_view Hi_k = + gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size); + gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k); + gsl_vector_view Hiy_k = gsl_matrix_column(Hiy_all, k); - gsl_blas_dgemv(CblasNoTrans,1.0,&Hi_k.matrix,&y_k.vector, - 0.0,&Hiy_k.vector); - } + gsl_blas_dgemv(CblasNoTrans, 1.0, &Hi_k.matrix, &y_k.vector, 0.0, + &Hiy_k.vector); + } - return; + return; } // Calculate all xHi. 
-void Calc_xHi_all (const gsl_matrix *X, const gsl_matrix *Hi_all, - gsl_matrix *xHi_all) { - gsl_matrix_set_zero (xHi_all); +void Calc_xHi_all(const gsl_matrix *X, const gsl_matrix *Hi_all, + gsl_matrix *xHi_all) { + gsl_matrix_set_zero(xHi_all); - size_t n_size=X->size2, c_size=X->size1, d_size=Hi_all->size1; + size_t n_size = X->size2, c_size = X->size1, d_size = Hi_all->size1; - double d; + double d; - for (size_t k=0; k<n_size; k++) { - gsl_matrix_const_view Hi_k= - gsl_matrix_const_submatrix(Hi_all,0,k*d_size,d_size,d_size); + for (size_t k = 0; k < n_size; k++) { + gsl_matrix_const_view Hi_k = + gsl_matrix_const_submatrix(Hi_all, 0, k * d_size, d_size, d_size); - for (size_t i=0; i<c_size; i++) { - d=gsl_matrix_get (X, i, k); - gsl_matrix_view xHi_sub= - gsl_matrix_submatrix(xHi_all,i*d_size,k*d_size, - d_size,d_size); - gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix); - gsl_matrix_scale(&xHi_sub.matrix, d); - } - } + for (size_t i = 0; i < c_size; i++) { + d = gsl_matrix_get(X, i, k); + gsl_matrix_view xHi_sub = + gsl_matrix_submatrix(xHi_all, i * d_size, k * d_size, d_size, d_size); + gsl_matrix_memcpy(&xHi_sub.matrix, &Hi_k.matrix); + gsl_matrix_scale(&xHi_sub.matrix, d); + } + } - return; + return; } // Calculate scalar yHiy. 
-double Calc_yHiy (const gsl_matrix *Y, const gsl_matrix *Hiy_all) { - double yHiy=0.0, d; - size_t n_size=Y->size2; +double Calc_yHiy(const gsl_matrix *Y, const gsl_matrix *Hiy_all) { + double yHiy = 0.0, d; + size_t n_size = Y->size2; - for (size_t k=0; k<n_size; k++) { - gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k); - gsl_vector_const_view Hiy_k=gsl_matrix_const_column(Hiy_all, k); + for (size_t k = 0; k < n_size; k++) { + gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k); + gsl_vector_const_view Hiy_k = gsl_matrix_const_column(Hiy_all, k); - gsl_blas_ddot (&Hiy_k.vector, &y_k.vector, &d); - yHiy+=d; - } + gsl_blas_ddot(&Hiy_k.vector, &y_k.vector, &d); + yHiy += d; + } - return yHiy; + return yHiy; } // Calculate the vector xHiy. -void Calc_xHiy (const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) { - gsl_vector_set_zero (xHiy); +void Calc_xHiy(const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy) { + gsl_vector_set_zero(xHiy); - size_t n_size=Y->size2, d_size=Y->size1, dc_size=xHi->size1; + size_t n_size = Y->size2, d_size = Y->size1, dc_size = xHi->size1; - for (size_t k=0; k<n_size; k++) { - gsl_matrix_const_view xHi_k= - gsl_matrix_const_submatrix(xHi,0,k*d_size,dc_size,d_size); - gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k); + for (size_t k = 0; k < n_size; k++) { + gsl_matrix_const_view xHi_k = + gsl_matrix_const_submatrix(xHi, 0, k * d_size, dc_size, d_size); + gsl_vector_const_view y_k = gsl_matrix_const_column(Y, k); - gsl_blas_dgemv(CblasNoTrans,1.0,&xHi_k.matrix,&y_k.vector, - 1.0,xHiy); - } + gsl_blas_dgemv(CblasNoTrans, 1.0, &xHi_k.matrix, &y_k.vector, 1.0, xHiy); + } - return; + return; } // 0<=i,j<d_size -size_t GetIndex (const size_t i, const size_t j, const size_t d_size) { - if (i>=d_size || j>=d_size) { - cout<<"error in GetIndex."<<endl; - return 0; - } - - size_t s, l; - if (j<i) { - s=j; - l=i; - } else { - s=i; - l=j; - } - - return (2*d_size-s+1)*s/2+l-s; +size_t GetIndex(const size_t i, 
const size_t j, const size_t d_size) { + if (i >= d_size || j >= d_size) { + cout << "error in GetIndex." << endl; + return 0; + } + + size_t s, l; + if (j < i) { + s = j; + l = i; + } else { + s = i; + l = j; + } + + return (2 * d_size - s + 1) * s / 2 + l - s; } -void Calc_yHiDHiy (const gsl_vector *eval, const gsl_matrix *Hiy, - const size_t i, const size_t j, double &yHiDHiy_g, - double &yHiDHiy_e) { - yHiDHiy_g=0.0; - yHiDHiy_e=0.0; +void Calc_yHiDHiy(const gsl_vector *eval, const gsl_matrix *Hiy, const size_t i, + const size_t j, double &yHiDHiy_g, double &yHiDHiy_e) { + yHiDHiy_g = 0.0; + yHiDHiy_e = 0.0; - size_t n_size=eval->size; + size_t n_size = eval->size; - double delta, d1, d2; + double delta, d1, d2; - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - d1=gsl_matrix_get (Hiy, i, k); - d2=gsl_matrix_get (Hiy, j, k); + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + d1 = gsl_matrix_get(Hiy, i, k); + d2 = gsl_matrix_get(Hiy, j, k); - if (i==j) { - yHiDHiy_g+=delta*d1*d2; - yHiDHiy_e+=d1*d2; - } else { - yHiDHiy_g+=delta*d1*d2*2.0; - yHiDHiy_e+=d1*d2*2.0; - } - } + if (i == j) { + yHiDHiy_g += delta * d1 * d2; + yHiDHiy_e += d1 * d2; + } else { + yHiDHiy_g += delta * d1 * d2 * 2.0; + yHiDHiy_e += d1 * d2 * 2.0; + } + } - return; + return; } -void Calc_xHiDHiy (const gsl_vector *eval, const gsl_matrix *xHi, - const gsl_matrix *Hiy, const size_t i, const size_t j, - gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) { - gsl_vector_set_zero(xHiDHiy_g); - gsl_vector_set_zero(xHiDHiy_e); +void Calc_xHiDHiy(const gsl_vector *eval, const gsl_matrix *xHi, + const gsl_matrix *Hiy, const size_t i, const size_t j, + gsl_vector *xHiDHiy_g, gsl_vector *xHiDHiy_e) { + gsl_vector_set_zero(xHiDHiy_g); + gsl_vector_set_zero(xHiDHiy_e); + + size_t n_size = eval->size, d_size = Hiy->size1; - size_t n_size=eval->size, d_size=Hiy->size1; + double delta, d; - double delta, d; + for (size_t k = 0; k < n_size; k++) { + delta = 
gsl_vector_get(eval, k); - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); + gsl_vector_const_view xHi_col_i = + gsl_matrix_const_column(xHi, k * d_size + i); + d = gsl_matrix_get(Hiy, j, k); - gsl_vector_const_view xHi_col_i= - gsl_matrix_const_column (xHi, k*d_size+i); - d=gsl_matrix_get (Hiy, j, k); + gsl_blas_daxpy(d * delta, &xHi_col_i.vector, xHiDHiy_g); + gsl_blas_daxpy(d, &xHi_col_i.vector, xHiDHiy_e); - gsl_blas_daxpy (d*delta, &xHi_col_i.vector, xHiDHiy_g); - gsl_blas_daxpy (d, &xHi_col_i.vector, xHiDHiy_e); + if (i != j) { + gsl_vector_const_view xHi_col_j = + gsl_matrix_const_column(xHi, k * d_size + j); + d = gsl_matrix_get(Hiy, i, k); - if (i!=j) { - gsl_vector_const_view xHi_col_j= - gsl_matrix_const_column (xHi, k*d_size+j); - d=gsl_matrix_get (Hiy, i, k); - - gsl_blas_daxpy (d*delta, &xHi_col_j.vector, xHiDHiy_g); - gsl_blas_daxpy (d, &xHi_col_j.vector, xHiDHiy_e); - } - } + gsl_blas_daxpy(d * delta, &xHi_col_j.vector, xHiDHiy_g); + gsl_blas_daxpy(d, &xHi_col_j.vector, xHiDHiy_e); + } + } - return; + return; } -void Calc_xHiDHix (const gsl_vector *eval, const gsl_matrix *xHi, - const size_t i, const size_t j, gsl_matrix *xHiDHix_g, - gsl_matrix *xHiDHix_e) { - gsl_matrix_set_zero(xHiDHix_g); - gsl_matrix_set_zero(xHiDHix_e); +void Calc_xHiDHix(const gsl_vector *eval, const gsl_matrix *xHi, const size_t i, + const size_t j, gsl_matrix *xHiDHix_g, + gsl_matrix *xHiDHix_e) { + gsl_matrix_set_zero(xHiDHix_g); + gsl_matrix_set_zero(xHiDHix_e); - size_t n_size=eval->size, dc_size=xHi->size1; - size_t d_size=xHi->size2/n_size; + size_t n_size = eval->size, dc_size = xHi->size1; + size_t d_size = xHi->size2 / n_size; - double delta; + double delta; - gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *mat_dcdc_t=gsl_matrix_alloc (dc_size, dc_size); + gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *mat_dcdc_t = gsl_matrix_alloc(dc_size, dc_size); - for (size_t k=0; k<n_size; k++) { - 
delta=gsl_vector_get (eval, k); + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); - gsl_vector_const_view xHi_col_i= - gsl_matrix_const_column (xHi, k*d_size+i); - gsl_vector_const_view xHi_col_j= - gsl_matrix_const_column (xHi, k*d_size+j); + gsl_vector_const_view xHi_col_i = + gsl_matrix_const_column(xHi, k * d_size + i); + gsl_vector_const_view xHi_col_j = + gsl_matrix_const_column(xHi, k * d_size + j); - gsl_matrix_set_zero (mat_dcdc); - gsl_blas_dger(1.0,&xHi_col_i.vector,&xHi_col_j.vector, - mat_dcdc); + gsl_matrix_set_zero(mat_dcdc); + gsl_blas_dger(1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc); - gsl_matrix_transpose_memcpy (mat_dcdc_t, mat_dcdc); + gsl_matrix_transpose_memcpy(mat_dcdc_t, mat_dcdc); - gsl_matrix_add (xHiDHix_e, mat_dcdc); + gsl_matrix_add(xHiDHix_e, mat_dcdc); - gsl_matrix_scale (mat_dcdc, delta); - gsl_matrix_add (xHiDHix_g, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHix_g, mat_dcdc); - if (i!=j) { - gsl_matrix_add (xHiDHix_e, mat_dcdc_t); + if (i != j) { + gsl_matrix_add(xHiDHix_e, mat_dcdc_t); - gsl_matrix_scale (mat_dcdc_t, delta); - gsl_matrix_add (xHiDHix_g, mat_dcdc_t); - } - } + gsl_matrix_scale(mat_dcdc_t, delta); + gsl_matrix_add(xHiDHix_g, mat_dcdc_t); + } + } - gsl_matrix_free(mat_dcdc); - gsl_matrix_free(mat_dcdc_t); + gsl_matrix_free(mat_dcdc); + gsl_matrix_free(mat_dcdc_t); - return; + return; } -void Calc_yHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi, - const gsl_matrix *Hiy, const size_t i1, - const size_t j1, const size_t i2, const size_t j2, - double &yHiDHiDHiy_gg, double &yHiDHiDHiy_ee, - double &yHiDHiDHiy_ge) { - yHiDHiDHiy_gg=0.0; - yHiDHiDHiy_ee=0.0; - yHiDHiDHiy_ge=0.0; - - size_t n_size=eval->size, d_size=Hiy->size1; - - double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2; - double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; - - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - - d_Hiy_i1=gsl_matrix_get (Hiy, i1, k); - 
d_Hiy_j1=gsl_matrix_get (Hiy, j1, k); - d_Hiy_i2=gsl_matrix_get (Hiy, i2, k); - d_Hiy_j2=gsl_matrix_get (Hiy, j2, k); - - d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2); - d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2); - d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2); - d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2); - - if (i1==j1) { - yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2); - yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2); - yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2); - - if (i2!=j2) { - yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2); - yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2); - yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2); - } - } else { - yHiDHiDHiy_gg+= - delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 + - d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2); - yHiDHiDHiy_ee+= - (d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2+d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2); - yHiDHiDHiy_ge+= - delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2 + - d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2); - - if (i2!=j2) { - yHiDHiDHiy_gg+= - delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 + - d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2); - yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 + - d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2); - yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2 + - d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2); - } - } - } - - return; +void Calc_yHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi, + const gsl_matrix *Hiy, const size_t i1, const size_t j1, + const size_t i2, const size_t j2, double &yHiDHiDHiy_gg, + double &yHiDHiDHiy_ee, double &yHiDHiDHiy_ge) { + yHiDHiDHiy_gg = 0.0; + yHiDHiDHiy_ee = 0.0; + yHiDHiDHiy_ge = 0.0; + + size_t n_size = eval->size, d_size = Hiy->size1; + + double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2; + double d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; + + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + + d_Hiy_i1 = gsl_matrix_get(Hiy, i1, k); + d_Hiy_j1 = gsl_matrix_get(Hiy, j1, k); + d_Hiy_i2 = gsl_matrix_get(Hiy, i2, k); + d_Hiy_j2 = gsl_matrix_get(Hiy, j2, k); + + d_Hi_i1i2 = 
gsl_matrix_get(Hi, i1, k * d_size + i2); + d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2); + d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2); + d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2); + + if (i1 == j1) { + yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2); + yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2); + yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2); + + if (i2 != j2) { + yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2); + yHiDHiDHiy_ee += (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2); + yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2); + } + } else { + yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 + + d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2); + yHiDHiDHiy_ee += + (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 + d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2); + yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1i2 * d_Hiy_j2 + + d_Hiy_j1 * d_Hi_i1i2 * d_Hiy_j2); + + if (i2 != j2) { + yHiDHiDHiy_gg += delta * delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 + + d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2); + yHiDHiDHiy_ee += + (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 + d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2); + yHiDHiDHiy_ge += delta * (d_Hiy_i1 * d_Hi_j1j2 * d_Hiy_i2 + + d_Hiy_j1 * d_Hi_i1j2 * d_Hiy_i2); + } + } + } + + return; } -void Calc_xHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi, - const gsl_matrix *xHi, const gsl_matrix *Hiy, - const size_t i1, const size_t j1, const size_t i2, - const size_t j2, gsl_vector *xHiDHiDHiy_gg, - gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) { - gsl_vector_set_zero(xHiDHiDHiy_gg); - gsl_vector_set_zero(xHiDHiDHiy_ee); - gsl_vector_set_zero(xHiDHiDHiy_ge); - - size_t n_size=eval->size, d_size=Hiy->size1; - - double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2; - double d_Hi_j1i2, d_Hi_j1j2; - - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - - gsl_vector_const_view xHi_col_i= - gsl_matrix_const_column (xHi, k*d_size+i1); - gsl_vector_const_view xHi_col_j= - 
gsl_matrix_const_column (xHi, k*d_size+j1); - - d_Hiy_i=gsl_matrix_get (Hiy, i2, k); - d_Hiy_j=gsl_matrix_get (Hiy, j2, k); - - d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2); - d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2); - d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2); - d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2); - - if (i1==j1) { - gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, - xHiDHiDHiy_gg); - gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, - xHiDHiDHiy_ee); - gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, - xHiDHiDHiy_ge); - - if (i2!=j2) { - gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i, - &xHi_col_i.vector, xHiDHiDHiy_gg); - gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, - xHiDHiDHiy_ee); - gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, - xHiDHiDHiy_ge); - } - } else { - gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, - xHiDHiDHiy_gg); - gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, - xHiDHiDHiy_ee); - gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, - xHiDHiDHiy_ge); - - gsl_blas_daxpy (delta*delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector, - xHiDHiDHiy_gg); - gsl_blas_daxpy (d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector, - xHiDHiDHiy_ee); - gsl_blas_daxpy (delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector, - xHiDHiDHiy_ge); - - if (i2!=j2) { - gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i, - &xHi_col_i.vector, xHiDHiDHiy_gg); - gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, - xHiDHiDHiy_ee); - gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, - xHiDHiDHiy_ge); - - gsl_blas_daxpy (delta*delta*d_Hi_i1j2*d_Hiy_i, - &xHi_col_j.vector, xHiDHiDHiy_gg); - gsl_blas_daxpy (d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector, - xHiDHiDHiy_ee); - gsl_blas_daxpy (delta*d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector, - xHiDHiDHiy_ge); - } - } - } - - return; +void Calc_xHiDHiDHiy(const gsl_vector *eval, const gsl_matrix *Hi, + const gsl_matrix *xHi, const gsl_matrix *Hiy, + const size_t i1, 
const size_t j1, const size_t i2, + const size_t j2, gsl_vector *xHiDHiDHiy_gg, + gsl_vector *xHiDHiDHiy_ee, gsl_vector *xHiDHiDHiy_ge) { + gsl_vector_set_zero(xHiDHiDHiy_gg); + gsl_vector_set_zero(xHiDHiDHiy_ee); + gsl_vector_set_zero(xHiDHiDHiy_ge); + + size_t n_size = eval->size, d_size = Hiy->size1; + + double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2; + double d_Hi_j1i2, d_Hi_j1j2; + + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + + gsl_vector_const_view xHi_col_i = + gsl_matrix_const_column(xHi, k * d_size + i1); + gsl_vector_const_view xHi_col_j = + gsl_matrix_const_column(xHi, k * d_size + j1); + + d_Hiy_i = gsl_matrix_get(Hiy, i2, k); + d_Hiy_j = gsl_matrix_get(Hiy, j2, k); + + d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2); + d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2); + d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2); + d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2); + + if (i1 == j1) { + gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, + xHiDHiDHiy_gg); + gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee); + gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, + xHiDHiDHiy_ge); + + if (i2 != j2) { + gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, + xHiDHiDHiy_gg); + gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee); + gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, + xHiDHiDHiy_ge); + } + } else { + gsl_blas_daxpy(delta * delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, + xHiDHiDHiy_gg); + gsl_blas_daxpy(d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee); + gsl_blas_daxpy(delta * d_Hi_j1i2 * d_Hiy_j, &xHi_col_i.vector, + xHiDHiDHiy_ge); + + gsl_blas_daxpy(delta * delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector, + xHiDHiDHiy_gg); + gsl_blas_daxpy(d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_ee); + gsl_blas_daxpy(delta * d_Hi_i1i2 * d_Hiy_j, &xHi_col_j.vector, + xHiDHiDHiy_ge); + + if 
(i2 != j2) { + gsl_blas_daxpy(delta * delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, + xHiDHiDHiy_gg); + gsl_blas_daxpy(d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee); + gsl_blas_daxpy(delta * d_Hi_j1j2 * d_Hiy_i, &xHi_col_i.vector, + xHiDHiDHiy_ge); + + gsl_blas_daxpy(delta * delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector, + xHiDHiDHiy_gg); + gsl_blas_daxpy(d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_ee); + gsl_blas_daxpy(delta * d_Hi_i1j2 * d_Hiy_i, &xHi_col_j.vector, + xHiDHiDHiy_ge); + } + } + } + + return; } +void Calc_xHiDHiDHix(const gsl_vector *eval, const gsl_matrix *Hi, + const gsl_matrix *xHi, const size_t i1, const size_t j1, + const size_t i2, const size_t j2, + gsl_matrix *xHiDHiDHix_gg, gsl_matrix *xHiDHiDHix_ee, + gsl_matrix *xHiDHiDHix_ge) { + gsl_matrix_set_zero(xHiDHiDHix_gg); + gsl_matrix_set_zero(xHiDHiDHix_ee); + gsl_matrix_set_zero(xHiDHiDHix_ge); + + size_t n_size = eval->size, d_size = Hi->size1, dc_size = xHi->size1; + + double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; + + gsl_matrix *mat_dcdc = gsl_matrix_alloc(dc_size, dc_size); + + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + + gsl_vector_const_view xHi_col_i1 = + gsl_matrix_const_column(xHi, k * d_size + i1); + gsl_vector_const_view xHi_col_j1 = + gsl_matrix_const_column(xHi, k * d_size + j1); + gsl_vector_const_view xHi_col_i2 = + gsl_matrix_const_column(xHi, k * d_size + i2); + gsl_vector_const_view xHi_col_j2 = + gsl_matrix_const_column(xHi, k * d_size + j2); + + d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2); + d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2); + d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2); + d_Hi_j1j2 = gsl_matrix_get(Hi, j1, k * d_size + j2); + + if (i1 == j1) { + gsl_matrix_set_zero(mat_dcdc); + gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector, + mat_dcdc); + + gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_ge, 
mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); + + if (i2 != j2) { + gsl_matrix_set_zero(mat_dcdc); + gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector, + mat_dcdc); + + gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); + } + } else { + gsl_matrix_set_zero(mat_dcdc); + gsl_blas_dger(d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector, + mat_dcdc); + + gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); + + gsl_matrix_set_zero(mat_dcdc); + gsl_blas_dger(d_Hi_i1i2, &xHi_col_j1.vector, &xHi_col_j2.vector, + mat_dcdc); + + gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); + + if (i2 != j2) { + gsl_matrix_set_zero(mat_dcdc); + gsl_blas_dger(d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector, + mat_dcdc); + + gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); + + gsl_matrix_set_zero(mat_dcdc); + gsl_blas_dger(d_Hi_i1j2, &xHi_col_j1.vector, &xHi_col_i2.vector, + mat_dcdc); + + gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); + gsl_matrix_scale(mat_dcdc, delta); + gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); + } + } + } + + gsl_matrix_free(mat_dcdc); -void Calc_xHiDHiDHix (const gsl_vector *eval, const gsl_matrix *Hi, - const gsl_matrix *xHi, const size_t i1, const size_t j1, - const size_t i2, const size_t j2, - gsl_matrix *xHiDHiDHix_gg, gsl_matrix 
*xHiDHiDHix_ee, - gsl_matrix *xHiDHiDHix_ge) { - gsl_matrix_set_zero(xHiDHiDHix_gg); - gsl_matrix_set_zero(xHiDHiDHix_ee); - gsl_matrix_set_zero(xHiDHiDHix_ge); - - size_t n_size=eval->size, d_size=Hi->size1, dc_size=xHi->size1; - - double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; - - gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size); - - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - - gsl_vector_const_view xHi_col_i1= - gsl_matrix_const_column (xHi, k*d_size+i1); - gsl_vector_const_view xHi_col_j1= - gsl_matrix_const_column (xHi, k*d_size+j1); - gsl_vector_const_view xHi_col_i2= - gsl_matrix_const_column (xHi, k*d_size+i2); - gsl_vector_const_view xHi_col_j2= - gsl_matrix_const_column (xHi, k*d_size+j2); - - d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2); - d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2); - d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2); - d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2); - - if (i1==j1) { - gsl_matrix_set_zero (mat_dcdc); - gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector, - &xHi_col_j2.vector, mat_dcdc); - - gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); - - if (i2!=j2) { - gsl_matrix_set_zero (mat_dcdc); - gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector, - &xHi_col_i2.vector, mat_dcdc); - - gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); - } - } else { - gsl_matrix_set_zero (mat_dcdc); - gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector, - &xHi_col_j2.vector, mat_dcdc); - - gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); - - gsl_matrix_set_zero 
(mat_dcdc); - gsl_blas_dger (d_Hi_i1i2, &xHi_col_j1.vector, - &xHi_col_j2.vector, mat_dcdc); - - gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); - - if (i2!=j2) { - gsl_matrix_set_zero (mat_dcdc); - gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector, - &xHi_col_i2.vector, mat_dcdc); - - gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); - - gsl_matrix_set_zero (mat_dcdc); - gsl_blas_dger (d_Hi_i1j2, &xHi_col_j1.vector, - &xHi_col_i2.vector, mat_dcdc); - - gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc); - gsl_matrix_scale(mat_dcdc, delta); - gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc); - } - } - } - - gsl_matrix_free(mat_dcdc); - - return; + return; } -void Calc_traceHiD (const gsl_vector *eval, const gsl_matrix *Hi, - const size_t i, const size_t j, double &tHiD_g, - double &tHiD_e) { - tHiD_g=0.0; - tHiD_e=0.0; - - size_t n_size=eval->size, d_size=Hi->size1; - double delta, d; - - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - d=gsl_matrix_get (Hi, j, k*d_size+i); - - if (i==j) { - tHiD_g+=delta*d; - tHiD_e+=d; - } else { - tHiD_g+=delta*d*2.0; - tHiD_e+=d*2.0; - } - } - - return; +void Calc_traceHiD(const gsl_vector *eval, const gsl_matrix *Hi, const size_t i, + const size_t j, double &tHiD_g, double &tHiD_e) { + tHiD_g = 0.0; + tHiD_e = 0.0; + + size_t n_size = eval->size, d_size = Hi->size1; + double delta, d; + + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + d = gsl_matrix_get(Hi, j, k * d_size + i); + + if (i == j) { + tHiD_g += delta * d; + tHiD_e += d; + } else { + tHiD_g += delta * d * 2.0; + tHiD_e += d * 2.0; + } + } + + return; } -void 
Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi, - const size_t i1, const size_t j1, const size_t i2, - const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee, - double &tHiDHiD_ge) { - tHiDHiD_gg=0.0; - tHiDHiD_ee=0.0; - tHiDHiD_ge=0.0; - - size_t n_size=eval->size, d_size=Hi->size1; - double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; - - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get (eval, k); - - d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2); - d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2); - d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2); - d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2); - - if (i1==j1) { - tHiDHiD_gg+=delta*delta*d_Hi_i1j2*d_Hi_j1i2; - tHiDHiD_ee+=d_Hi_i1j2*d_Hi_j1i2; - tHiDHiD_ge+=delta*d_Hi_i1j2*d_Hi_j1i2; - - if (i2!=j2) { - tHiDHiD_gg+=delta*delta*d_Hi_i1i2*d_Hi_j1j2; - tHiDHiD_ee+=d_Hi_i1i2*d_Hi_j1j2; - tHiDHiD_ge+=delta*d_Hi_i1i2*d_Hi_j1j2; - } - } else { - tHiDHiD_gg+=delta*delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2* - d_Hi_i1i2); - tHiDHiD_ee+=(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2); - tHiDHiD_ge+=delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2); - - if (i2!=j2) { - tHiDHiD_gg+=delta*delta*(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2* - d_Hi_i1j2); - tHiDHiD_ee+=(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*d_Hi_i1j2); - tHiDHiD_ge+=delta*(d_Hi_i1i2*d_Hi_j1j2 + - d_Hi_j1i2*d_Hi_i1j2); - } - } - } - - return; +void Calc_traceHiDHiD(const gsl_vector *eval, const gsl_matrix *Hi, + const size_t i1, const size_t j1, const size_t i2, + const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee, + double &tHiDHiD_ge) { + tHiDHiD_gg = 0.0; + tHiDHiD_ee = 0.0; + tHiDHiD_ge = 0.0; + + size_t n_size = eval->size, d_size = Hi->size1; + double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2; + + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + + d_Hi_i1i2 = gsl_matrix_get(Hi, i1, k * d_size + i2); + d_Hi_i1j2 = gsl_matrix_get(Hi, i1, k * d_size + j2); + d_Hi_j1i2 = gsl_matrix_get(Hi, j1, k * d_size + i2); + d_Hi_j1j2 = 
gsl_matrix_get(Hi, j1, k * d_size + j2); + + if (i1 == j1) { + tHiDHiD_gg += delta * delta * d_Hi_i1j2 * d_Hi_j1i2; + tHiDHiD_ee += d_Hi_i1j2 * d_Hi_j1i2; + tHiDHiD_ge += delta * d_Hi_i1j2 * d_Hi_j1i2; + + if (i2 != j2) { + tHiDHiD_gg += delta * delta * d_Hi_i1i2 * d_Hi_j1j2; + tHiDHiD_ee += d_Hi_i1i2 * d_Hi_j1j2; + tHiDHiD_ge += delta * d_Hi_i1i2 * d_Hi_j1j2; + } + } else { + tHiDHiD_gg += + delta * delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2); + tHiDHiD_ee += (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2); + tHiDHiD_ge += delta * (d_Hi_i1j2 * d_Hi_j1i2 + d_Hi_j1j2 * d_Hi_i1i2); + + if (i2 != j2) { + tHiDHiD_gg += + delta * delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2); + tHiDHiD_ee += (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2); + tHiDHiD_ge += delta * (d_Hi_i1i2 * d_Hi_j1j2 + d_Hi_j1i2 * d_Hi_i1j2); + } + } + } + + return; } // trace(PD) = trace((Hi-HixQixHi)D)=trace(HiD) - trace(HixQixHiD) -void Calc_tracePD (const gsl_vector *eval, const gsl_matrix *Qi, - const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g, - const gsl_matrix *xHiDHix_all_e, const size_t i, - const size_t j, double &tPD_g, double &tPD_e) { - size_t dc_size=Qi->size1, d_size=Hi->size1; - size_t v=GetIndex(i, j, d_size); - - double d; - - // Calculate the first part: trace(HiD). - Calc_traceHiD (eval, Hi, i, j, tPD_g, tPD_e); - - // Calculate the second part: -trace(HixQixHiD). 
- for (size_t k=0; k<dc_size; k++) { - gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, k); - gsl_vector_const_view xHiDHix_g_col = - gsl_matrix_const_column (xHiDHix_all_g, v*dc_size+k); - gsl_vector_const_view xHiDHix_e_col = - gsl_matrix_const_column (xHiDHix_all_e, v*dc_size+k); - - gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d); - tPD_g-=d; - gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d); - tPD_e-=d; - } - - return; +void Calc_tracePD(const gsl_vector *eval, const gsl_matrix *Qi, + const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g, + const gsl_matrix *xHiDHix_all_e, const size_t i, + const size_t j, double &tPD_g, double &tPD_e) { + size_t dc_size = Qi->size1, d_size = Hi->size1; + size_t v = GetIndex(i, j, d_size); + + double d; + + // Calculate the first part: trace(HiD). + Calc_traceHiD(eval, Hi, i, j, tPD_g, tPD_e); + + // Calculate the second part: -trace(HixQixHiD). + for (size_t k = 0; k < dc_size; k++) { + gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, k); + gsl_vector_const_view xHiDHix_g_col = + gsl_matrix_const_column(xHiDHix_all_g, v * dc_size + k); + gsl_vector_const_view xHiDHix_e_col = + gsl_matrix_const_column(xHiDHix_all_e, v * dc_size + k); + + gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d); + tPD_g -= d; + gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d); + tPD_e -= d; + } + + return; } // trace(PDPD) = trace((Hi-HixQixHi)D(Hi-HixQixHi)D) // = trace(HiDHiD) - trace(HixQixHiDHiD) // - trace(HiDHixQixHiD) + trace(HixQixHiDHixQixHiD) -void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi, - const gsl_matrix *Hi, const gsl_matrix *xHi, - const gsl_matrix *QixHiDHix_all_g, - const gsl_matrix *QixHiDHix_all_e, - const gsl_matrix *xHiDHiDHix_all_gg, - const gsl_matrix *xHiDHiDHix_all_ee, - const gsl_matrix *xHiDHiDHix_all_ge, - const size_t i1, const size_t j1, - const size_t i2, const size_t j2, - double &tPDPD_gg, double &tPDPD_ee, - double &tPDPD_ge) { - size_t 
dc_size=Qi->size1, d_size=Hi->size1; - size_t v_size=d_size*(d_size+1)/2; - size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size); - - double d; - - // Calculate the first part: trace(HiDHiD). - Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, - tPDPD_ge); - - // Calculate the second and third parts: - // -trace(HixQixHiDHiD) - trace(HiDHixQixHiD) - for (size_t i=0; i<dc_size; i++) { - gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, i); - gsl_vector_const_view xHiDHiDHix_gg_col= - gsl_matrix_const_column(xHiDHiDHix_all_gg, - (v1*v_size+v2)*dc_size+i); - gsl_vector_const_view xHiDHiDHix_ee_col = - gsl_matrix_const_column(xHiDHiDHix_all_ee, - (v1*v_size+v2)*dc_size+i); - gsl_vector_const_view xHiDHiDHix_ge_col = - gsl_matrix_const_column(xHiDHiDHix_all_ge, - (v1*v_size+v2)*dc_size+i); - - gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d); - tPDPD_gg-=d*2.0; - gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d); - tPDPD_ee-=d*2.0; - gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d); - tPDPD_ge-=d*2.0; - } - - // Calculate the fourth part: trace(HixQixHiDHixQixHiD). 
- for (size_t i=0; i<dc_size; i++) { - - gsl_vector_const_view QixHiDHix_g_fullrow1 = - gsl_matrix_const_row (QixHiDHix_all_g, i); - gsl_vector_const_view QixHiDHix_e_fullrow1 = - gsl_matrix_const_row (QixHiDHix_all_e, i); - gsl_vector_const_view QixHiDHix_g_row1 = - gsl_vector_const_subvector (&QixHiDHix_g_fullrow1.vector, - v1*dc_size, dc_size); - gsl_vector_const_view QixHiDHix_e_row1 = - gsl_vector_const_subvector (&QixHiDHix_e_fullrow1.vector, - v1*dc_size, dc_size); - - gsl_vector_const_view QixHiDHix_g_col2 = - gsl_matrix_const_column (QixHiDHix_all_g, v2*dc_size+i); - gsl_vector_const_view QixHiDHix_e_col2 = - gsl_matrix_const_column (QixHiDHix_all_e, v2*dc_size+i); - - gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_g_col2.vector,&d); - tPDPD_gg+=d; - gsl_blas_ddot(&QixHiDHix_e_row1.vector,&QixHiDHix_e_col2.vector,&d); - tPDPD_ee+=d; - gsl_blas_ddot(&QixHiDHix_g_row1.vector,&QixHiDHix_e_col2.vector,&d); - tPDPD_ge+=d; - } - - return; +void Calc_tracePDPD(const gsl_vector *eval, const gsl_matrix *Qi, + const gsl_matrix *Hi, const gsl_matrix *xHi, + const gsl_matrix *QixHiDHix_all_g, + const gsl_matrix *QixHiDHix_all_e, + const gsl_matrix *xHiDHiDHix_all_gg, + const gsl_matrix *xHiDHiDHix_all_ee, + const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1, + const size_t j1, const size_t i2, const size_t j2, + double &tPDPD_gg, double &tPDPD_ee, double &tPDPD_ge) { + size_t dc_size = Qi->size1, d_size = Hi->size1; + size_t v_size = d_size * (d_size + 1) / 2; + size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size); + + double d; + + // Calculate the first part: trace(HiDHiD). 
+ Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge); + + // Calculate the second and third parts: + // -trace(HixQixHiDHiD) - trace(HiDHixQixHiD) + for (size_t i = 0; i < dc_size; i++) { + gsl_vector_const_view Qi_row = gsl_matrix_const_row(Qi, i); + gsl_vector_const_view xHiDHiDHix_gg_col = gsl_matrix_const_column( + xHiDHiDHix_all_gg, (v1 * v_size + v2) * dc_size + i); + gsl_vector_const_view xHiDHiDHix_ee_col = gsl_matrix_const_column( + xHiDHiDHix_all_ee, (v1 * v_size + v2) * dc_size + i); + gsl_vector_const_view xHiDHiDHix_ge_col = gsl_matrix_const_column( + xHiDHiDHix_all_ge, (v1 * v_size + v2) * dc_size + i); + + gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_col.vector, &d); + tPDPD_gg -= d * 2.0; + gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_col.vector, &d); + tPDPD_ee -= d * 2.0; + gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ge_col.vector, &d); + tPDPD_ge -= d * 2.0; + } + + // Calculate the fourth part: trace(HixQixHiDHixQixHiD). + for (size_t i = 0; i < dc_size; i++) { + + gsl_vector_const_view QixHiDHix_g_fullrow1 = + gsl_matrix_const_row(QixHiDHix_all_g, i); + gsl_vector_const_view QixHiDHix_e_fullrow1 = + gsl_matrix_const_row(QixHiDHix_all_e, i); + gsl_vector_const_view QixHiDHix_g_row1 = gsl_vector_const_subvector( + &QixHiDHix_g_fullrow1.vector, v1 * dc_size, dc_size); + gsl_vector_const_view QixHiDHix_e_row1 = gsl_vector_const_subvector( + &QixHiDHix_e_fullrow1.vector, v1 * dc_size, dc_size); + + gsl_vector_const_view QixHiDHix_g_col2 = + gsl_matrix_const_column(QixHiDHix_all_g, v2 * dc_size + i); + gsl_vector_const_view QixHiDHix_e_col2 = + gsl_matrix_const_column(QixHiDHix_all_e, v2 * dc_size + i); + + gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_g_col2.vector, &d); + tPDPD_gg += d; + gsl_blas_ddot(&QixHiDHix_e_row1.vector, &QixHiDHix_e_col2.vector, &d); + tPDPD_ee += d; + gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_e_col2.vector, &d); + tPDPD_ge += d; + } + + return; } // Calculate (xHiDHiy) for every pair 
(i,j). -void Calc_xHiDHiy_all (const gsl_vector *eval, const gsl_matrix *xHi, - const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g, - gsl_matrix *xHiDHiy_all_e) { - gsl_matrix_set_zero(xHiDHiy_all_g); - gsl_matrix_set_zero(xHiDHiy_all_e); - - size_t d_size=Hiy->size1; - size_t v; - - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<d_size; j++) { - if (j<i) {continue;} - v=GetIndex(i, j, d_size); - - gsl_vector_view xHiDHiy_g=gsl_matrix_column (xHiDHiy_all_g, v); - gsl_vector_view xHiDHiy_e=gsl_matrix_column (xHiDHiy_all_e, v); - - Calc_xHiDHiy (eval, xHi, Hiy, i, j, &xHiDHiy_g.vector, - &xHiDHiy_e.vector); - } - } - return; +void Calc_xHiDHiy_all(const gsl_vector *eval, const gsl_matrix *xHi, + const gsl_matrix *Hiy, gsl_matrix *xHiDHiy_all_g, + gsl_matrix *xHiDHiy_all_e) { + gsl_matrix_set_zero(xHiDHiy_all_g); + gsl_matrix_set_zero(xHiDHiy_all_e); + + size_t d_size = Hiy->size1; + size_t v; + + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j < d_size; j++) { + if (j < i) { + continue; + } + v = GetIndex(i, j, d_size); + + gsl_vector_view xHiDHiy_g = gsl_matrix_column(xHiDHiy_all_g, v); + gsl_vector_view xHiDHiy_e = gsl_matrix_column(xHiDHiy_all_e, v); + + Calc_xHiDHiy(eval, xHi, Hiy, i, j, &xHiDHiy_g.vector, &xHiDHiy_e.vector); + } + } + return; } // Calculate (xHiDHix) for every pair (i,j). 
-void Calc_xHiDHix_all (const gsl_vector *eval, const gsl_matrix *xHi, - gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) { +void Calc_xHiDHix_all(const gsl_vector *eval, const gsl_matrix *xHi, + gsl_matrix *xHiDHix_all_g, gsl_matrix *xHiDHix_all_e) { gsl_matrix_set_zero(xHiDHix_all_g); gsl_matrix_set_zero(xHiDHix_all_e); - - size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1; + + size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1; size_t v; - - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<d_size; j++) { - if (j<i) {continue;} - v=GetIndex(i, j, d_size); - + + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j < d_size; j++) { + if (j < i) { + continue; + } + v = GetIndex(i, j, d_size); + gsl_matrix_view xHiDHix_g = - gsl_matrix_submatrix (xHiDHix_all_g, 0, v*dc_size, dc_size, dc_size); + gsl_matrix_submatrix(xHiDHix_all_g, 0, v * dc_size, dc_size, dc_size); gsl_matrix_view xHiDHix_e = - gsl_matrix_submatrix (xHiDHix_all_e, 0, v*dc_size, dc_size, dc_size); - - Calc_xHiDHix (eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix); + gsl_matrix_submatrix(xHiDHix_all_e, 0, v * dc_size, dc_size, dc_size); + + Calc_xHiDHix(eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix); } } return; } // Calculate (xHiDHiy) for every pair (i,j). 
-void Calc_xHiDHiDHiy_all (const size_t v_size, const gsl_vector *eval, - const gsl_matrix *Hi, const gsl_matrix *xHi, - const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg, - gsl_matrix *xHiDHiDHiy_all_ee, - gsl_matrix *xHiDHiDHiy_all_ge) { - gsl_matrix_set_zero(xHiDHiDHiy_all_gg); - gsl_matrix_set_zero(xHiDHiDHiy_all_ee); - gsl_matrix_set_zero(xHiDHiDHiy_all_ge); - - size_t d_size=Hiy->size1; - size_t v1, v2; - - for (size_t i1=0; i1<d_size; i1++) { - for (size_t j1=0; j1<d_size; j1++) { - if (j1<i1) {continue;} - v1=GetIndex(i1, j1, d_size); - - for (size_t i2=0; i2<d_size; i2++) { - for (size_t j2=0; j2<d_size; j2++) { - if (j2<i2) {continue;} - v2=GetIndex(i2, j2, d_size); - - gsl_vector_view xHiDHiDHiy_gg = - gsl_matrix_column (xHiDHiDHiy_all_gg, v1*v_size+v2); - gsl_vector_view xHiDHiDHiy_ee = - gsl_matrix_column (xHiDHiDHiy_all_ee, v1*v_size+v2); - gsl_vector_view xHiDHiDHiy_ge = - gsl_matrix_column (xHiDHiDHiy_all_ge, v1*v_size+v2); - - Calc_xHiDHiDHiy (eval, Hi, xHi, Hiy, i1, j1, i2, j2, &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector, &xHiDHiDHiy_ge.vector); - } - } - } - } - return; +void Calc_xHiDHiDHiy_all(const size_t v_size, const gsl_vector *eval, + const gsl_matrix *Hi, const gsl_matrix *xHi, + const gsl_matrix *Hiy, gsl_matrix *xHiDHiDHiy_all_gg, + gsl_matrix *xHiDHiDHiy_all_ee, + gsl_matrix *xHiDHiDHiy_all_ge) { + gsl_matrix_set_zero(xHiDHiDHiy_all_gg); + gsl_matrix_set_zero(xHiDHiDHiy_all_ee); + gsl_matrix_set_zero(xHiDHiDHiy_all_ge); + + size_t d_size = Hiy->size1; + size_t v1, v2; + + for (size_t i1 = 0; i1 < d_size; i1++) { + for (size_t j1 = 0; j1 < d_size; j1++) { + if (j1 < i1) { + continue; + } + v1 = GetIndex(i1, j1, d_size); + + for (size_t i2 = 0; i2 < d_size; i2++) { + for (size_t j2 = 0; j2 < d_size; j2++) { + if (j2 < i2) { + continue; + } + v2 = GetIndex(i2, j2, d_size); + + gsl_vector_view xHiDHiDHiy_gg = + gsl_matrix_column(xHiDHiDHiy_all_gg, v1 * v_size + v2); + gsl_vector_view xHiDHiDHiy_ee = + 
gsl_matrix_column(xHiDHiDHiy_all_ee, v1 * v_size + v2); + gsl_vector_view xHiDHiDHiy_ge = + gsl_matrix_column(xHiDHiDHiy_all_ge, v1 * v_size + v2); + + Calc_xHiDHiDHiy(eval, Hi, xHi, Hiy, i1, j1, i2, j2, + &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector, + &xHiDHiDHiy_ge.vector); + } + } + } + } + return; } // Calculate (xHiDHix) for every pair (i,j). -void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval, - const gsl_matrix *Hi, const gsl_matrix *xHi, - gsl_matrix *xHiDHiDHix_all_gg, - gsl_matrix *xHiDHiDHix_all_ee, - gsl_matrix *xHiDHiDHix_all_ge) { - gsl_matrix_set_zero(xHiDHiDHix_all_gg); - gsl_matrix_set_zero(xHiDHiDHix_all_ee); - gsl_matrix_set_zero(xHiDHiDHix_all_ge); - - size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1; - size_t v1, v2; - - for (size_t i1=0; i1<d_size; i1++) { - for (size_t j1=0; j1<d_size; j1++) { - if (j1<i1) {continue;} - v1=GetIndex(i1, j1, d_size); - - for (size_t i2=0; i2<d_size; i2++) { - for (size_t j2=0; j2<d_size; j2++) { - if (j2<i2) {continue;} - v2=GetIndex(i2, j2, d_size); - - if (v2<v1) {continue;} - - gsl_matrix_view xHiDHiDHix_gg1 = - gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0, - (v1*v_size+v2)*dc_size, - dc_size, dc_size); - gsl_matrix_view xHiDHiDHix_ee1 = - gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0, - (v1*v_size+v2)*dc_size, - dc_size, dc_size); - gsl_matrix_view xHiDHiDHix_ge1 = - gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0, - (v1*v_size+v2)*dc_size, - dc_size, dc_size); - - Calc_xHiDHiDHix (eval, Hi, xHi, i1, j1, i2, j2, - &xHiDHiDHix_gg1.matrix, - &xHiDHiDHix_ee1.matrix, - &xHiDHiDHix_ge1.matrix); - - if (v2!=v1) { - gsl_matrix_view xHiDHiDHix_gg2 = - gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0, - (v2*v_size+v1)*dc_size, - dc_size, dc_size); - gsl_matrix_view xHiDHiDHix_ee2 = - gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0, - (v2*v_size+v1)*dc_size, - dc_size, dc_size); - gsl_matrix_view xHiDHiDHix_ge2 = - gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0, - (v2*v_size+v1)*dc_size, - dc_size, dc_size); - - 
gsl_matrix_memcpy (&xHiDHiDHix_gg2.matrix, - &xHiDHiDHix_gg1.matrix); - gsl_matrix_memcpy (&xHiDHiDHix_ee2.matrix, - &xHiDHiDHix_ee1.matrix); - gsl_matrix_memcpy (&xHiDHiDHix_ge2.matrix, - &xHiDHiDHix_ge1.matrix); - } - } - } - } - } - - return; +void Calc_xHiDHiDHix_all(const size_t v_size, const gsl_vector *eval, + const gsl_matrix *Hi, const gsl_matrix *xHi, + gsl_matrix *xHiDHiDHix_all_gg, + gsl_matrix *xHiDHiDHix_all_ee, + gsl_matrix *xHiDHiDHix_all_ge) { + gsl_matrix_set_zero(xHiDHiDHix_all_gg); + gsl_matrix_set_zero(xHiDHiDHix_all_ee); + gsl_matrix_set_zero(xHiDHiDHix_all_ge); + + size_t d_size = xHi->size2 / eval->size, dc_size = xHi->size1; + size_t v1, v2; + + for (size_t i1 = 0; i1 < d_size; i1++) { + for (size_t j1 = 0; j1 < d_size; j1++) { + if (j1 < i1) { + continue; + } + v1 = GetIndex(i1, j1, d_size); + + for (size_t i2 = 0; i2 < d_size; i2++) { + for (size_t j2 = 0; j2 < d_size; j2++) { + if (j2 < i2) { + continue; + } + v2 = GetIndex(i2, j2, d_size); + + if (v2 < v1) { + continue; + } + + gsl_matrix_view xHiDHiDHix_gg1 = gsl_matrix_submatrix( + xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, + dc_size); + gsl_matrix_view xHiDHiDHix_ee1 = gsl_matrix_submatrix( + xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, + dc_size); + gsl_matrix_view xHiDHiDHix_ge1 = gsl_matrix_submatrix( + xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, + dc_size); + + Calc_xHiDHiDHix(eval, Hi, xHi, i1, j1, i2, j2, &xHiDHiDHix_gg1.matrix, + &xHiDHiDHix_ee1.matrix, &xHiDHiDHix_ge1.matrix); + + if (v2 != v1) { + gsl_matrix_view xHiDHiDHix_gg2 = gsl_matrix_submatrix( + xHiDHiDHix_all_gg, 0, (v2 * v_size + v1) * dc_size, dc_size, + dc_size); + gsl_matrix_view xHiDHiDHix_ee2 = gsl_matrix_submatrix( + xHiDHiDHix_all_ee, 0, (v2 * v_size + v1) * dc_size, dc_size, + dc_size); + gsl_matrix_view xHiDHiDHix_ge2 = gsl_matrix_submatrix( + xHiDHiDHix_all_ge, 0, (v2 * v_size + v1) * dc_size, dc_size, + dc_size); + + 
gsl_matrix_memcpy(&xHiDHiDHix_gg2.matrix, &xHiDHiDHix_gg1.matrix); + gsl_matrix_memcpy(&xHiDHiDHix_ee2.matrix, &xHiDHiDHix_ee1.matrix); + gsl_matrix_memcpy(&xHiDHiDHix_ge2.matrix, &xHiDHiDHix_ge1.matrix); + } + } + } + } + } + + return; } // Calculate (xHiDHix)Qi(xHiy) for every pair (i,j). -void Calc_xHiDHixQixHiy_all (const gsl_matrix *xHiDHix_all_g, - const gsl_matrix *xHiDHix_all_e, - const gsl_vector *QixHiy, - gsl_matrix *xHiDHixQixHiy_all_g, - gsl_matrix *xHiDHixQixHiy_all_e) { - size_t dc_size=xHiDHix_all_g->size1; - size_t v_size=xHiDHix_all_g->size2/dc_size; - - for (size_t i=0; i<v_size; i++) { - gsl_matrix_const_view xHiDHix_g = - gsl_matrix_const_submatrix (xHiDHix_all_g, 0, i*dc_size, - dc_size, dc_size); - gsl_matrix_const_view xHiDHix_e = - gsl_matrix_const_submatrix (xHiDHix_all_e, 0, i*dc_size, - dc_size, dc_size); - - gsl_vector_view xHiDHixQixHiy_g = - gsl_matrix_column (xHiDHixQixHiy_all_g, i); - gsl_vector_view xHiDHixQixHiy_e = - gsl_matrix_column (xHiDHixQixHiy_all_e, i); - - gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_g.matrix, - QixHiy, 0.0, &xHiDHixQixHiy_g.vector); - gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_e.matrix, - QixHiy, 0.0, &xHiDHixQixHiy_e.vector); - } - - return; +void Calc_xHiDHixQixHiy_all(const gsl_matrix *xHiDHix_all_g, + const gsl_matrix *xHiDHix_all_e, + const gsl_vector *QixHiy, + gsl_matrix *xHiDHixQixHiy_all_g, + gsl_matrix *xHiDHixQixHiy_all_e) { + size_t dc_size = xHiDHix_all_g->size1; + size_t v_size = xHiDHix_all_g->size2 / dc_size; + + for (size_t i = 0; i < v_size; i++) { + gsl_matrix_const_view xHiDHix_g = gsl_matrix_const_submatrix( + xHiDHix_all_g, 0, i * dc_size, dc_size, dc_size); + gsl_matrix_const_view xHiDHix_e = gsl_matrix_const_submatrix( + xHiDHix_all_e, 0, i * dc_size, dc_size, dc_size); + + gsl_vector_view xHiDHixQixHiy_g = gsl_matrix_column(xHiDHixQixHiy_all_g, i); + gsl_vector_view xHiDHixQixHiy_e = gsl_matrix_column(xHiDHixQixHiy_all_e, i); + + gsl_blas_dgemv(CblasNoTrans, 1.0, 
&xHiDHix_g.matrix, QixHiy, 0.0, + &xHiDHixQixHiy_g.vector); + gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHix_e.matrix, QixHiy, 0.0, + &xHiDHixQixHiy_e.vector); + } + + return; } // Calculate Qi(xHiDHiy) and Qi(xHiDHix)Qi(xHiy) for each pair of i,j (i<=j). -void Calc_QiVec_all (const gsl_matrix *Qi, const gsl_matrix *vec_all_g, - const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g, - gsl_matrix *Qivec_all_e) { - for (size_t i=0; i<vec_all_g->size2; i++) { - gsl_vector_const_view vec_g=gsl_matrix_const_column (vec_all_g, i); - gsl_vector_const_view vec_e=gsl_matrix_const_column (vec_all_e, i); - - gsl_vector_view Qivec_g=gsl_matrix_column (Qivec_all_g, i); - gsl_vector_view Qivec_e=gsl_matrix_column (Qivec_all_e, i); - - gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_g.vector,0.0, - &Qivec_g.vector); - gsl_blas_dgemv(CblasNoTrans,1.0,Qi,&vec_e.vector,0.0, - &Qivec_e.vector); - } - - return; +void Calc_QiVec_all(const gsl_matrix *Qi, const gsl_matrix *vec_all_g, + const gsl_matrix *vec_all_e, gsl_matrix *Qivec_all_g, + gsl_matrix *Qivec_all_e) { + for (size_t i = 0; i < vec_all_g->size2; i++) { + gsl_vector_const_view vec_g = gsl_matrix_const_column(vec_all_g, i); + gsl_vector_const_view vec_e = gsl_matrix_const_column(vec_all_e, i); + + gsl_vector_view Qivec_g = gsl_matrix_column(Qivec_all_g, i); + gsl_vector_view Qivec_e = gsl_matrix_column(Qivec_all_e, i); + + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_g.vector, 0.0, &Qivec_g.vector); + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, &vec_e.vector, 0.0, &Qivec_e.vector); + } + + return; } // Calculate Qi(xHiDHix) for each pair of i,j (i<=j). 
-void Calc_QiMat_all (const gsl_matrix *Qi, const gsl_matrix *mat_all_g, - const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g, - gsl_matrix *Qimat_all_e) { - size_t dc_size=Qi->size1; - size_t v_size=mat_all_g->size2/mat_all_g->size1; - - for (size_t i=0; i<v_size; i++) { - gsl_matrix_const_view mat_g = - gsl_matrix_const_submatrix (mat_all_g, 0, i*dc_size, - dc_size, dc_size); - gsl_matrix_const_view mat_e = - gsl_matrix_const_submatrix (mat_all_e, 0, i*dc_size, - dc_size, dc_size); - - gsl_matrix_view Qimat_g = - gsl_matrix_submatrix (Qimat_all_g, 0, i*dc_size, dc_size, - dc_size); - gsl_matrix_view Qimat_e = - gsl_matrix_submatrix (Qimat_all_e, 0, i*dc_size, dc_size, - dc_size); - - gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi, - &mat_g.matrix, 0.0, &Qimat_g.matrix); - gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi, - &mat_e.matrix, 0.0, &Qimat_e.matrix); - } - - return; +void Calc_QiMat_all(const gsl_matrix *Qi, const gsl_matrix *mat_all_g, + const gsl_matrix *mat_all_e, gsl_matrix *Qimat_all_g, + gsl_matrix *Qimat_all_e) { + size_t dc_size = Qi->size1; + size_t v_size = mat_all_g->size2 / mat_all_g->size1; + + for (size_t i = 0; i < v_size; i++) { + gsl_matrix_const_view mat_g = + gsl_matrix_const_submatrix(mat_all_g, 0, i * dc_size, dc_size, dc_size); + gsl_matrix_const_view mat_e = + gsl_matrix_const_submatrix(mat_all_e, 0, i * dc_size, dc_size, dc_size); + + gsl_matrix_view Qimat_g = + gsl_matrix_submatrix(Qimat_all_g, 0, i * dc_size, dc_size, dc_size); + gsl_matrix_view Qimat_e = + gsl_matrix_submatrix(Qimat_all_e, 0, i * dc_size, dc_size, dc_size); + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_g.matrix, 0.0, + &Qimat_g.matrix); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_e.matrix, 0.0, + &Qimat_e.matrix); + } + + return; } // Calculate yPDPy // yPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)y // = ytHiDHiy - (yHix)Qi(xHiDHiy) - (yHiDHix)Qi(xHiy) // + (yHix)Qi(xHiDHix)Qi(xtHiy) -void Calc_yPDPy (const gsl_vector *eval, 
const gsl_matrix *Hiy, - const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g, - const gsl_matrix *xHiDHiy_all_e, - const gsl_matrix *xHiDHixQixHiy_all_g, - const gsl_matrix *xHiDHixQixHiy_all_e, - const size_t i, const size_t j, - double &yPDPy_g, double &yPDPy_e) { - size_t d_size=Hiy->size1; - size_t v=GetIndex(i, j, d_size); - - double d; - - // First part: ytHiDHiy. - Calc_yHiDHiy (eval, Hiy, i, j, yPDPy_g, yPDPy_e); - - // Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy) - gsl_vector_const_view xHiDHiy_g = - gsl_matrix_const_column (xHiDHiy_all_g, v); - gsl_vector_const_view xHiDHiy_e = - gsl_matrix_const_column (xHiDHiy_all_e, v); - - gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d); - yPDPy_g-=d*2.0; - gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d); - yPDPy_e-=d*2.0; - - // Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy). - gsl_vector_const_view xHiDHixQixHiy_g = - gsl_matrix_const_column (xHiDHixQixHiy_all_g, v); - gsl_vector_const_view xHiDHixQixHiy_e = - gsl_matrix_const_column (xHiDHixQixHiy_all_e, v); - - gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d); - yPDPy_g+=d; - gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d); - yPDPy_e+=d; - - return; +void Calc_yPDPy(const gsl_vector *eval, const gsl_matrix *Hiy, + const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g, + const gsl_matrix *xHiDHiy_all_e, + const gsl_matrix *xHiDHixQixHiy_all_g, + const gsl_matrix *xHiDHixQixHiy_all_e, const size_t i, + const size_t j, double &yPDPy_g, double &yPDPy_e) { + size_t d_size = Hiy->size1; + size_t v = GetIndex(i, j, d_size); + + double d; + + // First part: ytHiDHiy. 
+ Calc_yHiDHiy(eval, Hiy, i, j, yPDPy_g, yPDPy_e); + + // Second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy) + gsl_vector_const_view xHiDHiy_g = gsl_matrix_const_column(xHiDHiy_all_g, v); + gsl_vector_const_view xHiDHiy_e = gsl_matrix_const_column(xHiDHiy_all_e, v); + + gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d); + yPDPy_g -= d * 2.0; + gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d); + yPDPy_e -= d * 2.0; + + // Fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy). + gsl_vector_const_view xHiDHixQixHiy_g = + gsl_matrix_const_column(xHiDHixQixHiy_all_g, v); + gsl_vector_const_view xHiDHixQixHiy_e = + gsl_matrix_const_column(xHiDHixQixHiy_all_e, v); + + gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d); + yPDPy_g += d; + gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d); + yPDPy_e += d; + + return; } // calculate yPDPDPy = y(Hi-HixQixHi)D(Hi-HixQixHi)D(Hi-HixQixHi)y @@ -1912,3445 +1897,3503 @@ void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy, // + (yHiDHix)Qi(xHiDHix)Qi(xHiy) // + (yHix)Qi(xHiDHiDHix)Qi(xHiy) // - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy) -void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi, - const gsl_matrix *xHi, const gsl_matrix *Hiy, - const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g, - const gsl_matrix *xHiDHiy_all_e, - const gsl_matrix *QixHiDHiy_all_g, - const gsl_matrix *QixHiDHiy_all_e, - const gsl_matrix *xHiDHixQixHiy_all_g, - const gsl_matrix *xHiDHixQixHiy_all_e, - const gsl_matrix *QixHiDHixQixHiy_all_g, - const gsl_matrix *QixHiDHixQixHiy_all_e, - const gsl_matrix *xHiDHiDHiy_all_gg, - const gsl_matrix *xHiDHiDHiy_all_ee, - const gsl_matrix *xHiDHiDHiy_all_ge, - const gsl_matrix *xHiDHiDHix_all_gg, - const gsl_matrix *xHiDHiDHix_all_ee, - const gsl_matrix *xHiDHiDHix_all_ge, - const size_t i1, const size_t j1, const size_t i2, - const size_t j2, double &yPDPDPy_gg, double &yPDPDPy_ee, - double &yPDPDPy_ge) { - size_t d_size=Hi->size1, dc_size=xHi->size1; - size_t v1=GetIndex(i1, j1, d_size), 
v2=GetIndex(i2, j2, d_size); - size_t v_size=d_size*(d_size+1)/2; - - double d; - - gsl_vector *xHiDHiDHixQixHiy=gsl_vector_alloc (dc_size); - - // First part: yHiDHiDHiy. - Calc_yHiDHiDHiy (eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, - yPDPDPy_ee, yPDPDPy_ge); - - // Second and third parts: - // -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy). - gsl_vector_const_view xHiDHiDHiy_gg1 = - gsl_matrix_const_column (xHiDHiDHiy_all_gg, v1*v_size+v2); - gsl_vector_const_view xHiDHiDHiy_ee1 = - gsl_matrix_const_column (xHiDHiDHiy_all_ee, v1*v_size+v2); - gsl_vector_const_view xHiDHiDHiy_ge1 = - gsl_matrix_const_column (xHiDHiDHiy_all_ge, v1*v_size+v2); - - gsl_vector_const_view xHiDHiDHiy_gg2 = - gsl_matrix_const_column (xHiDHiDHiy_all_gg, v2*v_size+v1); - gsl_vector_const_view xHiDHiDHiy_ee2 = - gsl_matrix_const_column (xHiDHiDHiy_all_ee, v2*v_size+v1); - gsl_vector_const_view xHiDHiDHiy_ge2 = - gsl_matrix_const_column (xHiDHiDHiy_all_ge, v2*v_size+v1); - - gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d); - yPDPDPy_gg-=d; - gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d); - yPDPDPy_ee-=d; - gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d); - yPDPDPy_ge-=d; - - gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d); - yPDPDPy_gg-=d; - gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d); - yPDPDPy_ee-=d; - gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d); - yPDPDPy_ge-=d; - - // Fourth part: - (yHiDHix)Qi(xHiDHiy). 
- gsl_vector_const_view xHiDHiy_g1 = - gsl_matrix_const_column (xHiDHiy_all_g, v1); - gsl_vector_const_view xHiDHiy_e1 = - gsl_matrix_const_column (xHiDHiy_all_e, v1); - gsl_vector_const_view QixHiDHiy_g2 = - gsl_matrix_const_column (QixHiDHiy_all_g, v2); - gsl_vector_const_view QixHiDHiy_e2 = - gsl_matrix_const_column (QixHiDHiy_all_e, v2); - - gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d); - yPDPDPy_gg-=d; - gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d); - yPDPDPy_ee-=d; - gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d); - yPDPDPy_ge-=d; - - // Fifth and sixth parts: - // + (yHix)Qi(xHiDHix)Qi(xHiDHiy) + - // (yHiDHix)Qi(xHiDHix)Qi(xHiy) - gsl_vector_const_view QixHiDHiy_g1 = - gsl_matrix_const_column (QixHiDHiy_all_g, v1); - gsl_vector_const_view QixHiDHiy_e1 = - gsl_matrix_const_column (QixHiDHiy_all_e, v1); - - gsl_vector_const_view xHiDHixQixHiy_g1 = - gsl_matrix_const_column (xHiDHixQixHiy_all_g, v1); - gsl_vector_const_view xHiDHixQixHiy_e1 = - gsl_matrix_const_column (xHiDHixQixHiy_all_e, v1); - gsl_vector_const_view xHiDHixQixHiy_g2 = - gsl_matrix_const_column (xHiDHixQixHiy_all_g, v2); - gsl_vector_const_view xHiDHixQixHiy_e2 = - gsl_matrix_const_column (xHiDHixQixHiy_all_e, v2); - - gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d); - yPDPDPy_gg+=d; - gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d); - yPDPDPy_gg+=d; - - gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d); - yPDPDPy_ee+=d; - gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d); - yPDPDPy_ee+=d; - - gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d); - yPDPDPy_ge+=d; - gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d); - yPDPDPy_ge+=d; - - // Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy) - gsl_matrix_const_view xHiDHiDHix_gg = - gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0, - (v1*v_size+v2)*dc_size, - dc_size, dc_size); - gsl_matrix_const_view 
xHiDHiDHix_ee = - gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0, - (v1*v_size+v2)*dc_size, - dc_size, dc_size); - gsl_matrix_const_view xHiDHiDHix_ge = - gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0, - (v1*v_size+v2)*dc_size, - dc_size, dc_size); - - gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, - QixHiy, 0.0, xHiDHiDHixQixHiy); - gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); - yPDPDPy_gg+=d; - gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix, - QixHiy, 0.0, xHiDHiDHixQixHiy); - gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); - yPDPDPy_ee+=d; - gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, - QixHiy, 0.0, xHiDHiDHixQixHiy); - gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); - yPDPDPy_ge+=d; - - // Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy). - gsl_vector_const_view QixHiDHixQixHiy_g1 = - gsl_matrix_const_column (QixHiDHixQixHiy_all_g, v1); - gsl_vector_const_view QixHiDHixQixHiy_e1 = - gsl_matrix_const_column (QixHiDHixQixHiy_all_e, v1); - - gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_g2.vector,&d); - yPDPDPy_gg-=d; - gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector,&xHiDHixQixHiy_e2.vector,&d); - yPDPDPy_ee-=d; - gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector,&xHiDHixQixHiy_e2.vector,&d); - yPDPDPy_ge-=d; - - // Free memory. 
- gsl_vector_free(xHiDHiDHixQixHiy); - - return; +void Calc_yPDPDPy( + const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matrix *xHi, + const gsl_matrix *Hiy, const gsl_vector *QixHiy, + const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e, + const gsl_matrix *QixHiDHiy_all_g, const gsl_matrix *QixHiDHiy_all_e, + const gsl_matrix *xHiDHixQixHiy_all_g, + const gsl_matrix *xHiDHixQixHiy_all_e, + const gsl_matrix *QixHiDHixQixHiy_all_g, + const gsl_matrix *QixHiDHixQixHiy_all_e, + const gsl_matrix *xHiDHiDHiy_all_gg, const gsl_matrix *xHiDHiDHiy_all_ee, + const gsl_matrix *xHiDHiDHiy_all_ge, const gsl_matrix *xHiDHiDHix_all_gg, + const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge, + const size_t i1, const size_t j1, const size_t i2, const size_t j2, + double &yPDPDPy_gg, double &yPDPDPy_ee, double &yPDPDPy_ge) { + size_t d_size = Hi->size1, dc_size = xHi->size1; + size_t v1 = GetIndex(i1, j1, d_size), v2 = GetIndex(i2, j2, d_size); + size_t v_size = d_size * (d_size + 1) / 2; + + double d; + + gsl_vector *xHiDHiDHixQixHiy = gsl_vector_alloc(dc_size); + + // First part: yHiDHiDHiy. + Calc_yHiDHiDHiy(eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee, + yPDPDPy_ge); + + // Second and third parts: + // -(yHix)Qi(xHiDHiDHiy) - (yHiDHiDHix)Qi(xHiy). 
+ gsl_vector_const_view xHiDHiDHiy_gg1 = + gsl_matrix_const_column(xHiDHiDHiy_all_gg, v1 * v_size + v2); + gsl_vector_const_view xHiDHiDHiy_ee1 = + gsl_matrix_const_column(xHiDHiDHiy_all_ee, v1 * v_size + v2); + gsl_vector_const_view xHiDHiDHiy_ge1 = + gsl_matrix_const_column(xHiDHiDHiy_all_ge, v1 * v_size + v2); + + gsl_vector_const_view xHiDHiDHiy_gg2 = + gsl_matrix_const_column(xHiDHiDHiy_all_gg, v2 * v_size + v1); + gsl_vector_const_view xHiDHiDHiy_ee2 = + gsl_matrix_const_column(xHiDHiDHiy_all_ee, v2 * v_size + v1); + gsl_vector_const_view xHiDHiDHiy_ge2 = + gsl_matrix_const_column(xHiDHiDHiy_all_ge, v2 * v_size + v1); + + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d); + yPDPDPy_gg -= d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d); + yPDPDPy_ee -= d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d); + yPDPDPy_ge -= d; + + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d); + yPDPDPy_gg -= d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d); + yPDPDPy_ee -= d; + gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d); + yPDPDPy_ge -= d; + + // Fourth part: - (yHiDHix)Qi(xHiDHiy). 
+ gsl_vector_const_view xHiDHiy_g1 = gsl_matrix_const_column(xHiDHiy_all_g, v1); + gsl_vector_const_view xHiDHiy_e1 = gsl_matrix_const_column(xHiDHiy_all_e, v1); + gsl_vector_const_view QixHiDHiy_g2 = + gsl_matrix_const_column(QixHiDHiy_all_g, v2); + gsl_vector_const_view QixHiDHiy_e2 = + gsl_matrix_const_column(QixHiDHiy_all_e, v2); + + gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d); + yPDPDPy_gg -= d; + gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ee -= d; + gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ge -= d; + + // Fifth and sixth parts: + // + (yHix)Qi(xHiDHix)Qi(xHiDHiy) + + // (yHiDHix)Qi(xHiDHix)Qi(xHiy) + gsl_vector_const_view QixHiDHiy_g1 = + gsl_matrix_const_column(QixHiDHiy_all_g, v1); + gsl_vector_const_view QixHiDHiy_e1 = + gsl_matrix_const_column(QixHiDHiy_all_e, v1); + + gsl_vector_const_view xHiDHixQixHiy_g1 = + gsl_matrix_const_column(xHiDHixQixHiy_all_g, v1); + gsl_vector_const_view xHiDHixQixHiy_e1 = + gsl_matrix_const_column(xHiDHixQixHiy_all_e, v1); + gsl_vector_const_view xHiDHixQixHiy_g2 = + gsl_matrix_const_column(xHiDHixQixHiy_all_g, v2); + gsl_vector_const_view xHiDHixQixHiy_e2 = + gsl_matrix_const_column(xHiDHixQixHiy_all_e, v2); + + gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d); + yPDPDPy_gg += d; + gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d); + yPDPDPy_gg += d; + + gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ee += d; + gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d); + yPDPDPy_ee += d; + + gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d); + yPDPDPy_ge += d; + gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d); + yPDPDPy_ge += d; + + // Seventh part: + (yHix)Qi(xHiDHiDHix)Qi(xHiy) + gsl_matrix_const_view xHiDHiDHix_gg = gsl_matrix_const_submatrix( + xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size); + gsl_matrix_const_view 
xHiDHiDHix_ee = gsl_matrix_const_submatrix( + xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size); + gsl_matrix_const_view xHiDHiDHix_ge = gsl_matrix_const_submatrix( + xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size); + + gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, QixHiy, 0.0, + xHiDHiDHixQixHiy); + gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); + yPDPDPy_gg += d; + gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ee.matrix, QixHiy, 0.0, + xHiDHiDHixQixHiy); + gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); + yPDPDPy_ee += d; + gsl_blas_dgemv(CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, QixHiy, 0.0, + xHiDHiDHixQixHiy); + gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d); + yPDPDPy_ge += d; + + // Eighth part: - (yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy). + gsl_vector_const_view QixHiDHixQixHiy_g1 = + gsl_matrix_const_column(QixHiDHixQixHiy_all_g, v1); + gsl_vector_const_view QixHiDHixQixHiy_e1 = + gsl_matrix_const_column(QixHiDHixQixHiy_all_e, v1); + + gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_g2.vector, &d); + yPDPDPy_gg -= d; + gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector, &xHiDHixQixHiy_e2.vector, &d); + yPDPDPy_ee -= d; + gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_e2.vector, &d); + yPDPDPy_ge -= d; + + // Free memory. + gsl_vector_free(xHiDHiDHixQixHiy); + + return; } // Calculate Edgeworth correctation factors for small samples notation // and method follows Thomas J. 
Rothenberg, Econometirca 1984; 52 (4) // M=xHiDHix -void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, - const gsl_matrix *QixHiDHix_all_g, - const gsl_matrix *QixHiDHix_all_e, - const gsl_matrix *xHiDHiDHix_all_gg, - const gsl_matrix *xHiDHiDHix_all_ee, - const gsl_matrix *xHiDHiDHix_all_ge, - const size_t d_size, double &crt_a, - double &crt_b, double &crt_c) { - crt_a=0.0; crt_b=0.0; crt_c=0.0; - - size_t dc_size=Qi->size1, v_size=Hessian_inv->size1/2; - size_t c_size=dc_size/d_size; - double h_gg, h_ge, h_ee, d, B=0.0, C=0.0, D=0.0; - double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee; - double trCC_gg, trCC_ge, trCC_ee, trD_gg=0.0, trD_ge=0.0, trD_ee=0.0; - - gsl_matrix *QiMQi_g1=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *QiMQi_e1=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *QiMQi_g2=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *QiMQi_e2=gsl_matrix_alloc (dc_size, dc_size); - - gsl_matrix *QiMQisQisi_g1=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *QiMQisQisi_e1=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *QiMQisQisi_g2=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *QiMQisQisi_e2=gsl_matrix_alloc (d_size, d_size); - - gsl_matrix *QiMQiMQi_gg=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *QiMQiMQi_ge=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *QiMQiMQi_ee=gsl_matrix_alloc (dc_size, dc_size); - - gsl_matrix *QiMMQi_gg=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *QiMMQi_ge=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *QiMMQi_ee=gsl_matrix_alloc (dc_size, dc_size); - - gsl_matrix *Qi_si=gsl_matrix_alloc (d_size, d_size); - - gsl_matrix *M_dd=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *M_dcdc=gsl_matrix_alloc (dc_size, dc_size); - - // Invert Qi_sub to Qi_si. 
- gsl_matrix *Qi_sub=gsl_matrix_alloc (d_size, d_size); - - gsl_matrix_const_view Qi_s = - gsl_matrix_const_submatrix (Qi, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (d_size); - - gsl_matrix_memcpy (Qi_sub, &Qi_s.matrix); - LUDecomp (Qi_sub, pmt, &sig); - LUInvert (Qi_sub, pmt, Qi_si); - - gsl_permutation_free(pmt); - gsl_matrix_free(Qi_sub); - - // Calculate correction factors. - for (size_t v1=0; v1<v_size; v1++) { - - // Calculate Qi(xHiDHix)Qi, and subpart of it. - gsl_matrix_const_view QiM_g1 = - gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v1*dc_size, - dc_size, dc_size); - gsl_matrix_const_view QiM_e1 = - gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v1*dc_size, - dc_size, dc_size); - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, - Qi, 0.0, QiMQi_g1); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, - Qi, 0.0, QiMQi_e1); - - gsl_matrix_view QiMQi_g1_s = - gsl_matrix_submatrix (QiMQi_g1, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - gsl_matrix_view QiMQi_e1_s = - gsl_matrix_submatrix (QiMQi_e1, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - - // Calculate trCg1 and trCe1. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix, - Qi_si, 0.0, QiMQisQisi_g1); - trCg1=0.0; - for (size_t k=0; k<d_size; k++) { - trCg1-=gsl_matrix_get (QiMQisQisi_g1, k, k); - } - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix, - Qi_si, 0.0, QiMQisQisi_e1); - trCe1=0.0; - for (size_t k=0; k<d_size; k++) { - trCe1-=gsl_matrix_get (QiMQisQisi_e1, k, k); - } - - for (size_t v2=0; v2<v_size; v2++) { - if (v2<v1) {continue;} - - // Calculate Qi(xHiDHix)Qi, and subpart of it. 
- gsl_matrix_const_view QiM_g2 = - gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v2*dc_size, - dc_size, dc_size); - gsl_matrix_const_view QiM_e2 = - gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v2*dc_size, - dc_size, dc_size); - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix, - Qi, 0.0, QiMQi_g2); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix, - Qi, 0.0, QiMQi_e2); - - gsl_matrix_view QiMQi_g2_s = - gsl_matrix_submatrix (QiMQi_g2, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - gsl_matrix_view QiMQi_e2_s = - gsl_matrix_submatrix (QiMQi_e2, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - - // Calculate trCg2 and trCe2. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMQi_g2_s.matrix, Qi_si, 0.0, QiMQisQisi_g2); - trCg2=0.0; - for (size_t k=0; k<d_size; k++) { - trCg2-=gsl_matrix_get (QiMQisQisi_g2, k, k); - } - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMQi_e2_s.matrix, Qi_si, 0.0, QiMQisQisi_e2); - trCe2=0.0; - for (size_t k=0; k<d_size; k++) { - trCe2-=gsl_matrix_get (QiMQisQisi_e2, k, k); - } - - // Calculate trCC_gg, trCC_ge, trCC_ee. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - QiMQisQisi_g1, QiMQisQisi_g2, 0.0, M_dd); - trCC_gg=0.0; - for (size_t k=0; k<d_size; k++) { - trCC_gg+=gsl_matrix_get (M_dd, k, k); - } - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1, - QiMQisQisi_e2, 0.0, M_dd); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1, - QiMQisQisi_g2, 1.0, M_dd); - trCC_ge=0.0; - for (size_t k=0; k<d_size; k++) { - trCC_ge+=gsl_matrix_get (M_dd, k, k); - } - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1, - QiMQisQisi_e2, 0.0, M_dd); - trCC_ee=0.0; - for (size_t k=0; k<d_size; k++) { - trCC_ee+=gsl_matrix_get (M_dd, k, k); - } - - // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it. 
- gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, - QiMQi_g2, 0.0, QiMQiMQi_gg); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, - QiMQi_e2, 0.0, QiMQiMQi_ge); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, - QiMQi_g2, 1.0, QiMQiMQi_ge); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, - QiMQi_e2, 0.0, QiMQiMQi_ee); - - gsl_matrix_view QiMQiMQi_gg_s = - gsl_matrix_submatrix (QiMQiMQi_gg, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - gsl_matrix_view QiMQiMQi_ge_s = - gsl_matrix_submatrix (QiMQiMQi_ge, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - gsl_matrix_view QiMQiMQi_ee_s = - gsl_matrix_submatrix (QiMQiMQi_ee, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - - // and part of trB_gg, trB_ge, trB_ee. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMQiMQi_gg_s.matrix, Qi_si, 0.0, M_dd); - trB_gg=0.0; - for (size_t k=0; k<d_size; k++) { - d=gsl_matrix_get (M_dd, k, k); - trB_gg-=d; - } - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMQiMQi_ge_s.matrix, Qi_si, 0.0, M_dd); - trB_ge=0.0; - for (size_t k=0; k<d_size; k++) { - d=gsl_matrix_get (M_dd, k, k); - trB_ge-=d; - } - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMQiMQi_ee_s.matrix, Qi_si, 0.0, M_dd); - trB_ee=0.0; - for (size_t k=0; k<d_size; k++) { - d=gsl_matrix_get (M_dd, k, k); - trB_ee-=d; - } - - // Calculate Qi(xHiDHiDHix)Qi, and subpart of it. 
- gsl_matrix_const_view MM_gg = - gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0, - (v1*v_size+v2)*dc_size, dc_size, - dc_size); - gsl_matrix_const_view MM_ge = - gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0, - (v1*v_size+v2)*dc_size, dc_size, - dc_size); - gsl_matrix_const_view MM_ee = - gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0, - (v1*v_size+v2)*dc_size, dc_size, - dc_size); - - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, - &MM_gg.matrix, 0.0, M_dcdc); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0, - QiMMQi_gg); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, - &MM_ge.matrix, 0.0, M_dcdc); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, - Qi, 0.0, QiMMQi_ge); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, - &MM_ee.matrix, 0.0, M_dcdc); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, - 0.0, QiMMQi_ee); - - gsl_matrix_view QiMMQi_gg_s = - gsl_matrix_submatrix (QiMMQi_gg, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - gsl_matrix_view QiMMQi_ge_s = - gsl_matrix_submatrix (QiMMQi_ge, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - gsl_matrix_view QiMMQi_ee_s = - gsl_matrix_submatrix (QiMMQi_ee, (c_size-1)*d_size, - (c_size-1)*d_size, d_size, d_size); - - // Calculate the other part of trB_gg, trB_ge, trB_ee. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMMQi_gg_s.matrix, Qi_si, 0.0, M_dd); - for (size_t k=0; k<d_size; k++) { - trB_gg+=gsl_matrix_get (M_dd, k, k); - } - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMMQi_ge_s.matrix, Qi_si, 0.0, M_dd); - for (size_t k=0; k<d_size; k++) { - trB_ge+=2.0*gsl_matrix_get (M_dd, k, k); - } - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, - &QiMMQi_ee_s.matrix, Qi_si, 0.0, M_dd); - for (size_t k=0; k<d_size; k++) { - trB_ee+=gsl_matrix_get (M_dd, k, k); - } - - // Calculate trD_gg, trD_ge, trD_ee. 
- trD_gg=2.0*trB_gg; - trD_ge=2.0*trB_ge; - trD_ee=2.0*trB_ee; - - //calculate B, C and D - h_gg=-1.0*gsl_matrix_get (Hessian_inv, v1, v2); - h_ge=-1.0*gsl_matrix_get (Hessian_inv, v1, v2+v_size); - h_ee=-1.0*gsl_matrix_get (Hessian_inv, v1+v_size, v2+v_size); - - B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee; - C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) + - h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) + - h_ee*(trCC_ee+0.5*trCe1*trCe2); - D+=h_gg*(trCC_gg+0.5*trD_gg) + - h_ge*(trCC_ge+0.5*trD_ge) + h_ee*(trCC_ee+0.5*trD_ee); - - if (v1!=v2) { - B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee; - C+=h_gg*(trCC_gg+0.5*trCg1*trCg2) + - h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2) + - h_ee*(trCC_ee+0.5*trCe1*trCe2); - D+=h_gg*(trCC_gg+0.5*trD_gg) + - h_ge*(trCC_ge+0.5*trD_ge) + - h_ee*(trCC_ee+0.5*trD_ee); - } - } - } - - // Calculate a, b, c from B C D. - crt_a=2.0*D-C; - crt_b=2.0*B; - crt_c=C; - - // Free matrix memory. - gsl_matrix_free(QiMQi_g1); - gsl_matrix_free(QiMQi_e1); - gsl_matrix_free(QiMQi_g2); - gsl_matrix_free(QiMQi_e2); - - gsl_matrix_free(QiMQisQisi_g1); - gsl_matrix_free(QiMQisQisi_e1); - gsl_matrix_free(QiMQisQisi_g2); - gsl_matrix_free(QiMQisQisi_e2); - - gsl_matrix_free(QiMQiMQi_gg); - gsl_matrix_free(QiMQiMQi_ge); - gsl_matrix_free(QiMQiMQi_ee); - - gsl_matrix_free(QiMMQi_gg); - gsl_matrix_free(QiMMQi_ge); - gsl_matrix_free(QiMMQi_ee); - - gsl_matrix_free(Qi_si); - - gsl_matrix_free(M_dd); - gsl_matrix_free(M_dcdc); - - return; +void CalcCRT(const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, + const gsl_matrix *QixHiDHix_all_g, + const gsl_matrix *QixHiDHix_all_e, + const gsl_matrix *xHiDHiDHix_all_gg, + const gsl_matrix *xHiDHiDHix_all_ee, + const gsl_matrix *xHiDHiDHix_all_ge, const size_t d_size, + double &crt_a, double &crt_b, double &crt_c) { + crt_a = 0.0; + crt_b = 0.0; + crt_c = 0.0; + + size_t dc_size = Qi->size1, v_size = Hessian_inv->size1 / 2; + size_t c_size = dc_size / d_size; + double h_gg, h_ge, h_ee, d, B = 0.0, C = 0.0, D = 0.0; + double trCg1, 
trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee; + double trCC_gg, trCC_ge, trCC_ee, trD_gg = 0.0, trD_ge = 0.0, trD_ee = 0.0; + + gsl_matrix *QiMQi_g1 = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *QiMQi_e1 = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *QiMQi_g2 = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *QiMQi_e2 = gsl_matrix_alloc(dc_size, dc_size); + + gsl_matrix *QiMQisQisi_g1 = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *QiMQisQisi_e1 = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *QiMQisQisi_g2 = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *QiMQisQisi_e2 = gsl_matrix_alloc(d_size, d_size); + + gsl_matrix *QiMQiMQi_gg = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *QiMQiMQi_ge = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *QiMQiMQi_ee = gsl_matrix_alloc(dc_size, dc_size); + + gsl_matrix *QiMMQi_gg = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *QiMMQi_ge = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *QiMMQi_ee = gsl_matrix_alloc(dc_size, dc_size); + + gsl_matrix *Qi_si = gsl_matrix_alloc(d_size, d_size); + + gsl_matrix *M_dd = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *M_dcdc = gsl_matrix_alloc(dc_size, dc_size); + + // Invert Qi_sub to Qi_si. + gsl_matrix *Qi_sub = gsl_matrix_alloc(d_size, d_size); + + gsl_matrix_const_view Qi_s = gsl_matrix_const_submatrix( + Qi, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size); + + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(d_size); + + gsl_matrix_memcpy(Qi_sub, &Qi_s.matrix); + LUDecomp(Qi_sub, pmt, &sig); + LUInvert(Qi_sub, pmt, Qi_si); + + gsl_permutation_free(pmt); + gsl_matrix_free(Qi_sub); + + // Calculate correction factors. + for (size_t v1 = 0; v1 < v_size; v1++) { + + // Calculate Qi(xHiDHix)Qi, and subpart of it. 
+ gsl_matrix_const_view QiM_g1 = gsl_matrix_const_submatrix( + QixHiDHix_all_g, 0, v1 * dc_size, dc_size, dc_size); + gsl_matrix_const_view QiM_e1 = gsl_matrix_const_submatrix( + QixHiDHix_all_e, 0, v1 * dc_size, dc_size, dc_size); + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, Qi, 0.0, + QiMQi_g1); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, Qi, 0.0, + QiMQi_e1); + + gsl_matrix_view QiMQi_g1_s = gsl_matrix_submatrix( + QiMQi_g1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size); + gsl_matrix_view QiMQi_e1_s = gsl_matrix_submatrix( + QiMQi_e1, (c_size - 1) * d_size, (c_size - 1) * d_size, d_size, d_size); + + // Calculate trCg1 and trCe1. + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g1_s.matrix, Qi_si, + 0.0, QiMQisQisi_g1); + trCg1 = 0.0; + for (size_t k = 0; k < d_size; k++) { + trCg1 -= gsl_matrix_get(QiMQisQisi_g1, k, k); + } + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix, Qi_si, + 0.0, QiMQisQisi_e1); + trCe1 = 0.0; + for (size_t k = 0; k < d_size; k++) { + trCe1 -= gsl_matrix_get(QiMQisQisi_e1, k, k); + } + + for (size_t v2 = 0; v2 < v_size; v2++) { + if (v2 < v1) { + continue; + } + + // Calculate Qi(xHiDHix)Qi, and subpart of it. + gsl_matrix_const_view QiM_g2 = gsl_matrix_const_submatrix( + QixHiDHix_all_g, 0, v2 * dc_size, dc_size, dc_size); + gsl_matrix_const_view QiM_e2 = gsl_matrix_const_submatrix( + QixHiDHix_all_e, 0, v2 * dc_size, dc_size, dc_size); + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix, Qi, 0.0, + QiMQi_g2); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix, Qi, 0.0, + QiMQi_e2); + + gsl_matrix_view QiMQi_g2_s = + gsl_matrix_submatrix(QiMQi_g2, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + gsl_matrix_view QiMQi_e2_s = + gsl_matrix_submatrix(QiMQi_e2, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + + // Calculate trCg2 and trCe2. 
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g2_s.matrix, Qi_si, + 0.0, QiMQisQisi_g2); + trCg2 = 0.0; + for (size_t k = 0; k < d_size; k++) { + trCg2 -= gsl_matrix_get(QiMQisQisi_g2, k, k); + } + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e2_s.matrix, Qi_si, + 0.0, QiMQisQisi_e2); + trCe2 = 0.0; + for (size_t k = 0; k < d_size; k++) { + trCe2 -= gsl_matrix_get(QiMQisQisi_e2, k, k); + } + + // Calculate trCC_gg, trCC_ge, trCC_ee. + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1, + QiMQisQisi_g2, 0.0, M_dd); + trCC_gg = 0.0; + for (size_t k = 0; k < d_size; k++) { + trCC_gg += gsl_matrix_get(M_dd, k, k); + } + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1, + QiMQisQisi_e2, 0.0, M_dd); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1, + QiMQisQisi_g2, 1.0, M_dd); + trCC_ge = 0.0; + for (size_t k = 0; k < d_size; k++) { + trCC_ge += gsl_matrix_get(M_dd, k, k); + } + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1, + QiMQisQisi_e2, 0.0, M_dd); + trCC_ee = 0.0; + for (size_t k = 0; k < d_size; k++) { + trCC_ee += gsl_matrix_get(M_dd, k, k); + } + + // Calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it. 
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_g2, + 0.0, QiMQiMQi_gg); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_e2, + 0.0, QiMQiMQi_ge); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_g2, + 1.0, QiMQiMQi_ge); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_e2, + 0.0, QiMQiMQi_ee); + + gsl_matrix_view QiMQiMQi_gg_s = + gsl_matrix_submatrix(QiMQiMQi_gg, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + gsl_matrix_view QiMQiMQi_ge_s = + gsl_matrix_submatrix(QiMQiMQi_ge, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + gsl_matrix_view QiMQiMQi_ee_s = + gsl_matrix_submatrix(QiMQiMQi_ee, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + + // and part of trB_gg, trB_ge, trB_ee. + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_gg_s.matrix, + Qi_si, 0.0, M_dd); + trB_gg = 0.0; + for (size_t k = 0; k < d_size; k++) { + d = gsl_matrix_get(M_dd, k, k); + trB_gg -= d; + } + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ge_s.matrix, + Qi_si, 0.0, M_dd); + trB_ge = 0.0; + for (size_t k = 0; k < d_size; k++) { + d = gsl_matrix_get(M_dd, k, k); + trB_ge -= d; + } + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ee_s.matrix, + Qi_si, 0.0, M_dd); + trB_ee = 0.0; + for (size_t k = 0; k < d_size; k++) { + d = gsl_matrix_get(M_dd, k, k); + trB_ee -= d; + } + + // Calculate Qi(xHiDHiDHix)Qi, and subpart of it. 
+ gsl_matrix_const_view MM_gg = gsl_matrix_const_submatrix( + xHiDHiDHix_all_gg, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size); + gsl_matrix_const_view MM_ge = gsl_matrix_const_submatrix( + xHiDHiDHix_all_ge, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size); + gsl_matrix_const_view MM_ee = gsl_matrix_const_submatrix( + xHiDHiDHix_all_ee, 0, (v1 * v_size + v2) * dc_size, dc_size, dc_size); + + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_gg.matrix, 0.0, + M_dcdc); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0, + QiMMQi_gg); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ge.matrix, 0.0, + M_dcdc); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0, + QiMMQi_ge); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ee.matrix, 0.0, + M_dcdc); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0, + QiMMQi_ee); + + gsl_matrix_view QiMMQi_gg_s = + gsl_matrix_submatrix(QiMMQi_gg, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + gsl_matrix_view QiMMQi_ge_s = + gsl_matrix_submatrix(QiMMQi_ge, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + gsl_matrix_view QiMMQi_ee_s = + gsl_matrix_submatrix(QiMMQi_ee, (c_size - 1) * d_size, + (c_size - 1) * d_size, d_size, d_size); + + // Calculate the other part of trB_gg, trB_ge, trB_ee. + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_gg_s.matrix, + Qi_si, 0.0, M_dd); + for (size_t k = 0; k < d_size; k++) { + trB_gg += gsl_matrix_get(M_dd, k, k); + } + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ge_s.matrix, + Qi_si, 0.0, M_dd); + for (size_t k = 0; k < d_size; k++) { + trB_ge += 2.0 * gsl_matrix_get(M_dd, k, k); + } + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_ee_s.matrix, + Qi_si, 0.0, M_dd); + for (size_t k = 0; k < d_size; k++) { + trB_ee += gsl_matrix_get(M_dd, k, k); + } + + // Calculate trD_gg, trD_ge, trD_ee. 
+ trD_gg = 2.0 * trB_gg; + trD_ge = 2.0 * trB_ge; + trD_ee = 2.0 * trB_ee; + + // calculate B, C and D + h_gg = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2); + h_ge = -1.0 * gsl_matrix_get(Hessian_inv, v1, v2 + v_size); + h_ee = -1.0 * gsl_matrix_get(Hessian_inv, v1 + v_size, v2 + v_size); + + B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee; + C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) + + h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) + + h_ee * (trCC_ee + 0.5 * trCe1 * trCe2); + D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) + + h_ee * (trCC_ee + 0.5 * trD_ee); + + if (v1 != v2) { + B += h_gg * trB_gg + h_ge * trB_ge + h_ee * trB_ee; + C += h_gg * (trCC_gg + 0.5 * trCg1 * trCg2) + + h_ge * (trCC_ge + 0.5 * trCg1 * trCe2 + 0.5 * trCe1 * trCg2) + + h_ee * (trCC_ee + 0.5 * trCe1 * trCe2); + D += h_gg * (trCC_gg + 0.5 * trD_gg) + h_ge * (trCC_ge + 0.5 * trD_ge) + + h_ee * (trCC_ee + 0.5 * trD_ee); + } + } + } + + // Calculate a, b, c from B C D. + crt_a = 2.0 * D - C; + crt_b = 2.0 * B; + crt_c = C; + + // Free matrix memory. + gsl_matrix_free(QiMQi_g1); + gsl_matrix_free(QiMQi_e1); + gsl_matrix_free(QiMQi_g2); + gsl_matrix_free(QiMQi_e2); + + gsl_matrix_free(QiMQisQisi_g1); + gsl_matrix_free(QiMQisQisi_e1); + gsl_matrix_free(QiMQisQisi_g2); + gsl_matrix_free(QiMQisQisi_e2); + + gsl_matrix_free(QiMQiMQi_gg); + gsl_matrix_free(QiMQiMQi_ge); + gsl_matrix_free(QiMQiMQi_ee); + + gsl_matrix_free(QiMMQi_gg); + gsl_matrix_free(QiMMQi_ge); + gsl_matrix_free(QiMMQi_ee); + + gsl_matrix_free(Qi_si); + + gsl_matrix_free(M_dd); + gsl_matrix_free(M_dcdc); + + return; } // Calculate first-order and second-order derivatives. 
-void CalcDev (const char func_name, const gsl_vector *eval, - const gsl_matrix *Qi, const gsl_matrix *Hi, - const gsl_matrix *xHi, const gsl_matrix *Hiy, - const gsl_vector *QixHiy, gsl_vector *gradient, - gsl_matrix *Hessian_inv, double &crt_a, double &crt_b, - double &crt_c) { - if (func_name!='R' && func_name!='L' && func_name!='r' && - func_name!='l') { - cout<<"func_name only takes 'R' or 'L': 'R' for " << - "log-restricted likelihood, 'L' for log-likelihood."<<endl; - return; - } - - size_t dc_size=Qi->size1, d_size=Hi->size1; - size_t c_size=dc_size/d_size; - size_t v_size=d_size*(d_size+1)/2; - size_t v1, v2; - double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge; - - gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); - - gsl_matrix *xHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size); - gsl_matrix *xHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size); - gsl_matrix *xHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size); - gsl_matrix *xHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size); - gsl_matrix *xHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size); - gsl_matrix *xHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size); - - gsl_matrix *QixHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size); - gsl_matrix *QixHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size); - gsl_matrix *QixHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size); - gsl_matrix *QixHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size); - gsl_matrix *QixHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size); - gsl_matrix *QixHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size); - - gsl_matrix *xHiDHiDHiy_all_gg = - gsl_matrix_alloc (dc_size, v_size*v_size); - gsl_matrix *xHiDHiDHiy_all_ee = - gsl_matrix_alloc (dc_size, v_size*v_size); - gsl_matrix *xHiDHiDHiy_all_ge = - gsl_matrix_alloc (dc_size, v_size*v_size); - gsl_matrix *xHiDHiDHix_all_gg = - gsl_matrix_alloc (dc_size, v_size*v_size*dc_size); - gsl_matrix *xHiDHiDHix_all_ee = - gsl_matrix_alloc (dc_size, v_size*v_size*dc_size); - 
gsl_matrix *xHiDHiDHix_all_ge = - gsl_matrix_alloc (dc_size, v_size*v_size*dc_size); - - // Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all. - Calc_xHiDHiy_all (eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e); - Calc_xHiDHix_all (eval, xHi, xHiDHix_all_g, xHiDHix_all_e); - Calc_xHiDHixQixHiy_all (xHiDHix_all_g, xHiDHix_all_e, QixHiy, - xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e); - - Calc_xHiDHiDHiy_all (v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg, - xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge); - Calc_xHiDHiDHix_all (v_size, eval, Hi, xHi, xHiDHiDHix_all_gg, - xHiDHiDHix_all_ee, xHiDHiDHix_all_ge); - - // Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all. - Calc_QiVec_all (Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g, - QixHiDHiy_all_e); - Calc_QiVec_all (Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, - QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e); - Calc_QiMat_all (Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g, - QixHiDHix_all_e); - - double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee; - double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge; - double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge; - - // Calculate gradient and Hessian for Vg. 
- for (size_t i1=0; i1<d_size; i1++) { - for (size_t j1=0; j1<d_size; j1++) { - if (j1<i1) {continue;} - v1=GetIndex (i1, j1, d_size); - - Calc_yPDPy (eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e, - xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1, - yPDPy_g, yPDPy_e); - - if (func_name=='R' || func_name=='r') { - Calc_tracePD (eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, - i1, j1, tPD_g, tPD_e); - - dev1_g=-0.5*tPD_g+0.5*yPDPy_g; - dev1_e=-0.5*tPD_e+0.5*yPDPy_e; - } else { - Calc_traceHiD (eval, Hi, i1, j1, tHiD_g, tHiD_e); - - dev1_g=-0.5*tHiD_g+0.5*yPDPy_g; - dev1_e=-0.5*tHiD_e+0.5*yPDPy_e; - } - - gsl_vector_set (gradient, v1, dev1_g); - gsl_vector_set (gradient, v1+v_size, dev1_e); - - for (size_t i2=0; i2<d_size; i2++) { - for (size_t j2=0; j2<d_size; j2++) { - if (j2<i2) {continue;} - v2=GetIndex (i2, j2, d_size); - - if (v2<v1) {continue;} - - Calc_yPDPDPy (eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g, - xHiDHiy_all_e, QixHiDHiy_all_g, QixHiDHiy_all_e, - xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, - QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e, - xHiDHiDHiy_all_gg, xHiDHiDHiy_all_ee, - xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg, - xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, - i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge); - - // AI for REML. - if (func_name=='R' || func_name=='r') { - Calc_tracePDPD (eval, Qi, Hi, xHi, QixHiDHix_all_g, - QixHiDHix_all_e, xHiDHiDHix_all_gg, - xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, - i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge); - - dev2_gg=0.5*tPDPD_gg-yPDPDPy_gg; - dev2_ee=0.5*tPDPD_ee-yPDPDPy_ee; - dev2_ge=0.5*tPDPD_ge-yPDPDPy_ge; - } else { - Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tHiDHiD_gg, - tHiDHiD_ee, tHiDHiD_ge); - - dev2_gg=0.5*tHiDHiD_gg-yPDPDPy_gg; - dev2_ee=0.5*tHiDHiD_ee-yPDPDPy_ee; - dev2_ge=0.5*tHiDHiD_ge-yPDPDPy_ge; - } - - // Set up Hessian. 
- gsl_matrix_set (Hessian, v1, v2, dev2_gg); - gsl_matrix_set (Hessian, v1+v_size, v2+v_size, dev2_ee); - gsl_matrix_set (Hessian, v1, v2+v_size, dev2_ge); - gsl_matrix_set (Hessian, v2+v_size, v1, dev2_ge); - - if (v1!=v2) { - gsl_matrix_set (Hessian, v2, v1, dev2_gg); - gsl_matrix_set (Hessian, v2+v_size, v1+v_size, dev2_ee); - gsl_matrix_set (Hessian, v2, v1+v_size, dev2_ge); - gsl_matrix_set (Hessian, v1+v_size, v2, dev2_ge); - } - } - } - } - } - - // Invert Hessian. - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (v_size*2); - - LUDecomp (Hessian, pmt, &sig); - LUInvert (Hessian, pmt, Hessian_inv); - - gsl_permutation_free(pmt); - gsl_matrix_free(Hessian); - - // Calculate Edgeworth correction factors after inverting - // Hessian. - if (c_size>1) { - CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e, - xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, - d_size, crt_a, crt_b, crt_c); - } else { - crt_a=0.0; crt_b=0.0; crt_c=0.0; - } - - gsl_matrix_free(xHiDHiy_all_g); - gsl_matrix_free(xHiDHiy_all_e); - gsl_matrix_free(xHiDHix_all_g); - gsl_matrix_free(xHiDHix_all_e); - gsl_matrix_free(xHiDHixQixHiy_all_g); - gsl_matrix_free(xHiDHixQixHiy_all_e); - - gsl_matrix_free(QixHiDHiy_all_g); - gsl_matrix_free(QixHiDHiy_all_e); - gsl_matrix_free(QixHiDHix_all_g); - gsl_matrix_free(QixHiDHix_all_e); - gsl_matrix_free(QixHiDHixQixHiy_all_g); - gsl_matrix_free(QixHiDHixQixHiy_all_e); - - gsl_matrix_free(xHiDHiDHiy_all_gg); - gsl_matrix_free(xHiDHiDHiy_all_ee); - gsl_matrix_free(xHiDHiDHiy_all_ge); - gsl_matrix_free(xHiDHiDHix_all_gg); - gsl_matrix_free(xHiDHiDHix_all_ee); - gsl_matrix_free(xHiDHiDHix_all_ge); - - return; +void CalcDev(const char func_name, const gsl_vector *eval, const gsl_matrix *Qi, + const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy, + const gsl_vector *QixHiy, gsl_vector *gradient, + gsl_matrix *Hessian_inv, double &crt_a, double &crt_b, + double &crt_c) { + if (func_name != 'R' && func_name != 'L' && func_name 
!= 'r' && + func_name != 'l') { + cout << "func_name only takes 'R' or 'L': 'R' for " + << "log-restricted likelihood, 'L' for log-likelihood." << endl; + return; + } + + size_t dc_size = Qi->size1, d_size = Hi->size1; + size_t c_size = dc_size / d_size; + size_t v_size = d_size * (d_size + 1) / 2; + size_t v1, v2; + double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge; + + gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); + + gsl_matrix *xHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size); + gsl_matrix *xHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size); + gsl_matrix *xHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size); + gsl_matrix *xHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size); + gsl_matrix *xHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size); + gsl_matrix *xHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size); + + gsl_matrix *QixHiDHiy_all_g = gsl_matrix_alloc(dc_size, v_size); + gsl_matrix *QixHiDHiy_all_e = gsl_matrix_alloc(dc_size, v_size); + gsl_matrix *QixHiDHix_all_g = gsl_matrix_alloc(dc_size, v_size * dc_size); + gsl_matrix *QixHiDHix_all_e = gsl_matrix_alloc(dc_size, v_size * dc_size); + gsl_matrix *QixHiDHixQixHiy_all_g = gsl_matrix_alloc(dc_size, v_size); + gsl_matrix *QixHiDHixQixHiy_all_e = gsl_matrix_alloc(dc_size, v_size); + + gsl_matrix *xHiDHiDHiy_all_gg = gsl_matrix_alloc(dc_size, v_size * v_size); + gsl_matrix *xHiDHiDHiy_all_ee = gsl_matrix_alloc(dc_size, v_size * v_size); + gsl_matrix *xHiDHiDHiy_all_ge = gsl_matrix_alloc(dc_size, v_size * v_size); + gsl_matrix *xHiDHiDHix_all_gg = + gsl_matrix_alloc(dc_size, v_size * v_size * dc_size); + gsl_matrix *xHiDHiDHix_all_ee = + gsl_matrix_alloc(dc_size, v_size * v_size * dc_size); + gsl_matrix *xHiDHiDHix_all_ge = + gsl_matrix_alloc(dc_size, v_size * v_size * dc_size); + + // Calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all. 
+ Calc_xHiDHiy_all(eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e); + Calc_xHiDHix_all(eval, xHi, xHiDHix_all_g, xHiDHix_all_e); + Calc_xHiDHixQixHiy_all(xHiDHix_all_g, xHiDHix_all_e, QixHiy, + xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e); + + Calc_xHiDHiDHiy_all(v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg, + xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge); + Calc_xHiDHiDHix_all(v_size, eval, Hi, xHi, xHiDHiDHix_all_gg, + xHiDHiDHix_all_ee, xHiDHiDHix_all_ge); + + // Calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all. + Calc_QiVec_all(Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g, + QixHiDHiy_all_e); + Calc_QiVec_all(Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, + QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e); + Calc_QiMat_all(Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g, + QixHiDHix_all_e); + + double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee; + double tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge; + double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge; + + // Calculate gradient and Hessian for Vg. 
+ for (size_t i1 = 0; i1 < d_size; i1++) { + for (size_t j1 = 0; j1 < d_size; j1++) { + if (j1 < i1) { + continue; + } + v1 = GetIndex(i1, j1, d_size); + + Calc_yPDPy(eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e, + xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1, yPDPy_g, + yPDPy_e); + + if (func_name == 'R' || func_name == 'r') { + Calc_tracePD(eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, i1, j1, tPD_g, + tPD_e); + + dev1_g = -0.5 * tPD_g + 0.5 * yPDPy_g; + dev1_e = -0.5 * tPD_e + 0.5 * yPDPy_e; + } else { + Calc_traceHiD(eval, Hi, i1, j1, tHiD_g, tHiD_e); + + dev1_g = -0.5 * tHiD_g + 0.5 * yPDPy_g; + dev1_e = -0.5 * tHiD_e + 0.5 * yPDPy_e; + } + + gsl_vector_set(gradient, v1, dev1_g); + gsl_vector_set(gradient, v1 + v_size, dev1_e); + + for (size_t i2 = 0; i2 < d_size; i2++) { + for (size_t j2 = 0; j2 < d_size; j2++) { + if (j2 < i2) { + continue; + } + v2 = GetIndex(i2, j2, d_size); + + if (v2 < v1) { + continue; + } + + Calc_yPDPDPy(eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e, + QixHiDHiy_all_g, QixHiDHiy_all_e, xHiDHixQixHiy_all_g, + xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g, + QixHiDHixQixHiy_all_e, xHiDHiDHiy_all_gg, + xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg, + xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, i2, j2, + yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge); + + // AI for REML. + if (func_name == 'R' || func_name == 'r') { + Calc_tracePDPD(eval, Qi, Hi, xHi, QixHiDHix_all_g, QixHiDHix_all_e, + xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, + xHiDHiDHix_all_ge, i1, j1, i2, j2, tPDPD_gg, + tPDPD_ee, tPDPD_ge); + + dev2_gg = 0.5 * tPDPD_gg - yPDPDPy_gg; + dev2_ee = 0.5 * tPDPD_ee - yPDPDPy_ee; + dev2_ge = 0.5 * tPDPD_ge - yPDPDPy_ge; + } else { + Calc_traceHiDHiD(eval, Hi, i1, j1, i2, j2, tHiDHiD_gg, tHiDHiD_ee, + tHiDHiD_ge); + + dev2_gg = 0.5 * tHiDHiD_gg - yPDPDPy_gg; + dev2_ee = 0.5 * tHiDHiD_ee - yPDPDPy_ee; + dev2_ge = 0.5 * tHiDHiD_ge - yPDPDPy_ge; + } + + // Set up Hessian. 
+ gsl_matrix_set(Hessian, v1, v2, dev2_gg); + gsl_matrix_set(Hessian, v1 + v_size, v2 + v_size, dev2_ee); + gsl_matrix_set(Hessian, v1, v2 + v_size, dev2_ge); + gsl_matrix_set(Hessian, v2 + v_size, v1, dev2_ge); + + if (v1 != v2) { + gsl_matrix_set(Hessian, v2, v1, dev2_gg); + gsl_matrix_set(Hessian, v2 + v_size, v1 + v_size, dev2_ee); + gsl_matrix_set(Hessian, v2, v1 + v_size, dev2_ge); + gsl_matrix_set(Hessian, v1 + v_size, v2, dev2_ge); + } + } + } + } + } + + // Invert Hessian. + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(v_size * 2); + + LUDecomp(Hessian, pmt, &sig); + LUInvert(Hessian, pmt, Hessian_inv); + + gsl_permutation_free(pmt); + gsl_matrix_free(Hessian); + + // Calculate Edgeworth correction factors after inverting + // Hessian. + if (c_size > 1) { + CalcCRT(Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e, + xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, d_size, + crt_a, crt_b, crt_c); + } else { + crt_a = 0.0; + crt_b = 0.0; + crt_c = 0.0; + } + + gsl_matrix_free(xHiDHiy_all_g); + gsl_matrix_free(xHiDHiy_all_e); + gsl_matrix_free(xHiDHix_all_g); + gsl_matrix_free(xHiDHix_all_e); + gsl_matrix_free(xHiDHixQixHiy_all_g); + gsl_matrix_free(xHiDHixQixHiy_all_e); + + gsl_matrix_free(QixHiDHiy_all_g); + gsl_matrix_free(QixHiDHiy_all_e); + gsl_matrix_free(QixHiDHix_all_g); + gsl_matrix_free(QixHiDHix_all_e); + gsl_matrix_free(QixHiDHixQixHiy_all_g); + gsl_matrix_free(QixHiDHixQixHiy_all_e); + + gsl_matrix_free(xHiDHiDHiy_all_gg); + gsl_matrix_free(xHiDHiDHiy_all_ee); + gsl_matrix_free(xHiDHiDHiy_all_ge); + gsl_matrix_free(xHiDHiDHix_all_gg); + gsl_matrix_free(xHiDHiDHix_all_ee); + gsl_matrix_free(xHiDHiDHix_all_ge); + + return; } // Update Vg, Ve. 
-void UpdateVgVe (const gsl_matrix *Hessian_inv, const gsl_vector *gradient, - const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) { - size_t v_size=gradient->size/2, d_size=V_g->size1; - size_t v; +void UpdateVgVe(const gsl_matrix *Hessian_inv, const gsl_vector *gradient, + const double step_scale, gsl_matrix *V_g, gsl_matrix *V_e) { + size_t v_size = gradient->size / 2, d_size = V_g->size1; + size_t v; - gsl_vector *vec_v=gsl_vector_alloc (v_size*2); + gsl_vector *vec_v = gsl_vector_alloc(v_size * 2); - double d; + double d; - // Vectorize Vg and Ve. - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<d_size; j++) { - if (j<i) {continue;} - v=GetIndex(i, j, d_size); + // Vectorize Vg and Ve. + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j < d_size; j++) { + if (j < i) { + continue; + } + v = GetIndex(i, j, d_size); - d=gsl_matrix_get (V_g, i, j); - gsl_vector_set (vec_v, v, d); + d = gsl_matrix_get(V_g, i, j); + gsl_vector_set(vec_v, v, d); - d=gsl_matrix_get (V_e, i, j); - gsl_vector_set (vec_v, v+v_size, d); - } - } + d = gsl_matrix_get(V_e, i, j); + gsl_vector_set(vec_v, v + v_size, d); + } + } - gsl_blas_dgemv (CblasNoTrans, -1.0*step_scale, Hessian_inv, - gradient, 1.0, vec_v); + gsl_blas_dgemv(CblasNoTrans, -1.0 * step_scale, Hessian_inv, gradient, 1.0, + vec_v); - // Save Vg and Ve. - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<d_size; j++) { - if (j<i) {continue;} - v=GetIndex(i, j, d_size); + // Save Vg and Ve. 
+ for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j < d_size; j++) { + if (j < i) { + continue; + } + v = GetIndex(i, j, d_size); - d=gsl_vector_get (vec_v, v); - gsl_matrix_set (V_g, i, j, d); - gsl_matrix_set (V_g, j, i, d); + d = gsl_vector_get(vec_v, v); + gsl_matrix_set(V_g, i, j, d); + gsl_matrix_set(V_g, j, i, d); - d=gsl_vector_get (vec_v, v+v_size); - gsl_matrix_set (V_e, i, j, d); - gsl_matrix_set (V_e, j, i, d); - } - } + d = gsl_vector_get(vec_v, v + v_size); + gsl_matrix_set(V_e, i, j, d); + gsl_matrix_set(V_e, j, i, d); + } + } - gsl_vector_free(vec_v); + gsl_vector_free(vec_v); - return; + return; } -double MphNR (const char func_name, const size_t max_iter, - const double max_prec, const gsl_vector *eval, - const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *Hi_all, - gsl_matrix *xHi_all, gsl_matrix *Hiy_all, gsl_matrix *V_g, - gsl_matrix *V_e, gsl_matrix *Hessian_inv, double &crt_a, - double &crt_b, double &crt_c) { - if (func_name!='R' && func_name!='L' && func_name!='r' && - func_name!='l') { - cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted "<< - "likelihood, 'L' for log-likelihood."<<endl; - return 0.0; - } - size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1; - size_t dc_size=d_size*c_size; - size_t v_size=d_size*(d_size+1)/2; - - double logdet_H, logdet_Q, yPy, logl_const; - double logl_old=0.0, logl_new=0.0, step_scale; - int sig; - size_t step_iter, flag_pd; - - gsl_matrix *Vg_save=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *Ve_save=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_temp=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *U_temp=gsl_matrix_alloc (d_size, d_size); - gsl_vector *D_temp=gsl_vector_alloc (d_size); - gsl_vector *xHiy=gsl_vector_alloc (dc_size); - gsl_vector *QixHiy=gsl_vector_alloc (dc_size); - gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size); - gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size); - - gsl_vector *gradient=gsl_vector_alloc (v_size*2); - - // Calculate |XXt| 
and (XXt)^{-1}. - gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt); - for (size_t i=0; i<c_size; ++i) { - for (size_t j=0; j<i; ++j) { - gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i)); - } - } - - gsl_permutation * pmt=gsl_permutation_alloc (c_size); - LUDecomp (XXt, pmt, &sig); - gsl_permutation_free (pmt); - - // Calculate the constant for logl. - if (func_name=='R' || func_name=='r') { - logl_const=-0.5*(double)(n_size-c_size) * - (double)d_size*log(2.0*M_PI) + - 0.5*(double)d_size*LULndet (XXt); - } else { - logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI); - } - - // Optimization iterations. - for (size_t t=0; t<max_iter; t++) { - gsl_matrix_memcpy (Vg_save, V_g); - gsl_matrix_memcpy (Ve_save, V_e); - - step_scale=1.0; step_iter=0; - do { - gsl_matrix_memcpy (V_g, Vg_save); - gsl_matrix_memcpy (V_e, Ve_save); - - // Update Vg, Ve, and invert Hessian. - if (t!=0) { - UpdateVgVe (Hessian_inv, gradient, step_scale, V_g, V_e); - } - - // Check if both Vg and Ve are positive definite. - flag_pd=1; - gsl_matrix_memcpy (V_temp, V_e); - EigenDecomp(V_temp, U_temp, D_temp, 0); - for (size_t i=0; i<d_size; i++) { - if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;} - } - gsl_matrix_memcpy (V_temp, V_g); - EigenDecomp(V_temp, U_temp, D_temp, 0); - for (size_t i=0; i<d_size; i++) { - if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;} - } - - // If flag_pd==1, continue to calculate quantities - // and logl. - if (flag_pd==1) { - CalcHiQi(eval,X,V_g,V_e,Hi_all,Qi,logdet_H,logdet_Q); - Calc_Hiy_all (Y, Hi_all, Hiy_all); - Calc_xHi_all (X, Hi_all, xHi_all); - - // Calculate QixHiy and yPy. - Calc_xHiy (Y, xHi_all, xHiy); - gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy); - - gsl_blas_ddot (QixHiy, xHiy, &yPy); - yPy=Calc_yHiy (Y, Hiy_all)-yPy; - - // Calculate log likelihood/restricted likelihood value. 
- if (func_name=='R' || func_name=='r') { - logl_new=logl_const-0.5*logdet_H-0.5*logdet_Q-0.5*yPy; - } else { - logl_new=logl_const-0.5*logdet_H-0.5*yPy; - } - } - - step_scale/=2.0; - step_iter++; - - } while ( (flag_pd==0 || logl_new<logl_old || - logl_new-logl_old>10 ) && step_iter<10 && t!=0); - - // Terminate if change is small. - if (t!=0) { - if (logl_new<logl_old || flag_pd==0) { - gsl_matrix_memcpy (V_g, Vg_save); - gsl_matrix_memcpy (V_e, Ve_save); - break; - } - - if (logl_new-logl_old<max_prec) { - break; - } - } - - logl_old=logl_new; - - CalcDev (func_name, eval, Qi, Hi_all, xHi_all, Hiy_all, - QixHiy, gradient, Hessian_inv, crt_a, crt_b, crt_c); - } - - // Mutiply Hessian_inv with -1.0. - // Now Hessian_inv is the variance matrix. - gsl_matrix_scale (Hessian_inv, -1.0); - - gsl_matrix_free(Vg_save); - gsl_matrix_free(Ve_save); - gsl_matrix_free(V_temp); - gsl_matrix_free(U_temp); - gsl_vector_free(D_temp); - gsl_vector_free(xHiy); - gsl_vector_free(QixHiy); - - gsl_matrix_free(Qi); - gsl_matrix_free(XXt); - - gsl_vector_free(gradient); - - return logl_new; +double MphNR(const char func_name, const size_t max_iter, const double max_prec, + const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y, + gsl_matrix *Hi_all, gsl_matrix *xHi_all, gsl_matrix *Hiy_all, + gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *Hessian_inv, + double &crt_a, double &crt_b, double &crt_c) { + if (func_name != 'R' && func_name != 'L' && func_name != 'r' && + func_name != 'l') { + cout << "func_name only takes 'R' or 'L': 'R' for log-restricted " + << "likelihood, 'L' for log-likelihood." 
<< endl; + return 0.0; + } + size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1; + size_t dc_size = d_size * c_size; + size_t v_size = d_size * (d_size + 1) / 2; + + double logdet_H, logdet_Q, yPy, logl_const; + double logl_old = 0.0, logl_new = 0.0, step_scale; + int sig; + size_t step_iter, flag_pd; + + gsl_matrix *Vg_save = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *Ve_save = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_temp = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *U_temp = gsl_matrix_alloc(d_size, d_size); + gsl_vector *D_temp = gsl_vector_alloc(d_size); + gsl_vector *xHiy = gsl_vector_alloc(dc_size); + gsl_vector *QixHiy = gsl_vector_alloc(dc_size); + gsl_matrix *Qi = gsl_matrix_alloc(dc_size, dc_size); + gsl_matrix *XXt = gsl_matrix_alloc(c_size, c_size); + + gsl_vector *gradient = gsl_vector_alloc(v_size * 2); + + // Calculate |XXt| and (XXt)^{-1}. + gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt); + for (size_t i = 0; i < c_size; ++i) { + for (size_t j = 0; j < i; ++j) { + gsl_matrix_set(XXt, i, j, gsl_matrix_get(XXt, j, i)); + } + } + + gsl_permutation *pmt = gsl_permutation_alloc(c_size); + LUDecomp(XXt, pmt, &sig); + gsl_permutation_free(pmt); + + // Calculate the constant for logl. + if (func_name == 'R' || func_name == 'r') { + logl_const = + -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) + + 0.5 * (double)d_size * LULndet(XXt); + } else { + logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI); + } + + // Optimization iterations. + for (size_t t = 0; t < max_iter; t++) { + gsl_matrix_memcpy(Vg_save, V_g); + gsl_matrix_memcpy(Ve_save, V_e); + + step_scale = 1.0; + step_iter = 0; + do { + gsl_matrix_memcpy(V_g, Vg_save); + gsl_matrix_memcpy(V_e, Ve_save); + + // Update Vg, Ve, and invert Hessian. + if (t != 0) { + UpdateVgVe(Hessian_inv, gradient, step_scale, V_g, V_e); + } + + // Check if both Vg and Ve are positive definite. 
+ flag_pd = 1; + gsl_matrix_memcpy(V_temp, V_e); + EigenDecomp(V_temp, U_temp, D_temp, 0); + for (size_t i = 0; i < d_size; i++) { + if (gsl_vector_get(D_temp, i) <= 0) { + flag_pd = 0; + } + } + gsl_matrix_memcpy(V_temp, V_g); + EigenDecomp(V_temp, U_temp, D_temp, 0); + for (size_t i = 0; i < d_size; i++) { + if (gsl_vector_get(D_temp, i) <= 0) { + flag_pd = 0; + } + } + + // If flag_pd==1, continue to calculate quantities + // and logl. + if (flag_pd == 1) { + CalcHiQi(eval, X, V_g, V_e, Hi_all, Qi, logdet_H, logdet_Q); + Calc_Hiy_all(Y, Hi_all, Hiy_all); + Calc_xHi_all(X, Hi_all, xHi_all); + + // Calculate QixHiy and yPy. + Calc_xHiy(Y, xHi_all, xHiy); + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy); + + gsl_blas_ddot(QixHiy, xHiy, &yPy); + yPy = Calc_yHiy(Y, Hiy_all) - yPy; + + // Calculate log likelihood/restricted likelihood value. + if (func_name == 'R' || func_name == 'r') { + logl_new = logl_const - 0.5 * logdet_H - 0.5 * logdet_Q - 0.5 * yPy; + } else { + logl_new = logl_const - 0.5 * logdet_H - 0.5 * yPy; + } + } + + step_scale /= 2.0; + step_iter++; + + } while ( + (flag_pd == 0 || logl_new < logl_old || logl_new - logl_old > 10) && + step_iter < 10 && t != 0); + + // Terminate if change is small. + if (t != 0) { + if (logl_new < logl_old || flag_pd == 0) { + gsl_matrix_memcpy(V_g, Vg_save); + gsl_matrix_memcpy(V_e, Ve_save); + break; + } + + if (logl_new - logl_old < max_prec) { + break; + } + } + + logl_old = logl_new; + + CalcDev(func_name, eval, Qi, Hi_all, xHi_all, Hiy_all, QixHiy, gradient, + Hessian_inv, crt_a, crt_b, crt_c); + } + + // Mutiply Hessian_inv with -1.0. + // Now Hessian_inv is the variance matrix. 
+ gsl_matrix_scale(Hessian_inv, -1.0); + + gsl_matrix_free(Vg_save); + gsl_matrix_free(Ve_save); + gsl_matrix_free(V_temp); + gsl_matrix_free(U_temp); + gsl_vector_free(D_temp); + gsl_vector_free(xHiy); + gsl_vector_free(QixHiy); + + gsl_matrix_free(Qi); + gsl_matrix_free(XXt); + + gsl_vector_free(gradient); + + return logl_new; } // Initialize Vg, Ve and B. void MphInitial(const size_t em_iter, const double em_prec, - const size_t nr_iter, const double nr_prec, - const gsl_vector *eval, const gsl_matrix *X, - const gsl_matrix *Y, const double l_min, const double l_max, - const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e, - gsl_matrix *B) { - - gsl_matrix_set_zero (V_g); - gsl_matrix_set_zero (V_e); - gsl_matrix_set_zero (B); - - size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1; - double a, b, c; - double lambda, logl, vg, ve; - - // Initialize the diagonal elements of Vg and Ve using univariate - // LMM and REML estimates. - gsl_matrix *Xt=gsl_matrix_alloc (n_size, c_size); - gsl_vector *beta_temp=gsl_vector_alloc(c_size); - gsl_vector *se_beta_temp=gsl_vector_alloc(c_size); - - gsl_matrix_transpose_memcpy (Xt, X); - - for (size_t i=0; i<d_size; i++) { - gsl_vector_const_view Y_row=gsl_matrix_const_row (Y, i); - CalcLambda ('R', eval, Xt, &Y_row.vector, l_min, l_max, - n_region, lambda, logl); - CalcLmmVgVeBeta (eval, Xt, &Y_row.vector, lambda, vg, ve, - beta_temp, se_beta_temp); - - gsl_matrix_set(V_g, i, i, vg); - gsl_matrix_set(V_e, i, i, ve); - } - - gsl_matrix_free (Xt); - gsl_vector_free (beta_temp); - gsl_vector_free (se_beta_temp); - - // If number of phenotypes is above four, then obtain the off - // diagonal elements with two trait models. - if (d_size>4) { - - // First obtain good initial values. - // Large matrices for EM. 
- gsl_matrix *U_hat=gsl_matrix_alloc (2, n_size); - gsl_matrix *E_hat=gsl_matrix_alloc (2, n_size); - gsl_matrix *OmegaU=gsl_matrix_alloc (2, n_size); - gsl_matrix *OmegaE=gsl_matrix_alloc (2, n_size); - gsl_matrix *UltVehiY=gsl_matrix_alloc (2, n_size); - gsl_matrix *UltVehiBX=gsl_matrix_alloc (2, n_size); - gsl_matrix *UltVehiU=gsl_matrix_alloc (2, n_size); - gsl_matrix *UltVehiE=gsl_matrix_alloc (2, n_size); - - // Large matrices for NR. Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all=gsl_matrix_alloc (2, 2*n_size); - - // Each column is H_k^{-1}y_k. - gsl_matrix *Hiy_all=gsl_matrix_alloc (2, n_size); - - // Each dcxdc block is x_k\otimes H_k^{-1}. - gsl_matrix *xHi_all=gsl_matrix_alloc (2*c_size, 2*n_size); - gsl_matrix *Hessian=gsl_matrix_alloc (6, 6); - - // 2 by n matrix of Y. - gsl_matrix *Y_sub=gsl_matrix_alloc (2, n_size); - gsl_matrix *Vg_sub=gsl_matrix_alloc (2, 2); - gsl_matrix *Ve_sub=gsl_matrix_alloc (2, 2); - gsl_matrix *B_sub=gsl_matrix_alloc (2, c_size); - - for (size_t i=0; i<d_size; i++) { - gsl_vector_view Y_sub1=gsl_matrix_row (Y_sub, 0); - gsl_vector_const_view Y_1=gsl_matrix_const_row (Y, i); - gsl_vector_memcpy (&Y_sub1.vector, &Y_1.vector); - - for (size_t j=i+1; j<d_size; j++) { - gsl_vector_view Y_sub2=gsl_matrix_row (Y_sub, 1); - gsl_vector_const_view Y_2=gsl_matrix_const_row (Y, j); - gsl_vector_memcpy (&Y_sub2.vector, &Y_2.vector); - - gsl_matrix_set_zero (Vg_sub); - gsl_matrix_set_zero (Ve_sub); - gsl_matrix_set (Vg_sub, 0, 0, gsl_matrix_get (V_g, i, i)); - gsl_matrix_set (Ve_sub, 0, 0, gsl_matrix_get (V_e, i, i)); - gsl_matrix_set (Vg_sub, 1, 1, gsl_matrix_get (V_g, j, j)); - gsl_matrix_set (Ve_sub, 1, 1, gsl_matrix_get (V_e, j, j)); - - logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, - UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub); - logl=MphNR ('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, - xHi_all, Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c); - - gsl_matrix_set(V_g, i, j, 
gsl_matrix_get (Vg_sub, 0, 1)); - gsl_matrix_set(V_g, j, i, gsl_matrix_get (Vg_sub, 0, 1)); - - gsl_matrix_set(V_e, i, j, ve=gsl_matrix_get (Ve_sub, 0, 1)); - gsl_matrix_set(V_e, j, i, ve=gsl_matrix_get (Ve_sub, 0, 1)); - } - } - - // Free matrices. - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_matrix_free(Y_sub); - gsl_matrix_free(Vg_sub); - gsl_matrix_free(Ve_sub); - gsl_matrix_free(B_sub); - } - - // Calculate B hat using GSL estimate. - gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); - - gsl_vector *D_l=gsl_vector_alloc (d_size); - gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *Qi=gsl_matrix_alloc (d_size*c_size, d_size*c_size); - gsl_vector *XHiy=gsl_vector_alloc (d_size*c_size); - gsl_vector *beta=gsl_vector_alloc (d_size*c_size); - - gsl_vector_set_zero (XHiy); - - double logdet_Ve, logdet_Q, dl, d, delta, dx, dy; - - // Eigen decomposition and calculate log|Ve|. - logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi); - - // Calculate Qi and log|Q|. - logdet_Q=CalcQi (eval, D_l, X, Qi); - - // Calculate UltVehiY. - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,UltVehi,Y,0.0,UltVehiY); - - //calculate XHiy - for (size_t i=0; i<d_size; i++) { - dl=gsl_vector_get(D_l, i); - - for (size_t j=0; j<c_size; j++) { - d=0.0; - for (size_t k=0; k<n_size; k++) { - delta=gsl_vector_get(eval, k); - dx=gsl_matrix_get(X, j, k); - dy=gsl_matrix_get(UltVehiY, i, k); - d+=dy*dx/(delta*dl+1.0); - } - gsl_vector_set(XHiy, j*d_size+i, d); - } - } - - gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta); - - // Multiply beta by UltVeh and save to B. 
- for (size_t i=0; i<c_size; i++) { - gsl_vector_view B_col=gsl_matrix_column (B, i); - gsl_vector_view beta_sub=gsl_vector_subvector(beta,i*d_size,d_size); - gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0, - &B_col.vector); - } - - // Free memory. - gsl_matrix_free(UltVehiY); - - gsl_vector_free(D_l); - gsl_matrix_free(UltVeh); - gsl_matrix_free(UltVehi); - gsl_matrix_free(Qi); - gsl_vector_free(XHiy); - gsl_vector_free(beta); - - return; + const size_t nr_iter, const double nr_prec, + const gsl_vector *eval, const gsl_matrix *X, + const gsl_matrix *Y, const double l_min, const double l_max, + const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e, + gsl_matrix *B) { + + gsl_matrix_set_zero(V_g); + gsl_matrix_set_zero(V_e); + gsl_matrix_set_zero(B); + + size_t n_size = eval->size, c_size = X->size1, d_size = Y->size1; + double a, b, c; + double lambda, logl, vg, ve; + + // Initialize the diagonal elements of Vg and Ve using univariate + // LMM and REML estimates. + gsl_matrix *Xt = gsl_matrix_alloc(n_size, c_size); + gsl_vector *beta_temp = gsl_vector_alloc(c_size); + gsl_vector *se_beta_temp = gsl_vector_alloc(c_size); + + gsl_matrix_transpose_memcpy(Xt, X); + + for (size_t i = 0; i < d_size; i++) { + gsl_vector_const_view Y_row = gsl_matrix_const_row(Y, i); + CalcLambda('R', eval, Xt, &Y_row.vector, l_min, l_max, n_region, lambda, + logl); + CalcLmmVgVeBeta(eval, Xt, &Y_row.vector, lambda, vg, ve, beta_temp, + se_beta_temp); + + gsl_matrix_set(V_g, i, i, vg); + gsl_matrix_set(V_e, i, i, ve); + } + + gsl_matrix_free(Xt); + gsl_vector_free(beta_temp); + gsl_vector_free(se_beta_temp); + + // If number of phenotypes is above four, then obtain the off + // diagonal elements with two trait models. + if (d_size > 4) { + + // First obtain good initial values. + // Large matrices for EM. 
+ gsl_matrix *U_hat = gsl_matrix_alloc(2, n_size); + gsl_matrix *E_hat = gsl_matrix_alloc(2, n_size); + gsl_matrix *OmegaU = gsl_matrix_alloc(2, n_size); + gsl_matrix *OmegaE = gsl_matrix_alloc(2, n_size); + gsl_matrix *UltVehiY = gsl_matrix_alloc(2, n_size); + gsl_matrix *UltVehiBX = gsl_matrix_alloc(2, n_size); + gsl_matrix *UltVehiU = gsl_matrix_alloc(2, n_size); + gsl_matrix *UltVehiE = gsl_matrix_alloc(2, n_size); + + // Large matrices for NR. Each dxd block is H_k^{-1}. + gsl_matrix *Hi_all = gsl_matrix_alloc(2, 2 * n_size); + + // Each column is H_k^{-1}y_k. + gsl_matrix *Hiy_all = gsl_matrix_alloc(2, n_size); + + // Each dcxdc block is x_k\otimes H_k^{-1}. + gsl_matrix *xHi_all = gsl_matrix_alloc(2 * c_size, 2 * n_size); + gsl_matrix *Hessian = gsl_matrix_alloc(6, 6); + + // 2 by n matrix of Y. + gsl_matrix *Y_sub = gsl_matrix_alloc(2, n_size); + gsl_matrix *Vg_sub = gsl_matrix_alloc(2, 2); + gsl_matrix *Ve_sub = gsl_matrix_alloc(2, 2); + gsl_matrix *B_sub = gsl_matrix_alloc(2, c_size); + + for (size_t i = 0; i < d_size; i++) { + gsl_vector_view Y_sub1 = gsl_matrix_row(Y_sub, 0); + gsl_vector_const_view Y_1 = gsl_matrix_const_row(Y, i); + gsl_vector_memcpy(&Y_sub1.vector, &Y_1.vector); + + for (size_t j = i + 1; j < d_size; j++) { + gsl_vector_view Y_sub2 = gsl_matrix_row(Y_sub, 1); + gsl_vector_const_view Y_2 = gsl_matrix_const_row(Y, j); + gsl_vector_memcpy(&Y_sub2.vector, &Y_2.vector); + + gsl_matrix_set_zero(Vg_sub); + gsl_matrix_set_zero(Ve_sub); + gsl_matrix_set(Vg_sub, 0, 0, gsl_matrix_get(V_g, i, i)); + gsl_matrix_set(Ve_sub, 0, 0, gsl_matrix_get(V_e, i, i)); + gsl_matrix_set(Vg_sub, 1, 1, gsl_matrix_get(V_g, j, j)); + gsl_matrix_set(Ve_sub, 1, 1, gsl_matrix_get(V_e, j, j)); + + logl = MphEM('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, + Vg_sub, Ve_sub, B_sub); + logl = MphNR('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, xHi_all, + Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c); + + 
gsl_matrix_set(V_g, i, j, gsl_matrix_get(Vg_sub, 0, 1)); + gsl_matrix_set(V_g, j, i, gsl_matrix_get(Vg_sub, 0, 1)); + + gsl_matrix_set(V_e, i, j, ve = gsl_matrix_get(Ve_sub, 0, 1)); + gsl_matrix_set(V_e, j, i, ve = gsl_matrix_get(Ve_sub, 0, 1)); + } + } + + // Free matrices. + gsl_matrix_free(U_hat); + gsl_matrix_free(E_hat); + gsl_matrix_free(OmegaU); + gsl_matrix_free(OmegaE); + gsl_matrix_free(UltVehiY); + gsl_matrix_free(UltVehiBX); + gsl_matrix_free(UltVehiU); + gsl_matrix_free(UltVehiE); + + gsl_matrix_free(Hi_all); + gsl_matrix_free(Hiy_all); + gsl_matrix_free(xHi_all); + gsl_matrix_free(Hessian); + + gsl_matrix_free(Y_sub); + gsl_matrix_free(Vg_sub); + gsl_matrix_free(Ve_sub); + gsl_matrix_free(B_sub); + } + + // Calculate B hat using GSL estimate. + gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); + + gsl_vector *D_l = gsl_vector_alloc(d_size); + gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *UltVehi = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *Qi = gsl_matrix_alloc(d_size * c_size, d_size * c_size); + gsl_vector *XHiy = gsl_vector_alloc(d_size * c_size); + gsl_vector *beta = gsl_vector_alloc(d_size * c_size); + + gsl_vector_set_zero(XHiy); + + double logdet_Ve, logdet_Q, dl, d, delta, dx, dy; + + // Eigen decomposition and calculate log|Ve|. + logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi); + + // Calculate Qi and log|Q|. + logdet_Q = CalcQi(eval, D_l, X, Qi); + + // Calculate UltVehiY. 
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY); + + // calculate XHiy + for (size_t i = 0; i < d_size; i++) { + dl = gsl_vector_get(D_l, i); + + for (size_t j = 0; j < c_size; j++) { + d = 0.0; + for (size_t k = 0; k < n_size; k++) { + delta = gsl_vector_get(eval, k); + dx = gsl_matrix_get(X, j, k); + dy = gsl_matrix_get(UltVehiY, i, k); + d += dy * dx / (delta * dl + 1.0); + } + gsl_vector_set(XHiy, j * d_size + i, d); + } + } + + gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, XHiy, 0.0, beta); + + // Multiply beta by UltVeh and save to B. + for (size_t i = 0; i < c_size; i++) { + gsl_vector_view B_col = gsl_matrix_column(B, i); + gsl_vector_view beta_sub = gsl_vector_subvector(beta, i * d_size, d_size); + gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0, + &B_col.vector); + } + + // Free memory. + gsl_matrix_free(UltVehiY); + + gsl_vector_free(D_l); + gsl_matrix_free(UltVeh); + gsl_matrix_free(UltVehi); + gsl_matrix_free(Qi); + gsl_vector_free(XHiy); + gsl_vector_free(beta); + + return; } // p-value correction // mode=1 Wald; mode=2 LRT; mode=3 SCORE; -double PCRT (const size_t mode, const size_t d_size, const double p_value, - const double crt_a, const double crt_b, const double crt_c) { - double p_crt=0.0, chisq_crt=0.0, q=(double)d_size; - double chisq=gsl_cdf_chisq_Qinv(p_value, (double)d_size ); - - if (mode==1) { - double a=crt_c/(2.0*q*(q+2.0)); - double b=1.0+(crt_a+crt_b)/(2.0*q); - chisq_crt=(-1.0*b+sqrt(b*b+4.0*a*chisq))/(2.0*a); - } else if (mode==2) { - chisq_crt=chisq/(1.0+crt_a/(2.0*q) ); - } else { - chisq_crt=chisq; - } - - p_crt=gsl_cdf_chisq_Q (chisq_crt, (double)d_size ); - - return p_crt; +double PCRT(const size_t mode, const size_t d_size, const double p_value, + const double crt_a, const double crt_b, const double crt_c) { + double p_crt = 0.0, chisq_crt = 0.0, q = (double)d_size; + double chisq = gsl_cdf_chisq_Qinv(p_value, (double)d_size); + + if (mode == 1) { + double a = crt_c / (2.0 * q * (q + 2.0)); + 
double b = 1.0 + (crt_a + crt_b) / (2.0 * q); + chisq_crt = (-1.0 * b + sqrt(b * b + 4.0 * a * chisq)) / (2.0 * a); + } else if (mode == 2) { + chisq_crt = chisq / (1.0 + crt_a / (2.0 * q)); + } else { + chisq_crt = chisq; + } + + p_crt = gsl_cdf_chisq_Q(chisq_crt, (double)d_size); + + return p_crt; } // WJA added. -void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY) { - string file_bgen=file_oxford+".bgen"; - ifstream infile (file_bgen.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bgen file:"<<file_bgen<<endl; - return; - } - - clock_t time_start=clock(); - time_UtX=0; time_opt=0; - - string line; - - // Create a large matrix. - size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0; - double crt_a, crt_b, crt_c; - int n_miss, c_phen; - double geno, x_mean; - size_t c=0; - size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2; - - size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; - - // Large matrices for EM. - gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); - - // Large matrices for NR. Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); - - // Each column is H_k^{-1}y_k. - gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); - - // Each dcxdc block is x_k\otimes H_k^{-1}. 
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); - gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); - gsl_vector *x=gsl_vector_alloc (n_size); - gsl_vector *x_miss=gsl_vector_alloc (n_size); - - gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); - gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1); - gsl_vector *beta=gsl_vector_alloc (d_size); - gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size); - - // Null estimates for initial values. - gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1); - gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size); - - gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); - gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size); - gsl_matrix_view xHi_all_sub = - gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size); - - gsl_matrix_transpose_memcpy (Y, UtY); - - gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW); - - gsl_vector_view X_row=gsl_matrix_row(X, c_size); - gsl_vector_set_zero(&X_row.vector); - gsl_vector_view B_col=gsl_matrix_column(B, c_size); - gsl_vector_set_zero(&B_col.vector); - - MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, - Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix); - logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub.matrix); - logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, - Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, - &B_sub.matrix, se_B_null); - - c=0; - Vg_remle_null.clear(); - Ve_remle_null.clear(); - for (size_t i=0; i<d_size; 
i++) { - for (size_t j=i; j<d_size; j++) { - Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, - c+v_size) ); - c++; - } - } - beta_remle_null.clear(); - se_beta_remle_null.clear(); - for (size_t i=0; i<se_B_null->size1; i++) { - for (size_t j=0; j<se_B_null->size2; j++) { - beta_remle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); - } - } - logl_remle_H0=logl_H0; - - cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - cout.precision(4); - - cout<<"REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE likelihood = "<<logl_H0<<endl; - - - logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub.matrix); - logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, - Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, - &B_sub.matrix, se_B_null); - - c=0; - Vg_mle_null.clear(); - Ve_mle_null.clear(); - for 
(size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size)); - c++; - } - } - beta_mle_null.clear(); - se_beta_mle_null.clear(); - for (size_t i=0; i<se_B_null->size1; i++) { - for (size_t j=0; j<se_B_null->size2; j++) { - beta_mle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); - } - } - logl_mle_H0=logl_H0; - - cout<<"MLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"MLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"MLE likelihood = "<<logl_H0<<endl; - - - vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; - for (size_t i=0; i<d_size; i++) { - v_beta.push_back(0.0); - } - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg.push_back(0.0); - v_Ve.push_back(0.0); - v_Vbeta.push_back(0.0); - } - } - - gsl_matrix_memcpy (V_g_null, V_g); - gsl_matrix_memcpy (V_e_null, V_e); - gsl_matrix_memcpy (B_null, B); - - // Read in header. 
- uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4); - infile.read(reinterpret_cast<char*>(&bgen_header_length),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsnps),4); - bgen_snp_block_offset-=4; - infile.read(reinterpret_cast<char*>(&bgen_nsamples),4); - bgen_snp_block_offset-=4; - infile.ignore(4+bgen_header_length-20); - bgen_snp_block_offset-=4+bgen_header_length-20; - infile.read(reinterpret_cast<char*>(&bgen_flags),4); - bgen_snp_block_offset-=4; - bool CompressedSNPBlocks=bgen_flags&0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; - double bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - std::cout<<"Warning: WJA hard coded SNP missingness threshold "<< - "of 10%"<<std::endl; - - // Start reading genotypes and analyze. - size_t csnp=0, t_last=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (indicator_snp[t]==0) {continue;} - t_last++; - } - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - if (indicator_snp[t]==0) {continue;} - - // Read SNP header. 
- id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char*>(&bgen_N),4); - infile.read(reinterpret_cast<char*>(&bgen_LS),2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char*>(&bgen_LR),2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char*>(&bgen_LC),2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4); - - infile.read(reinterpret_cast<char*>(&bgen_LA),4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char*>(&bgen_LB),4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3*bgen_N]; - - if (indicator_snp[t]==0) { - if(CompressedSNPBlocks) - infile.read(reinterpret_cast<char*>(&bgen_P),4); - else - bgen_P=6*bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if(CompressedSNPBlocks) { - - infile.read(reinterpret_cast<char*>(&bgen_P),4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size=6*bgen_N; - - infile.read(reinterpret_cast<char*>(zipped_data),bgen_P); - - int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), - reinterpret_cast<uLongf*>(&unzipped_data_size), - reinterpret_cast<Bytef*>(zipped_data), - static_cast<uLong> (bgen_P)); - assert(result == Z_OK); - - } else { - - bgen_P=6*bgen_N; - infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P); - } - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<bgen_N; ++i) { - if (indicator_idv[i]==0) {continue;} - - bgen_geno_prob_AA = - static_cast<double>(unzipped_data[i*3])/32768.0; - bgen_geno_prob_AB = - static_cast<double>(unzipped_data[i*3+1])/32768.0; - bgen_geno_prob_BB = - static_cast<double>(unzipped_data[i*3+2])/32768.0; - - // WJA. 
- bgen_geno_prob_non_miss=bgen_geno_prob_AA + - bgen_geno_prob_AB+bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss<0.9) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } - else { - - bgen_geno_prob_AA/=bgen_geno_prob_non_miss; - bgen_geno_prob_AB/=bgen_geno_prob_non_miss; - bgen_geno_prob_BB/=bgen_geno_prob_non_miss; - - geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=static_cast<double>(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);} - } - - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize); - gsl_vector_memcpy (&Xlarge_col.vector, x); - csnp++; - - if (csnp%msize==0 || csnp==t_last ) { - size_t l=0; - if (csnp%msize==0) {l=msize;} else {l=csnp%msize;} - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Xlarge); - - for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i); - gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector); - - // Initial values. - gsl_matrix_memcpy (V_g, V_g_null); - gsl_matrix_memcpy (V_e, V_e_null); - gsl_matrix_memcpy (B, B_null); - - time_start=clock(); - - // 3 is before 1. 
- if (a_mode==3 || a_mode==4) { - p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g_null, V_e_null, UltVehiY, beta, Vbeta); - if (p_score<p_nr && crt==1) { - logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, - crt_b, crt_c); - p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c); - } - } - - if (a_mode==2 || a_mode==4) { - logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, - UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); - - // Calculate beta and Vbeta. - p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size ); - - if (p_lrt<p_nr) { - logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - - // Calculate beta and Vbeta. - p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), - (double)d_size ); - - if (crt==1) { - p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c); - } - } - } - - if (a_mode==1 || a_mode==4) { - logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, - UltVehiU, UltVehiE, V_g, V_e, B); - p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - - if (p_wald<p_nr) { - logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - - if (crt==1) { - p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c); - } - } - } - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. 
- for (size_t i=0; i<d_size; i++) { - v_beta[i]=gsl_vector_get (beta, i); - } - - c=0; - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg[c]=gsl_matrix_get (V_g, i, j); - v_Ve[c]=gsl_matrix_get (V_e, i, j); - v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j); - c++; - } - } - - MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, - v_Vbeta}; - sumStat.push_back(SNPs); - } - } - } - cout<<endl; - - infile.close(); - infile.clear(); - - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_vector_free(x); - gsl_vector_free(x_miss); - - gsl_matrix_free(Y); - gsl_matrix_free(X); - gsl_matrix_free(V_g); - gsl_matrix_free(V_e); - gsl_matrix_free(B); - gsl_vector_free(beta); - gsl_matrix_free(Vbeta); - - gsl_matrix_free(V_g_null); - gsl_matrix_free(V_e_null); - gsl_matrix_free(B_null); - gsl_matrix_free(se_B_null); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - return; +void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY) { + string file_bgen = file_oxford + ".bgen"; + ifstream infile(file_bgen.c_str(), ios::binary); + if (!infile) { + cout << "error reading bgen file:" << file_bgen << endl; + return; + } + + clock_t time_start = clock(); + time_UtX = 0; + time_opt = 0; + + string line; + + // Create a large matrix. 
+ size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix_set_zero(Xlarge); + + double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0; + double crt_a, crt_b, crt_c; + int n_miss, c_phen; + double geno, x_mean; + size_t c = 0; + size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2; + + size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; + + // Large matrices for EM. + gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size); + + // Large matrices for NR. Each dxd block is H_k^{-1}. + gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size); + + // Each column is H_k^{-1}y_k. + gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size); + + // Each dcxdc block is x_k\otimes H_k^{-1}. + gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size); + gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); + gsl_vector *x = gsl_vector_alloc(n_size); + gsl_vector *x_miss = gsl_vector_alloc(n_size); + + gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size); + gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1); + gsl_vector *beta = gsl_vector_alloc(d_size); + gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size); + + // Null estimates for initial values. 
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1); + gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size); + + gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size); + gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size); + gsl_matrix_view xHi_all_sub = + gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size); + + gsl_matrix_transpose_memcpy(Y, UtY); + + gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW); + + gsl_vector_view X_row = gsl_matrix_row(X, c_size); + gsl_vector_set_zero(&X_row.vector); + gsl_vector_view B_col = gsl_matrix_column(B, c_size); + gsl_vector_set_zero(&B_col.vector); + + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, + l_max, n_region, V_g, V_e, &B_sub.matrix); + logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub.matrix); + logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, + &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, + crt_c); + MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, + se_B_null); + + c = 0; + Vg_remle_null.clear(); + Ve_remle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_remle_null.clear(); + se_beta_remle_null.clear(); + for (size_t i = 0; i < se_B_null->size1; i++) { + for (size_t j = 0; j < se_B_null->size2; j++) { + beta_remle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j)); + } + } + 
logl_remle_H0 = logl_H0; + + cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + cout.precision(4); + + cout << "REMLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "REMLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "REMLE likelihood = " << logl_H0 << endl; + + logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub.matrix); + logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, + &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, + crt_c); + MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, + se_B_null); + + c = 0; + Vg_mle_null.clear(); + Ve_mle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_mle_null.clear(); + se_beta_mle_null.clear(); + for (size_t i = 0; i < se_B_null->size1; i++) { + for (size_t j = 0; j < se_B_null->size2; j++) { + 
beta_mle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j)); + } + } + logl_mle_H0 = logl_H0; + + cout << "MLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "MLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "MLE likelihood = " << logl_H0 << endl; + + vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; + for (size_t i = 0; i < d_size; i++) { + v_beta.push_back(0.0); + } + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg.push_back(0.0); + v_Ve.push_back(0.0); + v_Vbeta.push_back(0.0); + } + } + + gsl_matrix_memcpy(V_g_null, V_g); + gsl_matrix_memcpy(V_e_null, V_e); + gsl_matrix_memcpy(B_null, B); + + // Read in header. 
+ uint32_t bgen_snp_block_offset; + uint32_t bgen_header_length; + uint32_t bgen_nsamples; + uint32_t bgen_nsnps; + uint32_t bgen_flags; + infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); + infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); + bgen_snp_block_offset -= 4; + infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); + bgen_snp_block_offset -= 4; + infile.ignore(4 + bgen_header_length - 20); + bgen_snp_block_offset -= 4 + bgen_header_length - 20; + infile.read(reinterpret_cast<char *>(&bgen_flags), 4); + bgen_snp_block_offset -= 4; + bool CompressedSNPBlocks = bgen_flags & 0x1; + + infile.ignore(bgen_snp_block_offset); + + double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; + double bgen_geno_prob_non_miss; + + uint32_t bgen_N; + uint16_t bgen_LS; + uint16_t bgen_LR; + uint16_t bgen_LC; + uint32_t bgen_SNP_pos; + uint32_t bgen_LA; + std::string bgen_A_allele; + uint32_t bgen_LB; + std::string bgen_B_allele; + uint32_t bgen_P; + size_t unzipped_data_size; + string id; + string rs; + string chr; + std::cout << "Warning: WJA hard coded SNP missingness threshold " + << "of 10%" << std::endl; + + // Start reading genotypes and analyze. + size_t csnp = 0, t_last = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (indicator_snp[t] == 0) { + continue; + } + t_last++; + } + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // Read SNP header. 
+ id.clear(); + rs.clear(); + chr.clear(); + bgen_A_allele.clear(); + bgen_B_allele.clear(); + + infile.read(reinterpret_cast<char *>(&bgen_N), 4); + infile.read(reinterpret_cast<char *>(&bgen_LS), 2); + + id.resize(bgen_LS); + infile.read(&id[0], bgen_LS); + + infile.read(reinterpret_cast<char *>(&bgen_LR), 2); + rs.resize(bgen_LR); + infile.read(&rs[0], bgen_LR); + + infile.read(reinterpret_cast<char *>(&bgen_LC), 2); + chr.resize(bgen_LC); + infile.read(&chr[0], bgen_LC); + + infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); + + infile.read(reinterpret_cast<char *>(&bgen_LA), 4); + bgen_A_allele.resize(bgen_LA); + infile.read(&bgen_A_allele[0], bgen_LA); + + infile.read(reinterpret_cast<char *>(&bgen_LB), 4); + bgen_B_allele.resize(bgen_LB); + infile.read(&bgen_B_allele[0], bgen_LB); + + uint16_t unzipped_data[3 * bgen_N]; + + if (indicator_snp[t] == 0) { + if (CompressedSNPBlocks) + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + else + bgen_P = 6 * bgen_N; + + infile.ignore(static_cast<size_t>(bgen_P)); + + continue; + } + + if (CompressedSNPBlocks) { + + infile.read(reinterpret_cast<char *>(&bgen_P), 4); + uint8_t zipped_data[bgen_P]; + + unzipped_data_size = 6 * bgen_N; + + infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); + + int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), + reinterpret_cast<uLongf *>(&unzipped_data_size), + reinterpret_cast<Bytef *>(zipped_data), + static_cast<uLong>(bgen_P)); + assert(result == Z_OK); + + } else { + + bgen_P = 6 * bgen_N; + infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); + } + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < bgen_N; ++i) { + if (indicator_idv[i] == 0) { + continue; + } + + bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; + bgen_geno_prob_AB = + static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; + bgen_geno_prob_BB = + static_cast<double>(unzipped_data[i * 3 + 2]) / 
32768.0; + + // WJA. + bgen_geno_prob_non_miss = + bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; + if (bgen_geno_prob_non_miss < 0.9) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + + bgen_geno_prob_AA /= bgen_geno_prob_non_miss; + bgen_geno_prob_AB /= bgen_geno_prob_non_miss; + bgen_geno_prob_BB /= bgen_geno_prob_non_miss; + + geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= static_cast<double>(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + } + + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize); + gsl_vector_memcpy(&Xlarge_col.vector, x); + csnp++; + + if (csnp % msize == 0 || csnp == t_last) { + size_t l = 0; + if (csnp % msize == 0) { + l = msize; + } else { + l = csnp % msize; + } + + gsl_matrix_view Xlarge_sub = + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub = + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + + time_start = clock(); + eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + &UtXlarge_sub.matrix); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Xlarge); + + for (size_t i = 0; i < l; i++) { + gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); + gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector); + + // Initial values. + gsl_matrix_memcpy(V_g, V_g_null); + gsl_matrix_memcpy(V_e, V_e_null); + gsl_matrix_memcpy(B, B_null); + + time_start = clock(); + + // 3 is before 1. 
+ if (a_mode == 3 || a_mode == 4) { + p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, + V_e_null, UltVehiY, beta, Vbeta); + if (p_score < p_nr && crt == 1) { + logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c); + } + } + + if (a_mode == 2 || a_mode == 4) { + logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, + E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, + UltVehiE, V_g, V_e, B); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (p_lrt < p_nr) { + logl_H1 = + MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, + xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (crt == 1) { + p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c); + } + } + } + + if (a_mode == 1 || a_mode == 4) { + logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, + E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, + UltVehiE, V_g, V_e, B); + p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (p_wald < p_nr) { + logl_H1 = + MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, + xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (crt == 1) { + p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c); + } + } + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. 
+ for (size_t i = 0; i < d_size; i++) { + v_beta[i] = gsl_vector_get(beta, i); + } + + c = 0; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg[c] = gsl_matrix_get(V_g, i, j); + v_Ve[c] = gsl_matrix_get(V_e, i, j); + v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j); + c++; + } + } + + MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta}; + sumStat.push_back(SNPs); + } + } + } + cout << endl; + + infile.close(); + infile.clear(); + + gsl_matrix_free(U_hat); + gsl_matrix_free(E_hat); + gsl_matrix_free(OmegaU); + gsl_matrix_free(OmegaE); + gsl_matrix_free(UltVehiY); + gsl_matrix_free(UltVehiBX); + gsl_matrix_free(UltVehiU); + gsl_matrix_free(UltVehiE); + + gsl_matrix_free(Hi_all); + gsl_matrix_free(Hiy_all); + gsl_matrix_free(xHi_all); + gsl_matrix_free(Hessian); + + gsl_vector_free(x); + gsl_vector_free(x_miss); + + gsl_matrix_free(Y); + gsl_matrix_free(X); + gsl_matrix_free(V_g); + gsl_matrix_free(V_e); + gsl_matrix_free(B); + gsl_vector_free(beta); + gsl_matrix_free(Vbeta); + + gsl_matrix_free(V_g_null); + gsl_matrix_free(V_e_null); + gsl_matrix_free(B_null); + gsl_matrix_free(se_B_null); + + gsl_matrix_free(Xlarge); + gsl_matrix_free(UtXlarge); + + return; } -void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return; - } - - clock_t time_start=clock(); - time_UtX=0; time_opt=0; - - string line; - char *ch_ptr; - - double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0; - double crt_a, crt_b, crt_c; - int n_miss, c_phen; - double geno, x_mean; - size_t c=0; - size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2; - - size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; - - // Create a large matrix. 
- size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - // Large matrices for EM. - gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); - - // Large matrices for NR. - // Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); - - // Each column is H_k^{-1}y_k. - gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); - - // Each dcxdc block is x_k \otimes H_k^{-1}. - gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); - gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); - - gsl_vector *x=gsl_vector_alloc (n_size); - gsl_vector *x_miss=gsl_vector_alloc (n_size); - - gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); - gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1); - gsl_vector *beta=gsl_vector_alloc (d_size); - gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size); - - // Null estimates for initial values. 
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1); - gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size); - - gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); - gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size); - gsl_matrix_view xHi_all_sub = - gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size); - - gsl_matrix_transpose_memcpy (Y, UtY); - - gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW); - - gsl_vector_view X_row=gsl_matrix_row(X, c_size); - gsl_vector_set_zero(&X_row.vector); - gsl_vector_view B_col=gsl_matrix_column(B, c_size); - gsl_vector_set_zero(&B_col.vector); - - MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, - Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix); - logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub.matrix); - logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, - &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, - &B_sub.matrix, se_B_null); - - c=0; - Vg_remle_null.clear(); - Ve_remle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, - c+v_size) ); - c++; - } - } - beta_remle_null.clear(); - se_beta_remle_null.clear(); - for (size_t i=0; i<se_B_null->size1; i++) { - for (size_t j=0; j<se_B_null->size2; j++) { - beta_remle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); - } - } - logl_remle_H0=logl_H0; - - 
cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - cout.precision(4); - - cout<<"REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE likelihood = "<<logl_H0<<endl; - - logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub.matrix); - logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, - Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, - &B_sub.matrix, se_B_null); - - c=0; - Vg_mle_null.clear(); - Ve_mle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size)); - c++; - } - } - beta_mle_null.clear(); - se_beta_mle_null.clear(); - for (size_t i=0; i<se_B_null->size1; i++) { - for (size_t j=0; j<se_B_null->size2; j++) { - beta_mle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); - } - } - logl_mle_H0=logl_H0; - - 
cout<<"MLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"MLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"MLE likelihood = "<<logl_H0<<endl; - - vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; - for (size_t i=0; i<d_size; i++) { - v_beta.push_back(0.0); - } - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg.push_back(0.0); - v_Ve.push_back(0.0); - v_Vbeta.push_back(0.0); - } - } - - gsl_matrix_memcpy (V_g_null, V_g); - gsl_matrix_memcpy (V_e_null, V_e); - gsl_matrix_memcpy (B_null, B); - - // Start reading genotypes and analyze. 
- size_t csnp=0, t_last=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (indicator_snp[t]==0) {continue;} - t_last++; - } - for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } - else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);} - geno=gsl_vector_get(x, i); - } - - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize); - gsl_vector_memcpy (&Xlarge_col.vector, x); - csnp++; - - if (csnp%msize==0 || csnp==t_last ) { - size_t l=0; - if (csnp%msize==0) {l=msize;} else {l=csnp%msize;} - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Xlarge); - - for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i); - gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector); - - // Initial values. 
- gsl_matrix_memcpy (V_g, V_g_null); - gsl_matrix_memcpy (V_e, V_e_null); - gsl_matrix_memcpy (B, B_null); - - time_start=clock(); - - // 3 is before 1. - if (a_mode==3 || a_mode==4) { - p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g_null, V_e_null, UltVehiY, beta, Vbeta); - if (p_score<p_nr && crt==1) { - logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, - crt_b, crt_c); - p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c); - } - } - - if (a_mode==2 || a_mode==4) { - logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, - UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); - - // Calculate beta and Vbeta. - p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size ); - - if (p_lrt<p_nr) { - logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - - // Calculate beta and Vbeta. 
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), - (double)d_size ); - - if (crt==1) { - p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c); - } - } - } - - if (a_mode==1 || a_mode==4) { - logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, - UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); - p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - - if (p_wald<p_nr) { - logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - - if (crt==1) { - p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c); - } - } - } - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. - for (size_t i=0; i<d_size; i++) { - v_beta[i]=gsl_vector_get (beta, i); - } - - c=0; - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg[c]=gsl_matrix_get (V_g, i, j); - v_Ve[c]=gsl_matrix_get (V_e, i, j); - v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j); - c++; - } - } - - MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, - v_Ve, v_Vbeta}; - sumStat.push_back(SNPs); - } - } - } - cout<<endl; - - infile.close(); - infile.clear(); - - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_vector_free(x); - gsl_vector_free(x_miss); - - gsl_matrix_free(Y); - gsl_matrix_free(X); - gsl_matrix_free(V_g); - gsl_matrix_free(V_e); - gsl_matrix_free(B); - gsl_vector_free(beta); - gsl_matrix_free(Vbeta); - - gsl_matrix_free(V_g_null); - 
gsl_matrix_free(V_e_null); - gsl_matrix_free(B_null); - gsl_matrix_free(se_B_null); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - return; +void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return; + } + + clock_t time_start = clock(); + time_UtX = 0; + time_opt = 0; + + string line; + char *ch_ptr; + + double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0; + double crt_a, crt_b, crt_c; + int n_miss, c_phen; + double geno, x_mean; + size_t c = 0; + size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2; + + size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; + + // Create a large matrix. + size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix_set_zero(Xlarge); + + // Large matrices for EM. + gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size); + + // Large matrices for NR. + // Each dxd block is H_k^{-1}. + gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size); + + // Each column is H_k^{-1}y_k. + gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size); + + // Each dcxdc block is x_k \otimes H_k^{-1}. 
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size); + gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); + + gsl_vector *x = gsl_vector_alloc(n_size); + gsl_vector *x_miss = gsl_vector_alloc(n_size); + + gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size); + gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1); + gsl_vector *beta = gsl_vector_alloc(d_size); + gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size); + + // Null estimates for initial values. + gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1); + gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size); + + gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size); + gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size); + gsl_matrix_view xHi_all_sub = + gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size); + + gsl_matrix_transpose_memcpy(Y, UtY); + + gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW); + + gsl_vector_view X_row = gsl_matrix_row(X, c_size); + gsl_vector_set_zero(&X_row.vector); + gsl_vector_view B_col = gsl_matrix_column(B, c_size); + gsl_vector_set_zero(&B_col.vector); + + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, + l_max, n_region, V_g, V_e, &B_sub.matrix); + logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub.matrix); + logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, + &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, + crt_c); + MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, + se_B_null); + + c = 0; + Vg_remle_null.clear(); + 
Ve_remle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_remle_null.clear(); + se_beta_remle_null.clear(); + for (size_t i = 0; i < se_B_null->size1; i++) { + for (size_t j = 0; j < se_B_null->size2; j++) { + beta_remle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j)); + } + } + logl_remle_H0 = logl_H0; + + cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + cout.precision(4); + + cout << "REMLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "REMLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "REMLE likelihood = " << logl_H0 << endl; + + logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub.matrix); + logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, + &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, + crt_c); + 
MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, + se_B_null); + + c = 0; + Vg_mle_null.clear(); + Ve_mle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_mle_null.clear(); + se_beta_mle_null.clear(); + for (size_t i = 0; i < se_B_null->size1; i++) { + for (size_t j = 0; j < se_B_null->size2; j++) { + beta_mle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j)); + } + } + logl_mle_H0 = logl_H0; + + cout << "MLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "MLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "MLE likelihood = " << logl_H0 << endl; + + vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; + for (size_t i = 0; i < d_size; i++) { + v_beta.push_back(0.0); + } + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg.push_back(0.0); + v_Ve.push_back(0.0); + v_Vbeta.push_back(0.0); + } + } + + gsl_matrix_memcpy(V_g_null, 
V_g); + gsl_matrix_memcpy(V_e_null, V_e); + gsl_matrix_memcpy(B_null, B); + + // Start reading genotypes and analyze. + size_t csnp = 0, t_last = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (indicator_snp[t] == 0) { + continue; + } + t_last++; + } + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + geno = atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + geno = gsl_vector_get(x, i); + } + + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize); + gsl_vector_memcpy(&Xlarge_col.vector, x); + csnp++; + + if (csnp % msize == 0 || csnp == t_last) { + size_t l = 0; + if (csnp % msize == 0) { + l = msize; + } else { + l = csnp % msize; + } + + gsl_matrix_view Xlarge_sub = + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub = + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + + time_start = clock(); + eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + &UtXlarge_sub.matrix); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Xlarge); + + for (size_t i = 0; i < l; i++) { + gsl_vector_view UtXlarge_col = 
gsl_matrix_column(UtXlarge, i); + gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector); + + // Initial values. + gsl_matrix_memcpy(V_g, V_g_null); + gsl_matrix_memcpy(V_e, V_e_null); + gsl_matrix_memcpy(B, B_null); + + time_start = clock(); + + // 3 is before 1. + if (a_mode == 3 || a_mode == 4) { + p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, + V_e_null, UltVehiY, beta, Vbeta); + if (p_score < p_nr && crt == 1) { + logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c); + } + } + + if (a_mode == 2 || a_mode == 4) { + logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, + E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, + UltVehiE, V_g, V_e, B); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (p_lrt < p_nr) { + logl_H1 = + MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, + xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + + // Calculate beta and Vbeta. 
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (crt == 1) { + p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c); + } + } + } + + if (a_mode == 1 || a_mode == 4) { + logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, + E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, + UltVehiE, V_g, V_e, B); + p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (p_wald < p_nr) { + logl_H1 = + MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, + xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (crt == 1) { + p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c); + } + } + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + for (size_t i = 0; i < d_size; i++) { + v_beta[i] = gsl_vector_get(beta, i); + } + + c = 0; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg[c] = gsl_matrix_get(V_g, i, j); + v_Ve[c] = gsl_matrix_get(V_e, i, j); + v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j); + c++; + } + } + + MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta}; + sumStat.push_back(SNPs); + } + } + } + cout << endl; + + infile.close(); + infile.clear(); + + gsl_matrix_free(U_hat); + gsl_matrix_free(E_hat); + gsl_matrix_free(OmegaU); + gsl_matrix_free(OmegaE); + gsl_matrix_free(UltVehiY); + gsl_matrix_free(UltVehiBX); + gsl_matrix_free(UltVehiU); + gsl_matrix_free(UltVehiE); + + gsl_matrix_free(Hi_all); + gsl_matrix_free(Hiy_all); + gsl_matrix_free(xHi_all); + gsl_matrix_free(Hessian); + + gsl_vector_free(x); + gsl_vector_free(x_miss); + + gsl_matrix_free(Y); + gsl_matrix_free(X); + gsl_matrix_free(V_g); + gsl_matrix_free(V_e); + gsl_matrix_free(B); + gsl_vector_free(beta); + 
gsl_matrix_free(Vbeta); + + gsl_matrix_free(V_g_null); + gsl_matrix_free(V_e_null); + gsl_matrix_free(B_null); + gsl_matrix_free(se_B_null); + + gsl_matrix_free(Xlarge); + gsl_matrix_free(UtXlarge); + + return; } -void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY) { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;} - - clock_t time_start=clock(); - time_UtX=0; time_opt=0; - - char ch[1]; - bitset<8> b; - - double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0; - double crt_a, crt_b, crt_c; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; - size_t c=0; - size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2; - size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; - - // Create a large matrix. - size_t msize=10000; - gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - // Large matrices for EM. - gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); - - // Large matrices for NR. - // Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); - - // Each column is H_k^{-1}y_k. - gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); - - // Each dcxdc block is x_k\otimes H_k^{-1}. 
- gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); - - gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); - - gsl_vector *x=gsl_vector_alloc (n_size); - - gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); - gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1); - gsl_vector *beta=gsl_vector_alloc (d_size); - gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size); - - // Null estimates for initial values. - gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1); - gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size); - - gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); - gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size); - gsl_matrix_view xHi_all_sub = - gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size); - - gsl_matrix_transpose_memcpy (Y, UtY); - gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW); - - gsl_vector_view X_row=gsl_matrix_row(X, c_size); - gsl_vector_set_zero(&X_row.vector); - gsl_vector_view B_col=gsl_matrix_column(B, c_size); - gsl_vector_set_zero(&B_col.vector); - - MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, - Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix); - - logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub.matrix); - logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, - &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, - &B_sub.matrix, se_B_null); - - c=0; - Vg_remle_null.clear(); - Ve_remle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { 
- Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size, - c+v_size)); - c++; - } - } - beta_remle_null.clear(); - se_beta_remle_null.clear(); - for (size_t i=0; i<se_B_null->size1; i++) { - for (size_t j=0; j<se_B_null->size2; j++) { - beta_remle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); - } - } - logl_remle_H0=logl_H0; - - cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - cout.precision(4); - cout<<"REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE likelihood = "<<logl_H0<<endl; - - logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, - UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix); - logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, - Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, - &B_sub.matrix, se_B_null); - - c=0; - Vg_mle_null.clear(); - Ve_mle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; 
j<d_size; j++) { - Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size)); - c++; - } - } - beta_mle_null.clear(); - se_beta_mle_null.clear(); - for (size_t i=0; i<se_B_null->size1; i++) { - for (size_t j=0; j<se_B_null->size2; j++) { - beta_mle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) ); - } - } - logl_mle_H0=logl_H0; - - cout<<"MLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"MLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"MLE likelihood = "<<logl_H0<<endl; - - vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; - for (size_t i=0; i<d_size; i++) { - v_beta.push_back(0.0); - } - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg.push_back(0.0); - v_Ve.push_back(0.0); - v_Vbeta.push_back(0.0); - } - } - - gsl_matrix_memcpy (V_g_null, V_g); - gsl_matrix_memcpy (V_e_null, V_e); - gsl_matrix_memcpy (B_null, B); - - // Start reading genotypes and analyze. - // Calculate n_bit and c, the number of bit for each snp. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. 
- for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - size_t csnp=0, t_last=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (indicator_snp[t]==0) {continue;} - t_last++; - } - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) { - ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - //read genotypes - x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0; - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;} - if (indicator_idv[ci_total]==0) {ci_total++; continue;} - - if (b[2*j]==0) { - if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; } - else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } - } - else { - if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } - else {gsl_vector_set(x, ci_test, -9); n_miss++; } - } - - ci_total++; - ci_test++; - } - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - geno=gsl_vector_get(x,i); - if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;} - } - - gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize); - gsl_vector_memcpy (&Xlarge_col.vector, x); - csnp++; - - if (csnp%msize==0 || csnp==t_last ) { - size_t l=0; - if (csnp%msize==0) {l=msize;} else {l=csnp%msize;} - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start=clock(); - eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_matrix_set_zero (Xlarge); - - for (size_t i=0; i<l; i++) { - gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, 
i); - gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector); - - // Initial values. - gsl_matrix_memcpy (V_g, V_g_null); - gsl_matrix_memcpy (V_e, V_e_null); - gsl_matrix_memcpy (B, B_null); - - time_start=clock(); - - // 3 is before 1. - if (a_mode==3 || a_mode==4) { - p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g_null, V_e_null, UltVehiY, beta, Vbeta); - - if (p_score<p_nr && crt==1) { - logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, - crt_b, crt_c); - p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c); - } - } - - if (a_mode==2 || a_mode==4) { - logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, - UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); - - // Calculate beta and Vbeta. - p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size ); - - if (p_lrt<p_nr) { - logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - - // Calculate beta and Vbeta. 
- p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), - (double)d_size ); - if (crt==1) { - p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c); - } - } - } - - if (a_mode==1 || a_mode==4) { - logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, - UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); - p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - - if (p_wald<p_nr) { - logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - - if (crt==1) { - p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c); - } - } - } - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. - for (size_t i=0; i<d_size; i++) { - v_beta[i]=gsl_vector_get (beta, i); - } - - c=0; - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg[c]=gsl_matrix_get (V_g, i, j); - v_Ve[c]=gsl_matrix_get (V_e, i, j); - v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j); - c++; - } - } - - MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, - v_Ve, v_Vbeta}; - sumStat.push_back(SNPs); - } - } - } - cout<<endl; - - infile.close(); - infile.clear(); - - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_vector_free(x); - - gsl_matrix_free(Y); - gsl_matrix_free(X); - gsl_matrix_free(V_g); - gsl_matrix_free(V_e); - gsl_matrix_free(B); - gsl_vector_free(beta); - gsl_matrix_free(Vbeta); - - gsl_matrix_free(V_g_null); - gsl_matrix_free(V_e_null); - 
gsl_matrix_free(B_null); - gsl_matrix_free(se_B_null); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - return; +void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY) { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } + + clock_t time_start = clock(); + time_UtX = 0; + time_opt = 0; + + char ch[1]; + bitset<8> b; + + double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0; + double crt_a, crt_b, crt_c; + int n_bit, n_miss, ci_total, ci_test; + double geno, x_mean; + size_t c = 0; + size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2; + size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; + + // Create a large matrix. + size_t msize = 10000; + gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); + gsl_matrix_set_zero(Xlarge); + + // Large matrices for EM. + gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size); + + // Large matrices for NR. + // Each dxd block is H_k^{-1}. + gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size); + + // Each column is H_k^{-1}y_k. + gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size); + + // Each dcxdc block is x_k\otimes H_k^{-1}. 
+ gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size); + + gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); + + gsl_vector *x = gsl_vector_alloc(n_size); + + gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size); + gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1); + gsl_vector *beta = gsl_vector_alloc(d_size); + gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size); + + // Null estimates for initial values. + gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1); + gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size); + + gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size); + gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size); + gsl_matrix_view xHi_all_sub = + gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size); + + gsl_matrix_transpose_memcpy(Y, UtY); + gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW); + + gsl_vector_view X_row = gsl_matrix_row(X, c_size); + gsl_vector_set_zero(&X_row.vector); + gsl_vector_view B_col = gsl_matrix_column(B, c_size); + gsl_vector_set_zero(&B_col.vector); + + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, + l_max, n_region, V_g, V_e, &B_sub.matrix); + + logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub.matrix); + logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, + &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, + crt_c); + MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, + se_B_null); + + c = 0; + Vg_remle_null.clear(); + Ve_remle_null.clear(); + for (size_t i = 0; i < d_size; 
i++) { + for (size_t j = i; j < d_size; j++) { + Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_remle_null.clear(); + se_beta_remle_null.clear(); + for (size_t i = 0; i < se_B_null->size1; i++) { + for (size_t j = 0; j < se_B_null->size2; j++) { + beta_remle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j)); + } + } + logl_remle_H0 = logl_H0; + + cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + cout.precision(4); + cout << "REMLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "REMLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "REMLE likelihood = " << logl_H0 << endl; + + logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub.matrix); + logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, + &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, + crt_c); + MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, 
&B_sub.matrix, + se_B_null); + + c = 0; + Vg_mle_null.clear(); + Ve_mle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_mle_null.clear(); + se_beta_mle_null.clear(); + for (size_t i = 0; i < se_B_null->size1; i++) { + for (size_t j = 0; j < se_B_null->size2; j++) { + beta_mle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j)); + } + } + logl_mle_H0 = logl_H0; + + cout << "MLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "MLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "MLE likelihood = " << logl_H0 << endl; + + vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; + for (size_t i = 0; i < d_size; i++) { + v_beta.push_back(0.0); + } + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg.push_back(0.0); + v_Ve.push_back(0.0); + v_Vbeta.push_back(0.0); + } + } + + gsl_matrix_memcpy(V_g_null, V_g); + gsl_matrix_memcpy(V_e_null, V_e); + 
gsl_matrix_memcpy(B_null, B); + + // Start reading genotypes and analyze. + // Calculate n_bit and c, the number of bit for each snp. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + size_t csnp = 0, t_last = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (indicator_snp[t] == 0) { + continue; + } + t_last++; + } + for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { + if (t % d_pace == 0 || t == snpInfo.size() - 1) { + ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // read genotypes + x_mean = 0.0; + n_miss = 0; + ci_total = 0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0; + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(x, ci_test, 0); + } else { + gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } + + ci_total++; + ci_test++; + } + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + geno = gsl_vector_get(x, i); + if (geno == -9) { + gsl_vector_set(x, i, x_mean); + geno = x_mean; + } + } + + gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize); + gsl_vector_memcpy(&Xlarge_col.vector, x); + csnp++; + + if (csnp % msize == 0 || csnp == t_last) { + size_t l = 0; + if (csnp % msize == 0) { + l = msize; + } else { + l = csnp % msize; + } + + gsl_matrix_view 
Xlarge_sub = + gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); + gsl_matrix_view UtXlarge_sub = + gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); + + time_start = clock(); + eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, + &UtXlarge_sub.matrix); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_matrix_set_zero(Xlarge); + + for (size_t i = 0; i < l; i++) { + gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); + gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector); + + // Initial values. + gsl_matrix_memcpy(V_g, V_g_null); + gsl_matrix_memcpy(V_e, V_e_null); + gsl_matrix_memcpy(B, B_null); + + time_start = clock(); + + // 3 is before 1. + if (a_mode == 3 || a_mode == 4) { + p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, + V_e_null, UltVehiY, beta, Vbeta); + + if (p_score < p_nr && crt == 1) { + logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c); + } + } + + if (a_mode == 2 || a_mode == 4) { + logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, + E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, + UltVehiE, V_g, V_e, B); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (p_lrt < p_nr) { + logl_H1 = + MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, + xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + + // Calculate beta and Vbeta. 
+ p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + if (crt == 1) { + p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c); + } + } + } + + if (a_mode == 1 || a_mode == 4) { + logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, + E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, + UltVehiE, V_g, V_e, B); + p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (p_wald < p_nr) { + logl_H1 = + MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, + xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (crt == 1) { + p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c); + } + } + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. + for (size_t i = 0; i < d_size; i++) { + v_beta[i] = gsl_vector_get(beta, i); + } + + c = 0; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg[c] = gsl_matrix_get(V_g, i, j); + v_Ve[c] = gsl_matrix_get(V_e, i, j); + v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j); + c++; + } + } + + MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta}; + sumStat.push_back(SNPs); + } + } + } + cout << endl; + + infile.close(); + infile.clear(); + + gsl_matrix_free(U_hat); + gsl_matrix_free(E_hat); + gsl_matrix_free(OmegaU); + gsl_matrix_free(OmegaE); + gsl_matrix_free(UltVehiY); + gsl_matrix_free(UltVehiBX); + gsl_matrix_free(UltVehiU); + gsl_matrix_free(UltVehiE); + + gsl_matrix_free(Hi_all); + gsl_matrix_free(Hiy_all); + gsl_matrix_free(xHi_all); + gsl_matrix_free(Hessian); + + gsl_vector_free(x); + + gsl_matrix_free(Y); + gsl_matrix_free(X); + gsl_matrix_free(V_g); + gsl_matrix_free(V_e); + gsl_matrix_free(B); + gsl_vector_free(beta); + gsl_matrix_free(Vbeta); + + 
gsl_matrix_free(V_g_null); + gsl_matrix_free(V_e_null); + gsl_matrix_free(B_null); + gsl_matrix_free(se_B_null); + + gsl_matrix_free(Xlarge); + gsl_matrix_free(UtXlarge); + + return; } // Calculate Vg, Ve, B, se(B) in the null mvLMM model. // Both B and se_B are d by c matrices. -void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, - const gsl_matrix *UtY, const size_t em_iter, - const size_t nr_iter, const double em_prec, - const double nr_prec, const double l_min, - const double l_max, const size_t n_region, - gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, - gsl_matrix *se_B) { - size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2; - size_t dc_size=d_size*c_size, v_size=d_size*(d_size+1)/2; - - double logl, crt_a, crt_b, crt_c; - - // Large matrices for EM. - gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); - - // Large matrices for NR. - // Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); - - // Each column is H_k^{-1}y_k. - gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); - - // Each dcxdc block is x_k\otimes H_k^{-1}. - gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); - gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); - - // Transpose matrices. - gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *W=gsl_matrix_alloc (c_size, n_size); - gsl_matrix_transpose_memcpy (Y, UtY); - gsl_matrix_transpose_memcpy (W, UtW); - - // Initial, EM, NR, and calculate B. 
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, - l_min, l_max, n_region, V_g, V_e, B); - logl=MphEM ('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, - OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, B); - logl=MphNR ('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, - Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, W, Y, V_g, V_e, UltVehiY, B, se_B); - - // Free matrices. - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_matrix_free(Y); - gsl_matrix_free(W); - - return; +void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_matrix *UtY, const size_t em_iter, + const size_t nr_iter, const double em_prec, + const double nr_prec, const double l_min, + const double l_max, const size_t n_region, + gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, + gsl_matrix *se_B) { + size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2; + size_t dc_size = d_size * c_size, v_size = d_size * (d_size + 1) / 2; + + double logl, crt_a, crt_b, crt_c; + + // Large matrices for EM. + gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size); + + // Large matrices for NR. + // Each dxd block is H_k^{-1}. + gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size); + + // Each column is H_k^{-1}y_k. 
+ gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size); + + // Each dcxdc block is x_k\otimes H_k^{-1}. + gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size); + gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); + + // Transpose matrices. + gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *W = gsl_matrix_alloc(c_size, n_size); + gsl_matrix_transpose_memcpy(Y, UtY); + gsl_matrix_transpose_memcpy(W, UtW); + + // Initial, EM, NR, and calculate B. + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max, + n_region, V_g, V_e, B); + logl = MphEM('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE, + UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); + logl = MphNR('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g, + V_e, Hessian, crt_a, crt_b, crt_c); + MphCalcBeta(eval, W, Y, V_g, V_e, UltVehiY, B, se_B); + + // Free matrices. + gsl_matrix_free(U_hat); + gsl_matrix_free(E_hat); + gsl_matrix_free(OmegaU); + gsl_matrix_free(OmegaE); + gsl_matrix_free(UltVehiY); + gsl_matrix_free(UltVehiBX); + gsl_matrix_free(UltVehiU); + gsl_matrix_free(UltVehiE); + + gsl_matrix_free(Hi_all); + gsl_matrix_free(Hiy_all); + gsl_matrix_free(xHi_all); + gsl_matrix_free(Hessian); + + gsl_matrix_free(Y); + gsl_matrix_free(W); + + return; } -void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY, - const gsl_vector *env) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return; - } - - clock_t time_start=clock(); - time_UtX=0; time_opt=0; - - string line; - char *ch_ptr; - - double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0; - double crt_a, crt_b, crt_c; - int n_miss, c_phen; - double geno, x_mean; - size_t c=0; - size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2; - size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; - - 
// Large matrices for EM. - gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); - - // Large matrices for NR. - // Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); - - // Each column is H_k^{-1}y_k. - gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); - - // Each dcxdc block is x_k\otimes H_k^{-1}. - gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); - gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); - - gsl_vector *x=gsl_vector_alloc (n_size); - gsl_vector *x_miss=gsl_vector_alloc (n_size); - - gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); - gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1); - gsl_vector *beta=gsl_vector_alloc (d_size); - gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size); - - // Null estimates for initial values; including env but not - // including x. 
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1); - gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1); - gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size); - - gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size); - gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1); - gsl_matrix_view xHi_all_sub1= - gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size); - - gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); - gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size); - gsl_matrix_view xHi_all_sub2= - gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size); - - gsl_matrix_transpose_memcpy (Y, UtY); - - gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size); - gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW); - gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2); - gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector); - - gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1); - gsl_vector_set_zero(&X_row1.vector); - gsl_vector_view X_row2=gsl_matrix_row(X, c_size); - gsl_vector_set_zero(&X_row2.vector); - - gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1); - gsl_vector_set_zero(&B_col1.vector); - gsl_vector_view B_col2=gsl_matrix_column(B, c_size); - gsl_vector_set_zero(&B_col2.vector); - - MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, - Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix); - logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, - UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix); - logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, - Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, - &B_sub1.matrix, se_B_null1); - - c=0; - 
Vg_remle_null.clear(); - Ve_remle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size, - c+v_size)); - c++; - } - } - beta_remle_null.clear(); - se_beta_remle_null.clear(); - for (size_t i=0; i<se_B_null1->size1; i++) { - for (size_t j=0; j<se_B_null1->size2; j++) { - beta_remle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) ); - } - } - logl_remle_H0=logl_H0; - - cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - cout.precision(4); - - cout<<"REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE likelihood = "<<logl_H0<<endl; - - logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub1.matrix); - logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, - Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, - 
&B_sub1.matrix, se_B_null1); - - c=0; - Vg_mle_null.clear(); - Ve_mle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size)); - c++; - } - } - beta_mle_null.clear(); - se_beta_mle_null.clear(); - for (size_t i=0; i<se_B_null1->size1; i++) { - for (size_t j=0; j<se_B_null1->size2; j++) { - beta_mle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) ); - } - } - logl_mle_H0=logl_H0; - - cout<<"MLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"MLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"MLE likelihood = "<<logl_H0<<endl; - - vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; - for (size_t i=0; i<d_size; i++) { - v_beta.push_back(0.0); - } - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg.push_back(0.0); - v_Ve.push_back(0.0); - v_Vbeta.push_back(0.0); - } - } - - gsl_matrix_memcpy (V_g_null, V_g); - gsl_matrix_memcpy (V_e_null, V_e); - gsl_matrix_memcpy (B_null, B); - - // Start reading genotypes and analyze. 
- for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i<ni_total; ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==0) {continue;} - - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } - else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);} - geno=gsl_vector_get(x, i); - if (x_mean>1) { - gsl_vector_set(x, i, 2-geno); - } - } - - // Calculate statistics. 
- time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector); - gsl_vector_mul (x, env); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //initial values - gsl_matrix_memcpy (V_g, V_g_null); - gsl_matrix_memcpy (V_e, V_e_null); - gsl_matrix_memcpy (B, B_null); - - if (a_mode==2 || a_mode==3 || a_mode==4) { - if (a_mode==3 || a_mode==4) { - logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, - &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, - OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub2.matrix); - logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, - &X_sub2.matrix, Y, Hi_all, - &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, - &B_sub2.matrix, se_B_null2); - } - - if (a_mode==2 || a_mode==4) { - logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, - &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, - OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub2.matrix); - logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, - &X_sub2.matrix, Y, Hi_all, - &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, - &B_sub2.matrix, se_B_null2); - } - } - - time_start=clock(); - - // 3 is before 1. - if (a_mode==3 || a_mode==4) { - p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, - V_g_null, V_e_null, UltVehiY, beta, Vbeta); - if (p_score<p_nr && crt==1) { - logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, - crt_a, crt_b, crt_c); - p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c); - } - } - - if (a_mode==2 || a_mode==4) { - logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, - UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); - - // Calculate beta and Vbeta. 
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0),(double)d_size); - - if (p_lrt<p_nr) { - logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - - // Calculate beta and Vbeta. - p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q(2.0*(logl_H1-logl_H0), - (double)d_size ); - - if (crt==1) { - p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c); - } - } - } - - if (a_mode==1 || a_mode==4) { - logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, - UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B); - p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - - if (p_wald<p_nr) { - logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, - Hi_all, xHi_all, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, - V_g, V_e, UltVehiY, beta, Vbeta); - - if (crt==1) { - p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c); - } - } - } - - if (x_mean>1) {gsl_vector_scale(beta, -1.0);} - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. 
- for (size_t i=0; i<d_size; i++) { - v_beta[i]=gsl_vector_get (beta, i); - } - - c=0; - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg[c]=gsl_matrix_get (V_g, i, j); - v_Ve[c]=gsl_matrix_get (V_e, i, j); - v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j); - c++; - } - } - - MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, - v_Ve, v_Vbeta}; - sumStat.push_back(SNPs); - } - cout<<endl; - - - infile.close(); - infile.clear(); - - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_vector_free(x); - gsl_vector_free(x_miss); - - gsl_matrix_free(Y); - gsl_matrix_free(X); - gsl_matrix_free(V_g); - gsl_matrix_free(V_e); - gsl_matrix_free(B); - gsl_vector_free(beta); - gsl_matrix_free(Vbeta); - - gsl_matrix_free(V_g_null); - gsl_matrix_free(V_e_null); - gsl_matrix_free(B_null); - gsl_matrix_free(se_B_null1); - gsl_matrix_free(se_B_null2); - - return; +void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY, + const gsl_vector *env) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return; + } + + clock_t time_start = clock(); + time_UtX = 0; + time_opt = 0; + + string line; + char *ch_ptr; + + double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0; + double crt_a, crt_b, crt_c; + int n_miss, c_phen; + double geno, x_mean; + size_t c = 0; + size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2; + size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; + + // Large matrices for EM. 
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size); + + // Large matrices for NR. + // Each dxd block is H_k^{-1}. + gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size); + + // Each column is H_k^{-1}y_k. + gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size); + + // Each dcxdc block is x_k\otimes H_k^{-1}. + gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size); + gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); + + gsl_vector *x = gsl_vector_alloc(n_size); + gsl_vector *x_miss = gsl_vector_alloc(n_size); + + gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size); + gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1); + gsl_vector *beta = gsl_vector_alloc(d_size); + gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size); + + // Null estimates for initial values; including env but not + // including x. 
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1); + gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1); + gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size); + + gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size); + gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1); + gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix( + xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size); + + gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size); + gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size); + gsl_matrix_view xHi_all_sub2 = + gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size); + + gsl_matrix_transpose_memcpy(Y, UtY); + + gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size); + gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW); + gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2); + gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector); + + gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1); + gsl_vector_set_zero(&X_row1.vector); + gsl_vector_view X_row2 = gsl_matrix_row(X, c_size); + gsl_vector_set_zero(&X_row2.vector); + + gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1); + gsl_vector_set_zero(&B_col1.vector); + gsl_vector_view B_col2 = gsl_matrix_column(B, c_size); + gsl_vector_set_zero(&B_col2.vector); + + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min, + l_max, n_region, V_g, V_e, &B_sub1.matrix); + logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub1.matrix); + logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, + &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, + crt_b, crt_c); + MphCalcBeta(eval, &X_sub1.matrix, 
Y, V_g, V_e, UltVehiY, &B_sub1.matrix, + se_B_null1); + + c = 0; + Vg_remle_null.clear(); + Ve_remle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_remle_null.clear(); + se_beta_remle_null.clear(); + for (size_t i = 0; i < se_B_null1->size1; i++) { + for (size_t j = 0; j < se_B_null1->size2; j++) { + beta_remle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j)); + } + } + logl_remle_H0 = logl_H0; + + cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + cout.precision(4); + + cout << "REMLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "REMLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "REMLE likelihood = " << logl_H0 << endl; + + logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub1.matrix); + logl_H0 = MphNR('L', nr_iter, nr_prec, eval, 
&X_sub1.matrix, Y, Hi_all, + &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, + crt_b, crt_c); + MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, + se_B_null1); + + c = 0; + Vg_mle_null.clear(); + Ve_mle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_mle_null.clear(); + se_beta_mle_null.clear(); + for (size_t i = 0; i < se_B_null1->size1; i++) { + for (size_t j = 0; j < se_B_null1->size2; j++) { + beta_mle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j)); + } + } + logl_mle_H0 = logl_H0; + + cout << "MLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "MLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "MLE likelihood = " << logl_H0 << endl; + + vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; + for (size_t i = 0; i < d_size; i++) { + v_beta.push_back(0.0); + } + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + 
v_Vg.push_back(0.0); + v_Ve.push_back(0.0); + v_Vbeta.push_back(0.0); + } + } + + gsl_matrix_memcpy(V_g_null, V_g); + gsl_matrix_memcpy(V_e_null, V_e); + gsl_matrix_memcpy(B_null, B); + + // Start reading genotypes and analyze. + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + gsl_vector_set_zero(x_miss); + for (size_t i = 0; i < ni_total; ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 0) { + continue; + } + + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + geno = atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean); + } + geno = gsl_vector_get(x, i); + if (x_mean > 1) { + gsl_vector_set(x, i, 2 - geno); + } + } + + // Calculate statistics. 
+ time_start = clock(); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector); + gsl_vector_mul(x, env); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // initial values + gsl_matrix_memcpy(V_g, V_g_null); + gsl_matrix_memcpy(V_e, V_e_null); + gsl_matrix_memcpy(B, B_null); + + if (a_mode == 2 || a_mode == 3 || a_mode == 4) { + if (a_mode == 3 || a_mode == 4) { + logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix, + Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, + UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix); + logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix, + Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, + Hessian, crt_a, crt_b, crt_c); + MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, + se_B_null2); + } + + if (a_mode == 2 || a_mode == 4) { + logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix, + Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, + UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix); + logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix, + Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, + Hessian, crt_a, crt_b, crt_c); + MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, + se_B_null2); + } + } + + time_start = clock(); + + // 3 is before 1. 
+ if (a_mode == 3 || a_mode == 4) { + p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null, + V_e_null, UltVehiY, beta, Vbeta); + if (p_score < p_nr && crt == 1) { + logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c); + } + } + + if (a_mode == 2 || a_mode == 4) { + logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, + V_g, V_e, B); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (p_lrt < p_nr) { + logl_H1 = + MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (crt == 1) { + p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c); + } + } + } + + if (a_mode == 1 || a_mode == 4) { + logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, + V_g, V_e, B); + p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (p_wald < p_nr) { + logl_H1 = + MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (crt == 1) { + p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c); + } + } + } + + if (x_mean > 1) { + gsl_vector_scale(beta, -1.0); + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. 
+ for (size_t i = 0; i < d_size; i++) { + v_beta[i] = gsl_vector_get(beta, i); + } + + c = 0; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg[c] = gsl_matrix_get(V_g, i, j); + v_Ve[c] = gsl_matrix_get(V_e, i, j); + v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j); + c++; + } + } + + MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta}; + sumStat.push_back(SNPs); + } + cout << endl; + + infile.close(); + infile.clear(); + + gsl_matrix_free(U_hat); + gsl_matrix_free(E_hat); + gsl_matrix_free(OmegaU); + gsl_matrix_free(OmegaE); + gsl_matrix_free(UltVehiY); + gsl_matrix_free(UltVehiBX); + gsl_matrix_free(UltVehiU); + gsl_matrix_free(UltVehiE); + + gsl_matrix_free(Hi_all); + gsl_matrix_free(Hiy_all); + gsl_matrix_free(xHi_all); + gsl_matrix_free(Hessian); + + gsl_vector_free(x); + gsl_vector_free(x_miss); + + gsl_matrix_free(Y); + gsl_matrix_free(X); + gsl_matrix_free(V_g); + gsl_matrix_free(V_e); + gsl_matrix_free(B); + gsl_vector_free(beta); + gsl_matrix_free(Vbeta); + + gsl_matrix_free(V_g_null); + gsl_matrix_free(V_e_null); + gsl_matrix_free(B_null); + gsl_matrix_free(se_B_null1); + gsl_matrix_free(se_B_null2); + + return; } -void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY, - const gsl_vector *env) { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return; - } - - clock_t time_start=clock(); - time_UtX=0; time_opt=0; - - char ch[1]; - bitset<8> b; - - double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0; - double crt_a, crt_b, crt_c; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; - size_t c=0; - size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2; - size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; - - // Large matrices for EM. 
- gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size); - - // Large matrices for NR. - // Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); - - // Each column is H_k^{-1}y_k - gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); - - // Each dcxdc block is x_k\otimes H_k^{-1}. - gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); - gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2); - - gsl_vector *x=gsl_vector_alloc (n_size); - - gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size); - gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); - gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1); - gsl_vector *beta=gsl_vector_alloc (d_size); - gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size); - - // Null estimates for initial values. 
- gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size); - gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1); - gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1); - gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size); - - gsl_matrix_view X_sub1=gsl_matrix_submatrix(X,0,0,c_size-1,n_size); - gsl_matrix_view B_sub1=gsl_matrix_submatrix(B,0,0,d_size,c_size-1); - gsl_matrix_view xHi_all_sub1= - gsl_matrix_submatrix(xHi_all,0,0,d_size*(c_size-1),d_size*n_size); - - gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); - gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size); - gsl_matrix_view xHi_all_sub2= - gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size); - - gsl_matrix_transpose_memcpy (Y, UtY); - - gsl_matrix_view X_sub0=gsl_matrix_submatrix(X,0,0,c_size-2,n_size); - gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW); - gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2); - gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector); - - gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1); - gsl_vector_set_zero(&X_row1.vector); - gsl_vector_view X_row2=gsl_matrix_row(X, c_size); - gsl_vector_set_zero(&X_row2.vector); - - gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1); - gsl_vector_set_zero(&B_col1.vector); - gsl_vector_view B_col2=gsl_matrix_column(B, c_size); - gsl_vector_set_zero(&B_col2.vector); - - MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, - Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix); - - logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, &B_sub1.matrix); - logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, - Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, - &B_sub1.matrix, se_B_null1); - - c=0; 
- Vg_remle_null.clear(); - Ve_remle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_remle_null.push_back(gsl_matrix_get(Hessian,c+v_size, - c+v_size)); - c++; - } - } - beta_remle_null.clear(); - se_beta_remle_null.clear(); - for (size_t i=0; i<se_B_null1->size1; i++) { - for (size_t j=0; j<se_B_null1->size2; j++) { - beta_remle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) ); - } - } - logl_remle_H0=logl_H0; - - cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - cout.precision(4); - cout<<"REMLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"REMLE likelihood = "<<logl_H0<<endl; - - logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, - U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, - UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix); - logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, - Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, - Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, - 
&B_sub1.matrix, se_B_null1); - - c=0; - Vg_mle_null.clear(); - Ve_mle_null.clear(); - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) ); - Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) ); - VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) ); - VVe_mle_null.push_back(gsl_matrix_get(Hessian,c+v_size,c+v_size)); - c++; - } - } - beta_mle_null.clear(); - se_beta_mle_null.clear(); - for (size_t i=0; i<se_B_null1->size1; i++) { - for (size_t j=0; j<se_B_null1->size2; j++) { - beta_mle_null.push_back(gsl_matrix_get(B, i, j) ); - se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) ); - } - } - logl_mle_H0=logl_H0; - - cout<<"MLE estimate for Vg in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_g, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Vg): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t"; - } - cout<<endl; - } - cout<<"MLE estimate for Ve in the null model: "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - cout<<gsl_matrix_get(V_e, i, j)<<"\t"; - } - cout<<endl; - } - cout<<"se(Ve): "<<endl; - for (size_t i=0; i<d_size; i++) { - for (size_t j=0; j<=i; j++) { - c=GetIndex(i, j, d_size); - cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t"; - } - cout<<endl; - } - cout<<"MLE likelihood = "<<logl_H0<<endl; - - vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; - for (size_t i=0; i<d_size; i++) { - v_beta.push_back(0.0); - } - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg.push_back(0.0); - v_Ve.push_back(0.0); - v_Vbeta.push_back(0.0); - } - } - - gsl_matrix_memcpy (V_g_null, V_g); - gsl_matrix_memcpy (V_e_null, V_e); - gsl_matrix_memcpy (B_null, B); - - // Start reading genotypes and analyze. 
- // Calculate n_bit and c, the number of bit for each SNP. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) { - ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. - x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { - - if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;} - if (indicator_idv[ci_total]==0) {ci_total++; continue;} - - if (b[2*j]==0) { - if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; } - else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } - } - else { - if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } - else {gsl_vector_set(x, ci_test, -9); n_miss++; } - } - - ci_total++; - ci_test++; - } - } - - x_mean/=(double)(ni_test-n_miss); - - for (size_t i=0; i<ni_test; ++i) { - geno=gsl_vector_get(x,i); - if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;} - if (x_mean>1) { - gsl_vector_set(x, i, 2-geno); - } - } - - // Calculate statistics. - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector); - gsl_vector_mul (x, env); - gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector); - time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Initial values. 
- gsl_matrix_memcpy (V_g, V_g_null); - gsl_matrix_memcpy (V_e, V_e_null); - gsl_matrix_memcpy (B, B_null); - - if (a_mode==2 || a_mode==3 || a_mode==4) { - if (a_mode==3 || a_mode==4) { - logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, - &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, - UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, - V_e, &B_sub2.matrix); - logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, - &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, - Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, - &B_sub2.matrix, se_B_null2); - } - - if (a_mode==2 || a_mode==4) { - logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, - &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, - UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, - V_e, &B_sub2.matrix); - logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, - &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, - Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); - MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, - &B_sub2.matrix, se_B_null2); - } - } - - time_start=clock(); - - // 3 is before 1. - if (a_mode==3 || a_mode==4) { - p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, - V_g_null, V_e_null, UltVehiY, beta, Vbeta); - - if (p_score<p_nr && crt==1) { - logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, - Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); - p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c); - } - } - - if (a_mode==2 || a_mode==4) { - logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, - UltVehiU, UltVehiE, V_g, V_e, B); - - // Calculate beta and Vbeta. 
- p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size ); - - if (p_lrt<p_nr) { - logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, - crt_b, crt_c); - - // Calculate beta and Vbeta. - p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size ); - if (crt==1) { - p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c); - } - } - } - - if (a_mode==1 || a_mode==4) { - logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, - UltVehiU, UltVehiE, V_g, V_e, B); - p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - - if (p_wald<p_nr) { - logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, - crt_b, crt_c); - p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, - V_e, UltVehiY, beta, Vbeta); - - if (crt==1) { - p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c); - } - } - } - - if (x_mean>1) {gsl_vector_scale(beta, -1.0);} - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - // Store summary data. 
- for (size_t i=0; i<d_size; i++) { - v_beta[i]=gsl_vector_get (beta, i); - } - - c=0; - for (size_t i=0; i<d_size; i++) { - for (size_t j=i; j<d_size; j++) { - v_Vg[c]=gsl_matrix_get (V_g, i, j); - v_Ve[c]=gsl_matrix_get (V_e, i, j); - v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j); - c++; - } - } - - MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, - v_Vg, v_Ve, v_Vbeta}; - sumStat.push_back(SNPs); - } - cout<<endl; - - infile.close(); - infile.clear(); - - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_vector_free(x); - - gsl_matrix_free(Y); - gsl_matrix_free(X); - gsl_matrix_free(V_g); - gsl_matrix_free(V_e); - gsl_matrix_free(B); - gsl_vector_free(beta); - gsl_matrix_free(Vbeta); - - gsl_matrix_free(V_g_null); - gsl_matrix_free(V_e_null); - gsl_matrix_free(B_null); - gsl_matrix_free(se_B_null1); - gsl_matrix_free(se_B_null2); - - return; +void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY, + const gsl_vector *env) { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } + + clock_t time_start = clock(); + time_UtX = 0; + time_opt = 0; + + char ch[1]; + bitset<8> b; + + double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0; + double crt_a, crt_b, crt_c; + int n_bit, n_miss, ci_total, ci_test; + double geno, x_mean; + size_t c = 0; + size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2 + 2; + size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; + + // Large matrices for EM. 
+ gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size); + + // Large matrices for NR. + // Each dxd block is H_k^{-1}. + gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size); + + // Each column is H_k^{-1}y_k + gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size); + + // Each dcxdc block is x_k\otimes H_k^{-1}. + gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size); + gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); + + gsl_vector *x = gsl_vector_alloc(n_size); + + gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size); + gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size); + gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1); + gsl_vector *beta = gsl_vector_alloc(d_size); + gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size); + + // Null estimates for initial values. 
+ gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size); + gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1); + gsl_matrix *se_B_null1 = gsl_matrix_alloc(d_size, c_size - 1); + gsl_matrix *se_B_null2 = gsl_matrix_alloc(d_size, c_size); + + gsl_matrix_view X_sub1 = gsl_matrix_submatrix(X, 0, 0, c_size - 1, n_size); + gsl_matrix_view B_sub1 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size - 1); + gsl_matrix_view xHi_all_sub1 = gsl_matrix_submatrix( + xHi_all, 0, 0, d_size * (c_size - 1), d_size * n_size); + + gsl_matrix_view X_sub2 = gsl_matrix_submatrix(X, 0, 0, c_size, n_size); + gsl_matrix_view B_sub2 = gsl_matrix_submatrix(B, 0, 0, d_size, c_size); + gsl_matrix_view xHi_all_sub2 = + gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size); + + gsl_matrix_transpose_memcpy(Y, UtY); + + gsl_matrix_view X_sub0 = gsl_matrix_submatrix(X, 0, 0, c_size - 2, n_size); + gsl_matrix_transpose_memcpy(&X_sub0.matrix, UtW); + gsl_vector_view X_row0 = gsl_matrix_row(X, c_size - 2); + gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &X_row0.vector); + + gsl_vector_view X_row1 = gsl_matrix_row(X, c_size - 1); + gsl_vector_set_zero(&X_row1.vector); + gsl_vector_view X_row2 = gsl_matrix_row(X, c_size); + gsl_vector_set_zero(&X_row2.vector); + + gsl_vector_view B_col1 = gsl_matrix_column(B, c_size - 1); + gsl_vector_set_zero(&B_col1.vector); + gsl_vector_view B_col2 = gsl_matrix_column(B, c_size); + gsl_vector_set_zero(&B_col2.vector); + + MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min, + l_max, n_region, V_g, V_e, &B_sub1.matrix); + + logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub1.matrix); + logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, + &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, + crt_b, crt_c); + MphCalcBeta(eval, 
&X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, + se_B_null1); + + c = 0; + Vg_remle_null.clear(); + Ve_remle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_remle_null.clear(); + se_beta_remle_null.clear(); + for (size_t i = 0; i < se_B_null1->size1; i++) { + for (size_t j = 0; j < se_B_null1->size2; j++) { + beta_remle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j)); + } + } + logl_remle_H0 = logl_H0; + + cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + cout.precision(4); + cout << "REMLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "REMLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "REMLE likelihood = " << logl_H0 << endl; + + logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, + V_e, &B_sub1.matrix); + logl_H0 = MphNR('L', nr_iter, nr_prec, 
eval, &X_sub1.matrix, Y, Hi_all, + &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, + crt_b, crt_c); + MphCalcBeta(eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, + se_B_null1); + + c = 0; + Vg_mle_null.clear(); + Ve_mle_null.clear(); + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j)); + Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j)); + VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c)); + VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); + c++; + } + } + beta_mle_null.clear(); + se_beta_mle_null.clear(); + for (size_t i = 0; i < se_B_null1->size1; i++) { + for (size_t j = 0; j < se_B_null1->size2; j++) { + beta_mle_null.push_back(gsl_matrix_get(B, i, j)); + se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j)); + } + } + logl_mle_H0 = logl_H0; + + cout << "MLE estimate for Vg in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_g, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Vg): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; + } + cout << endl; + } + cout << "MLE estimate for Ve in the null model: " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + cout << gsl_matrix_get(V_e, i, j) << "\t"; + } + cout << endl; + } + cout << "se(Ve): " << endl; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = 0; j <= i; j++) { + c = GetIndex(i, j, d_size); + cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; + } + cout << endl; + } + cout << "MLE likelihood = " << logl_H0 << endl; + + vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; + for (size_t i = 0; i < d_size; i++) { + v_beta.push_back(0.0); + } + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { 
+ v_Vg.push_back(0.0); + v_Ve.push_back(0.0); + v_Vbeta.push_back(0.0); + } + } + + gsl_matrix_memcpy(V_g_null, V_g); + gsl_matrix_memcpy(V_e_null, V_e); + gsl_matrix_memcpy(B_null, B); + + // Start reading genotypes and analyze. + // Calculate n_bit and c, the number of bit for each SNP. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { + if (t % d_pace == 0 || t == snpInfo.size() - 1) { + ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + x_mean = 0.0; + n_miss = 0; + ci_total = 0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j = 0; j < 4; ++j) { + + if ((i == (n_bit - 1)) && ci_total == (int)ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(x, ci_test, 0); + } else { + gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } + + ci_total++; + ci_test++; + } + } + + x_mean /= (double)(ni_test - n_miss); + + for (size_t i = 0; i < ni_test; ++i) { + geno = gsl_vector_get(x, i); + if (geno == -9) { + gsl_vector_set(x, i, x_mean); + geno = x_mean; + } + if (x_mean > 1) { + gsl_vector_set(x, i, 2 - geno); + } + } + + // Calculate statistics. 
+ time_start = clock(); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row1.vector); + gsl_vector_mul(x, env); + gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, &X_row2.vector); + time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Initial values. + gsl_matrix_memcpy(V_g, V_g_null); + gsl_matrix_memcpy(V_e, V_e_null); + gsl_matrix_memcpy(B, B_null); + + if (a_mode == 2 || a_mode == 3 || a_mode == 4) { + if (a_mode == 3 || a_mode == 4) { + logl_H0 = MphEM('R', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix, + Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, + UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix); + logl_H0 = MphNR('R', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix, + Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, + Hessian, crt_a, crt_b, crt_c); + MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, + se_B_null2); + } + + if (a_mode == 2 || a_mode == 4) { + logl_H0 = MphEM('L', em_iter / 10, em_prec * 10, eval, &X_sub2.matrix, + Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, + UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix); + logl_H0 = MphNR('L', nr_iter / 10, nr_prec * 10, eval, &X_sub2.matrix, + Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, + Hessian, crt_a, crt_b, crt_c); + MphCalcBeta(eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, + se_B_null2); + } + } + + time_start = clock(); + + // 3 is before 1. 
+ if (a_mode == 3 || a_mode == 4) { + p_score = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null, + V_e_null, UltVehiY, beta, Vbeta); + + if (p_score < p_nr && crt == 1) { + logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c); + } + } + + if (a_mode == 2 || a_mode == 4) { + logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, + V_g, V_e, B); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + + if (p_lrt < p_nr) { + logl_H1 = + MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + + // Calculate beta and Vbeta. + p_lrt = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); + if (crt == 1) { + p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c); + } + } + } + + if (a_mode == 1 || a_mode == 4) { + logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, E_hat, + OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, + V_g, V_e, B); + p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (p_wald < p_nr) { + logl_H1 = + MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, + Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); + p_wald = MphCalcP(eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, + UltVehiY, beta, Vbeta); + + if (crt == 1) { + p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c); + } + } + } + + if (x_mean > 1) { + gsl_vector_scale(beta, -1.0); + } + + time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Store summary data. 
+ for (size_t i = 0; i < d_size; i++) { + v_beta[i] = gsl_vector_get(beta, i); + } + + c = 0; + for (size_t i = 0; i < d_size; i++) { + for (size_t j = i; j < d_size; j++) { + v_Vg[c] = gsl_matrix_get(V_g, i, j); + v_Ve[c] = gsl_matrix_get(V_e, i, j); + v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j); + c++; + } + } + + MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta}; + sumStat.push_back(SNPs); + } + cout << endl; + + infile.close(); + infile.clear(); + + gsl_matrix_free(U_hat); + gsl_matrix_free(E_hat); + gsl_matrix_free(OmegaU); + gsl_matrix_free(OmegaE); + gsl_matrix_free(UltVehiY); + gsl_matrix_free(UltVehiBX); + gsl_matrix_free(UltVehiU); + gsl_matrix_free(UltVehiE); + + gsl_matrix_free(Hi_all); + gsl_matrix_free(Hiy_all); + gsl_matrix_free(xHi_all); + gsl_matrix_free(Hessian); + + gsl_vector_free(x); + + gsl_matrix_free(Y); + gsl_matrix_free(X); + gsl_matrix_free(V_g); + gsl_matrix_free(V_e); + gsl_matrix_free(B); + gsl_vector_free(beta); + gsl_matrix_free(Vbeta); + + gsl_matrix_free(V_g_null); + gsl_matrix_free(V_e_null); + gsl_matrix_free(B_null); + gsl_matrix_free(se_B_null1); + gsl_matrix_free(se_B_null2); + + return; } diff --git a/src/mvlmm.h b/src/mvlmm.h index d495c26..4329ad1 100644 --- a/src/mvlmm.h +++ b/src/mvlmm.h @@ -19,89 +19,86 @@ #ifndef __MVLMM_H__ #define __MVLMM_H__ -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" -#include "param.h" +#include "gsl/gsl_vector.h" #include "io.h" +#include "param.h" using namespace std; class MVLMM { public: - // IO-related parameters. - int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests. - size_t d_pace; // Display pace. - - string file_bfile; - string file_geno; - string file_oxford; - string file_out; - string path_out; - - // MVLMM-related parameters. 
- double l_min; - double l_max; - size_t n_region; - double logl_remle_H0, logl_mle_H0; - vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null; - vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null; - vector<double> VVe_mle_null; - vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null; - vector<double> se_beta_mle_null; - double p_nr; - size_t em_iter, nr_iter; - double em_prec, nr_prec; - size_t crt; - - // Summary statistics. - size_t ni_total, ni_test; // Number of individuals. - size_t ns_total, ns_test; // Number of SNPs. - size_t n_cvt; - size_t n_ph; - double time_UtX; // Time spent on optimization iterations. - double time_opt; // Time spent on optimization iterations. - - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis. - vector<int> indicator_idv; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; - - vector<SNPINFO> snpInfo; // Record SNP information. - - // Not included in PARAM. - vector<MPHSUMSTAT> sumStat; // Output SNPSummary Data. - - // Main functions - void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY); - void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY); - void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY); - void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY, - const gsl_vector *env); - void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY, - const gsl_vector *env); - void WriteFiles (); - + // IO-related parameters. + int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests. 
+ size_t d_pace; // Display pace. + + string file_bfile; + string file_geno; + string file_oxford; + string file_out; + string path_out; + + // MVLMM-related parameters. + double l_min; + double l_max; + size_t n_region; + double logl_remle_H0, logl_mle_H0; + vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null; + vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null; + vector<double> VVe_mle_null; + vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null; + vector<double> se_beta_mle_null; + double p_nr; + size_t em_iter, nr_iter; + double em_prec, nr_prec; + size_t crt; + + // Summary statistics. + size_t ni_total, ni_test; // Number of individuals. + size_t ns_total, ns_test; // Number of SNPs. + size_t n_cvt; + size_t n_ph; + double time_UtX; // Time spent on optimization iterations. + double time_opt; // Time spent on optimization iterations. + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + vector<SNPINFO> snpInfo; // Record SNP information. + + // Not included in PARAM. + vector<MPHSUMSTAT> sumStat; // Output SNPSummary Data. 
+ + // Main functions + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY); + void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY); + void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY); + void AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY, + const gsl_vector *env); + void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY, + const gsl_vector *env); + void WriteFiles(); }; -void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, - const gsl_matrix *UtY, const size_t em_iter, - const size_t nr_iter, const double em_prec, - const double nr_prec, const double l_min, - const double l_max, const size_t n_region, - gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, - gsl_matrix *se_B); +void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_matrix *UtY, const size_t em_iter, + const size_t nr_iter, const double em_prec, + const double nr_prec, const double l_min, + const double l_max, const size_t n_region, + gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, + gsl_matrix *se_B); #endif - - diff --git a/src/param.cpp b/src/param.cpp index 413d517..2572bbb 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -16,1322 +16,1357 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> +#include <algorithm> +#include <cmath> +#include <cstring> #include <fstream> +#include <iostream> #include <string> -#include <cstring> #include <sys/stat.h> -#include <cmath> -#include <algorithm> -#include "gsl/gsl_randist.h" +#include "gsl/gsl_blas.h" +#include "gsl/gsl_linalg.h" #include "gsl/gsl_matrix.h" -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" -#include "gsl/gsl_blas.h" +#include "gsl/gsl_randist.h" +#include "gsl/gsl_vector.h" #include "eigenlib.h" +#include "io.h" #include "mathfunc.h" #include "param.h" -#include "io.h" using namespace std; -PARAM::PARAM(void): -mode_silence (false), a_mode (0), k_mode(1), d_pace (100000), -file_out("result"), path_out("./output/"), -miss_level(0.05), maf_level(0.01), hwe_level(0), r2_level(0.9999), -l_min(1e-5), l_max(1e5), n_region(10),p_nr(0.001),em_prec(0.0001), -nr_prec(0.0001),em_iter(10000),nr_iter(100),crt(0), -pheno_mean(0), noconstrain (false), -h_min(-1), h_max(-1), h_scale(-1), -rho_min(0.0), rho_max(1.0), rho_scale(-1), -logp_min(0.0), logp_max(0.0), logp_scale(-1), -h_ngrid(10), rho_ngrid(10), -s_min(0), s_max(300), -w_step(100000), s_step(1000000), -r_pace(10), w_pace(1000), -n_accept(0), -n_mh(10), -geo_mean(2000.0), -randseed(-1), -window_cm(0), window_bp(0), window_ns(0), n_block(200), -error(false), -ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0), -time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), -time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) -{} +PARAM::PARAM(void) + : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000), + file_out("result"), path_out("./output/"), miss_level(0.05), + maf_level(0.01), hwe_level(0), r2_level(0.9999), l_min(1e-5), l_max(1e5), + n_region(10), p_nr(0.001), em_prec(0.0001), nr_prec(0.0001), + em_iter(10000), nr_iter(100), crt(0), pheno_mean(0), noconstrain(false), + h_min(-1), h_max(-1), h_scale(-1), rho_min(0.0), rho_max(1.0), + rho_scale(-1), logp_min(0.0), logp_max(0.0), 
logp_scale(-1), h_ngrid(10), + rho_ngrid(10), s_min(0), s_max(300), w_step(100000), s_step(1000000), + r_pace(10), w_pace(1000), n_accept(0), n_mh(10), geo_mean(2000.0), + randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200), + error(false), ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0), + time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), + time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {} // Read files: obtain ns_total, ng_total, ns_test, ni_test. -void PARAM::ReadFiles (void) { - string file_str; - - // Read cat file. - if (!file_mcat.empty()) { - if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;} - } else if (!file_cat.empty()) { - if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;} - } - - // Read snp weight files. - if (!file_wcat.empty()) { - if (ReadFile_wsnp (file_wcat, n_vc, mapRS2wcat)==false) {error=true;} - } - if (!file_wsnp.empty()) { - if (ReadFile_wsnp (file_wsnp, mapRS2wsnp)==false) {error=true;} - } - - // Count number of kinship files. - if (!file_mk.empty()) { - if (CountFileLines (file_mk, n_vc)==false) {error=true;} - } - - // Read SNP set. - if (!file_snps.empty()) { - if (ReadFile_snps (file_snps, setSnps)==false) {error=true;} - } else { - setSnps.clear(); - } - - // For prediction. 
- if (!file_epm.empty()) { - if (ReadFile_est (file_epm, est_column, mapRS2est)==false) { - error=true; - } - if (!file_bfile.empty()) { - file_str=file_bfile+".bim"; - if (ReadFile_bim (file_str, snpInfo)==false) { - error=true; - } - file_str=file_bfile+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, - mapID2num, p_column)==false) { - error=true; - } - } - - if (!file_geno.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, - pheno, p_column)==false) { - error=true; - } - - if (CountFileLines (file_geno, ns_total)==false) { - error=true; - } - } - - if (!file_ebv.empty() ) { - if (ReadFile_column (file_ebv, indicator_bv, - vec_bv, 1)==false) { - error=true; - } - } - - if (!file_log.empty() ) { - if (ReadFile_log (file_log, pheno_mean)==false) { - error=true; - } - } - - // Convert indicator_pheno to indicator_idv. - int k=1; - for (size_t i=0; i<indicator_pheno.size(); i++) { - k=1; - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) {k=0;} - } - indicator_idv.push_back(k); - } - - ns_test=0; - - return; - } - - // Read covariates before the genotype files. - if (!file_cvt.empty() ) { - if (ReadFile_cvt (file_cvt, indicator_cvt, - cvt, n_cvt)==false) { - error=true; - } - if ((indicator_cvt).size()==0) { - n_cvt=1; - } - } else { - n_cvt=1; - } - - if (!file_gxe.empty() ) { - if (ReadFile_column (file_gxe, indicator_gxe, gxe, 1)==false) { - error=true; - } - } - if (!file_weight.empty() ) { - if (ReadFile_column (file_weight, indicator_weight, - weight, 1)==false) { - error=true; - } - } - - // WJA added. - // Read genotype and phenotype file for bgen format. - if (!file_oxford.empty()) { - file_str=file_oxford+".sample"; - if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column, - indicator_cvt, cvt, n_cvt)==false) { - error=true; - } - if ((indicator_cvt).size()==0) { - n_cvt=1; - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. 
- ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - file_str=file_oxford+".bgen"; - if (ReadFile_bgen (file_str, setSnps, W, indicator_idv, - indicator_snp, snpInfo, maf_level, - miss_level, hwe_level, r2_level, - ns_test)==false) { - error=true; - } - gsl_matrix_free(W); - - ns_total=indicator_snp.size(); - } - - // Read genotype and phenotype file for PLINK format. - if (!file_bfile.empty()) { - file_str=file_bfile+".bim"; - snpInfo.clear(); - if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} - - // If both fam file and pheno files are used, use - // phenotypes inside the pheno file. - if (!file_pheno.empty()) { - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) {error=true;} - } else { - file_str=file_bfile+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, - mapID2num, p_column)==false) {error=true;} - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - file_str=file_bfile+".bed"; - if (ReadFile_bed (file_str, setSnps, W, indicator_idv, - indicator_snp, snpInfo, maf_level, - miss_level, hwe_level, r2_level, - ns_test) == false) { - error=true; - } - gsl_matrix_free(W); - ns_total=indicator_snp.size(); - } - - // Read genotype and phenotype file for BIMBAM format. - if (!file_geno.empty()) { - - // Annotation file before genotype file. - if (!file_anno.empty() ) { - if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, - mapRS2cM)==false) { - error=true; - } - } - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column) == false) { - error=true; - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. 
- ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - if (ReadFile_geno (file_geno, setSnps, W, indicator_idv, - indicator_snp, maf_level, miss_level, - hwe_level, r2_level, mapRS2chr, mapRS2bp, - mapRS2cM, snpInfo, ns_test)==false) { - error=true; - } - gsl_matrix_free(W); - ns_total=indicator_snp.size(); - } - - // Read genotype file for multiple PLINK files. - if (!file_mbfile.empty()) { - igzstream infile (file_mbfile.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open mbfile file: " << file_mbfile<<endl; - return; - } - - string file_name; - size_t t=0, ns_test_tmp=0; - gsl_matrix *W; - while (!safeGetline(infile, file_name).eof()) { - file_str=file_name+".bim"; - - if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} - - if (t==0) { - - // If both fam file and pheno files are used, use - // phenotypes inside the pheno file. - if (!file_pheno.empty()) { - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) { - error=true; - } - } else { - file_str=file_name+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, - mapID2num, p_column)==false) { - error=true; - } - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - } - - file_str=file_name+".bed"; - if (ReadFile_bed (file_str, setSnps, W, indicator_idv, - indicator_snp, snpInfo, maf_level, - miss_level, hwe_level, r2_level, - ns_test_tmp)==false) { - error=true; - } - mindicator_snp.push_back(indicator_snp); - msnpInfo.push_back(snpInfo); - ns_test+=ns_test_tmp; - ns_total+=indicator_snp.size(); - - t++; - } - - gsl_matrix_free(W); - - infile.close(); - infile.clear(); - } - - // Read genotype and phenotype file for multiple BIMBAM files. 
- if (!file_mgeno.empty()) { - - // Annotation file before genotype file. - if (!file_anno.empty() ) { - if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, - mapRS2cM)==false) { - error=true; - } - } - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) { - error=true; - } - - // Post-process covariates and phenotypes, obtain ni_test, - // save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - igzstream infile (file_mgeno.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open mgeno file: "<<file_mgeno<<endl; - return; - } - - string file_name; - size_t ns_test_tmp; - while (!safeGetline(infile, file_name).eof()) { - if (ReadFile_geno (file_name, setSnps, W, indicator_idv, - indicator_snp, maf_level, miss_level, - hwe_level, r2_level, mapRS2chr, mapRS2bp, - mapRS2cM, snpInfo, ns_test_tmp)==false) { - error=true; - } - - mindicator_snp.push_back(indicator_snp); - msnpInfo.push_back(snpInfo); - ns_test+=ns_test_tmp; - ns_total+=indicator_snp.size(); - } - - gsl_matrix_free(W); - - infile.close(); - infile.clear(); - } - - if (!file_gene.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) {error=true;} - - // Convert indicator_pheno to indicator_idv. - int k=1; - for (size_t i=0; i<indicator_pheno.size(); i++) { - k=1; - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) {k=0;} - } - indicator_idv.push_back(k); - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - if (ReadFile_gene (file_gene, vec_read, snpInfo, - ng_total)==false) { - error=true; - } - } - - // Read is after gene file. 
- if (!file_read.empty() ) { - if (ReadFile_column (file_read, indicator_read, - vec_read, 1)==false) { - error=true; - } - - ni_test=0; - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_read[i]; - ni_test+=indicator_idv[i]; - } - - if (ni_test==0) { - error=true; - cout<<"error! number of analyzed individuals equals 0. "<< - endl; - return; - } - } - - // For ridge prediction, read phenotype only. - if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) { - error=true; - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - } - return; +void PARAM::ReadFiles(void) { + string file_str; + + // Read cat file. + if (!file_mcat.empty()) { + if (ReadFile_mcat(file_mcat, mapRS2cat, n_vc) == false) { + error = true; + } + } else if (!file_cat.empty()) { + if (ReadFile_cat(file_cat, mapRS2cat, n_vc) == false) { + error = true; + } + } + + // Read snp weight files. + if (!file_wcat.empty()) { + if (ReadFile_wsnp(file_wcat, n_vc, mapRS2wcat) == false) { + error = true; + } + } + if (!file_wsnp.empty()) { + if (ReadFile_wsnp(file_wsnp, mapRS2wsnp) == false) { + error = true; + } + } + + // Count number of kinship files. + if (!file_mk.empty()) { + if (CountFileLines(file_mk, n_vc) == false) { + error = true; + } + } + + // Read SNP set. + if (!file_snps.empty()) { + if (ReadFile_snps(file_snps, setSnps) == false) { + error = true; + } + } else { + setSnps.clear(); + } + + // For prediction. 
+ if (!file_epm.empty()) { + if (ReadFile_est(file_epm, est_column, mapRS2est) == false) { + error = true; + } + if (!file_bfile.empty()) { + file_str = file_bfile + ".bim"; + if (ReadFile_bim(file_str, snpInfo) == false) { + error = true; + } + file_str = file_bfile + ".fam"; + if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) == + false) { + error = true; + } + } + + if (!file_geno.empty()) { + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == + false) { + error = true; + } + + if (CountFileLines(file_geno, ns_total) == false) { + error = true; + } + } + + if (!file_ebv.empty()) { + if (ReadFile_column(file_ebv, indicator_bv, vec_bv, 1) == false) { + error = true; + } + } + + if (!file_log.empty()) { + if (ReadFile_log(file_log, pheno_mean) == false) { + error = true; + } + } + + // Convert indicator_pheno to indicator_idv. + int k = 1; + for (size_t i = 0; i < indicator_pheno.size(); i++) { + k = 1; + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + k = 0; + } + } + indicator_idv.push_back(k); + } + + ns_test = 0; + + return; + } + + // Read covariates before the genotype files. + if (!file_cvt.empty()) { + if (ReadFile_cvt(file_cvt, indicator_cvt, cvt, n_cvt) == false) { + error = true; + } + if ((indicator_cvt).size() == 0) { + n_cvt = 1; + } + } else { + n_cvt = 1; + } + + if (!file_gxe.empty()) { + if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) { + error = true; + } + } + if (!file_weight.empty()) { + if (ReadFile_column(file_weight, indicator_weight, weight, 1) == false) { + error = true; + } + } + + // WJA added. + // Read genotype and phenotype file for bgen format. 
+ if (!file_oxford.empty()) { + file_str = file_oxford + ".sample"; + if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column, + indicator_cvt, cvt, n_cvt) == false) { + error = true; + } + if ((indicator_cvt).size() == 0) { + n_cvt = 1; + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + file_str = file_oxford + ".bgen"; + if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp, + snpInfo, maf_level, miss_level, hwe_level, r2_level, + ns_test) == false) { + error = true; + } + gsl_matrix_free(W); + + ns_total = indicator_snp.size(); + } + + // Read genotype and phenotype file for PLINK format. + if (!file_bfile.empty()) { + file_str = file_bfile + ".bim"; + snpInfo.clear(); + if (ReadFile_bim(file_str, snpInfo) == false) { + error = true; + } + + // If both fam file and pheno files are used, use + // phenotypes inside the pheno file. + if (!file_pheno.empty()) { + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == + false) { + error = true; + } + } else { + file_str = file_bfile + ".fam"; + if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) == + false) { + error = true; + } + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + file_str = file_bfile + ".bed"; + if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp, + snpInfo, maf_level, miss_level, hwe_level, r2_level, + ns_test) == false) { + error = true; + } + gsl_matrix_free(W); + ns_total = indicator_snp.size(); + } + + // Read genotype and phenotype file for BIMBAM format. + if (!file_geno.empty()) { + + // Annotation file before genotype file. 
+ if (!file_anno.empty()) { + if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) { + error = true; + } + } + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp, + maf_level, miss_level, hwe_level, r2_level, mapRS2chr, + mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) { + error = true; + } + gsl_matrix_free(W); + ns_total = indicator_snp.size(); + } + + // Read genotype file for multiple PLINK files. + if (!file_mbfile.empty()) { + igzstream infile(file_mbfile.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open mbfile file: " << file_mbfile << endl; + return; + } + + string file_name; + size_t t = 0, ns_test_tmp = 0; + gsl_matrix *W; + while (!safeGetline(infile, file_name).eof()) { + file_str = file_name + ".bim"; + + if (ReadFile_bim(file_str, snpInfo) == false) { + error = true; + } + + if (t == 0) { + + // If both fam file and pheno files are used, use + // phenotypes inside the pheno file. + if (!file_pheno.empty()) { + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == + false) { + error = true; + } + } else { + file_str = file_name + ".fam"; + if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, + p_column) == false) { + error = true; + } + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. 
+ W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + } + + file_str = file_name + ".bed"; + if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp, + snpInfo, maf_level, miss_level, hwe_level, r2_level, + ns_test_tmp) == false) { + error = true; + } + mindicator_snp.push_back(indicator_snp); + msnpInfo.push_back(snpInfo); + ns_test += ns_test_tmp; + ns_total += indicator_snp.size(); + + t++; + } + + gsl_matrix_free(W); + + infile.close(); + infile.clear(); + } + + // Read genotype and phenotype file for multiple BIMBAM files. + if (!file_mgeno.empty()) { + + // Annotation file before genotype file. + if (!file_anno.empty()) { + if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) { + error = true; + } + } + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Post-process covariates and phenotypes, obtain ni_test, + // save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + igzstream infile(file_mgeno.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open mgeno file: " << file_mgeno << endl; + return; + } + + string file_name; + size_t ns_test_tmp; + while (!safeGetline(infile, file_name).eof()) { + if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp, + maf_level, miss_level, hwe_level, r2_level, mapRS2chr, + mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) { + error = true; + } + + mindicator_snp.push_back(indicator_snp); + msnpInfo.push_back(snpInfo); + ns_test += ns_test_tmp; + ns_total += indicator_snp.size(); + } + + gsl_matrix_free(W); + + infile.close(); + infile.clear(); + } + + if (!file_gene.empty()) { + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Convert indicator_pheno to indicator_idv. 
+ int k = 1; + for (size_t i = 0; i < indicator_pheno.size(); i++) { + k = 1; + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + k = 0; + } + } + indicator_idv.push_back(k); + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + if (ReadFile_gene(file_gene, vec_read, snpInfo, ng_total) == false) { + error = true; + } + } + + // Read is after gene file. + if (!file_read.empty()) { + if (ReadFile_column(file_read, indicator_read, vec_read, 1) == false) { + error = true; + } + + ni_test = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_read[i]; + ni_test += indicator_idv[i]; + } + + if (ni_test == 0) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + return; + } + } + + // For ridge prediction, read phenotype only. + if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) { + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + } + return; } -void PARAM::CheckParam (void) { - struct stat fileInfo; - string str; - - // Check parameters. - if (k_mode!=1 && k_mode!=2) { - cout<<"error! 
unknown kinship/relatedness input mode: "<< - k_mode<<endl; - error=true; - } - if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 - && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && - a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 && - a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && - a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && - a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && - a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 && - a_mode!=71) { - cout<<"error! unknown analysis mode: "<<a_mode<< - ". make sure -gk or -eigen or -lmm or -bslmm -predict or " << - "-calccov is sepcified correctly."<<endl; - error=true; - } - if (miss_level>1) { - cout<<"error! missing level needs to be between 0 and 1. " << - "current value = "<<miss_level<<endl; - error=true; - } - if (maf_level>0.5) { - cout<<"error! maf level needs to be between 0 and 0.5. " << - "current value = "<<maf_level<<endl; - error=true; - } - if (hwe_level>1) { - cout<<"error! hwe level needs to be between 0 and 1. " << - "current value = "<<hwe_level<<endl; - error=true; - } - if (r2_level>1) { - cout<<"error! r2 level needs to be between 0 and 1. " << - "current value = "<<r2_level<<endl; - error=true; - } - - if (l_max<l_min) { - cout<<"error! maximum lambda value must be larger than the " << - "minimal value. current values = "<<l_max<<" and "<<l_min<<endl; - error=true; - } - if (h_max<h_min) { - cout<<"error! maximum h value must be larger than the minimal "<< - "value. current values = "<<h_max<<" and "<<h_min<<endl; - error=true; - } - if (s_max<s_min) { - cout<<"error! maximum s value must be larger than the minimal "<< - "value. current values = "<<s_max<<" and "<<s_min<<endl; - error=true; - } - if (rho_max<rho_min) { - cout<<"error! maximum rho value must be larger than the"<< - "minimal value. current values = "<<rho_max<<" and "<< - rho_min<<endl; - error=true; - } - if (logp_max<logp_min) { - cout<<"error! 
maximum logp value must be larger than the "<< - "minimal value. current values = "<<logp_max/log(10)<< - " and "<<logp_min/log(10)<<endl; - error=true; - } - - if (h_max>1) { - cout<<"error! h values must be bewtween 0 and 1. current "<< - "values = "<<h_max<<" and "<<h_min<<endl; - error=true; - } - if (rho_max>1) { - cout<<"error! rho values must be between 0 and 1. current "<< - "values = "<<rho_max<<" and "<<rho_min<<endl; - error=true; - } - if (logp_max>0) { - cout<<"error! maximum logp value must be smaller than 0. "<< - "current values = "<<logp_max/log(10)<<" and "<< - logp_min/log(10)<<endl; - error=true; - } - if (l_max<l_min) { - cout<<"error! maximum lambda value must be larger than the "<< - "minimal value. current values = "<<l_max<<" and "<<l_min<<endl; - error=true; - } - - if (h_scale>1.0) { - cout<<"error! hscale value must be between 0 and 1. "<< - "current value = "<<h_scale<<endl; - error=true; - } - if (rho_scale>1.0) { - cout<<"error! rscale value must be between 0 and 1. "<< - "current value = "<<rho_scale<<endl; - error=true; - } - if (logp_scale>1.0) { - cout<<"error! pscale value must be between 0 and 1. "<< - "current value = "<<logp_scale<<endl; - error=true; - } - - if (rho_max==1 && rho_min==1 && a_mode==12) { - cout<<"error! ridge regression does not support a rho "<< - "parameter. current values = "<<rho_max<<" and "<<rho_min<<endl; - error=true; - } - - if (window_cm<0) { - cout<<"error! windowcm values must be non-negative. "<< - "current values = "<<window_cm<<endl; - error=true; - } - - if (window_cm==0 && window_bp==0 && window_ns==0) { - window_bp=1000000; - } - - // Check p_column, and (no need to) sort p_column into - // ascending order. - if (p_column.size()==0) { - p_column.push_back(1); - } else { - for (size_t i=0; i<p_column.size(); i++) { - for (size_t j=0; j<i; j++) { - if (p_column[i]==p_column[j]) { - cout<<"error! 
identical phenotype "<< - "columns: "<<p_column[i]<<endl; - error= - true;} - } - } - } - - n_ph=p_column.size(); - - // Only LMM option (and one prediction option) can deal with - // multiple phenotypes and no gene expression files. - if (n_ph>1 && a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && - a_mode!=43) { - cout<<"error! the current analysis mode "<<a_mode<< - " can not deal with multiple phenotypes."<<endl; - error=true; - } - if (n_ph>1 && !file_gene.empty() ) { - cout<<"error! multiple phenotype analysis option not "<< - "allowed with gene expression files. "<<endl; - error=true; - } - - if (p_nr>1) { - cout<<"error! pnr value must be between 0 and 1. current value = "<< - p_nr<<endl; - error=true; - } - - //check est_column - if (est_column.size()==0) { - if (file_ebv.empty()) { - est_column.push_back(2); - est_column.push_back(5); - est_column.push_back(6); - est_column.push_back(7); - } else { - est_column.push_back(2); - est_column.push_back(0); - est_column.push_back(6); - est_column.push_back(7); - } - } - - if (est_column.size()!=4) { - cout<<"error! -en not followed by four numbers. current number = "<< - est_column.size()<<endl; - error=true; - } - if (est_column[0]==0) { - cout<<"error! -en rs column can not be zero. current number = "<< - est_column.size()<<endl; - error=true; - } - - // Check if files are compatible with each other, and if files exist. - if (!file_bfile.empty()) { - str=file_bfile+".bim"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .bim file: "<<str<<endl; - error=true; - } - str=file_bfile+".bed"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .bed file: "<<str<<endl; - error=true; - } - str=file_bfile+".fam"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .fam file: "<<str<<endl; - error=true; - } - } - - if (!file_oxford.empty()) { - str=file_oxford+".bgen"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! 
fail to open .bgen file: "<<str<<endl; - error=true; - } - str=file_oxford+".sample"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .sample file: "<<str<<endl; - error=true; - } - } - - if ((!file_geno.empty() || !file_gene.empty()) ) { - str=file_pheno; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open phenotype file: "<<str<<endl; - error=true; - } - } - - str=file_geno; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mean genotype file: "<<str<<endl; - error=true; - } - - str=file_gene; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open gene expression file: "<<str<<endl; - error=true; - } - - str=file_cat; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open category file: "<<str<<endl; - error=true; - } - - str=file_mcat; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mcategory file: "<<str<<endl; - error=true; - } - - str=file_beta; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open beta file: "<<str<<endl; - error=true; - } - - str=file_cor; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open correlation file: "<<str<<endl; - error=true; - } - - if (!file_study.empty()) { - str=file_study+".Vq.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .Vq.txt file: "<<str<<endl; - error=true; - } - str=file_study+".q.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .q.txt file: "<<str<<endl; - error=true; - } - str=file_study+".size.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .size.txt file: "<<str<<endl; - error=true; - } - } - - if (!file_ref.empty()) { - str=file_ref+".S.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! 
fail to open .S.txt file: "<<str<<endl; - error=true; - } - str=file_ref+".size.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .size.txt file: "<<str<<endl; - error=true; - } - } - - str=file_mstudy; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mstudy file: "<<str<<endl; - error=true; - } - - str=file_mref; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mref file: "<<str<<endl; - error=true; - } - - str=file_mgeno; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mgeno file: "<<str<<endl; - error=true; - } - - str=file_mbfile; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mbfile file: "<<str<<endl; - error=true; - } - - size_t flag=0; - if (!file_bfile.empty()) {flag++;} - if (!file_geno.empty()) {flag++;} - if (!file_gene.empty()) {flag++;} - - // WJA added. - if (!file_oxford.empty()) {flag++;} - - if (flag!=1 && a_mode!=15 && a_mode!=27 && a_mode!=28 && - a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62 && - a_mode!=63 && a_mode!=66 && a_mode!=67) { - cout<<"error! either plink binary files, or bimbam mean"<< - "genotype files, or gene expression files are required."<<endl; - error=true; - } - - if (file_pheno.empty() && (a_mode==43 || a_mode==5) ) { - cout<<"error! phenotype file is required."<<endl; error=true; - } - - if (a_mode==61 || a_mode==62) { - if (!file_beta.empty()) { - if ( file_mbfile.empty() && file_bfile.empty() && - file_mgeno.empty() && file_geno.empty() && - file_mref.empty() && file_ref.empty() ) { - cout<<"error! missing genotype file or ref/mref file."<<endl; - error=true; - } - } else if (!file_pheno.empty()) { - if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && - file_mk.empty() ) { - cout<<"error! missing relatedness file. 
"<<endl; error=true; - } - } else if ( (file_mstudy.empty() && file_study.empty()) || - (file_mref.empty() && file_ref.empty() ) ) { - cout<<"error! either beta file, or phenotype files or "<< - "study/ref mstudy/mref files are required."<<endl; - error=true; - } - } - - - if (a_mode==63) { - if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && - file_mk.empty() ) { - cout<<"error! missing relatedness file. "<<endl; error=true; - } - if ( file_pheno.empty() ) { - cout<<"error! missing phenotype file."<<endl; error=true; - } - } - - if (a_mode==66 || a_mode==67) { - if (file_beta.empty() || - (file_mbfile.empty() && file_bfile.empty() && - file_mgeno.empty() && file_geno.empty()) ) { - cout<<"error! missing beta file or genotype file."<<endl; - error=true; - } - } - - - if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) { - cout<<"error! estimated parameter file also requires genotype "<< - "file."<<endl; - error=true; - } - if (!file_ebv.empty() && file_kin.empty()) { - cout<<"error! estimated breeding value file also requires "<< - "relatedness file."<<endl; - error=true; - } - - if (!file_log.empty() && pheno_mean!=0) { - cout<<"error! either log file or mu value can be provide."<<endl; - error=true; - } - - str=file_snps; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open snps file: "<<str<<endl; - error=true; - } - - str=file_log; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open log file: "<<str<<endl; - error=true; - } - - str=file_anno; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open annotation file: "<<str<<endl; - error=true; - } - - str=file_kin; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open relatedness matrix file: "<<str<<endl; - error=true; - } - - str=file_mk; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! 
fail to open relatedness matrix file: "<<str<<endl; - error=true; - } - - str=file_cvt; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open covariates file: "<<str<<endl; - error=true; - } - - str=file_gxe; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open environmental covariate file: "<< - str<<endl; - error=true; - } - - str=file_weight; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open the residual weight file: "<<str<<endl; - error=true; - } - - str=file_epm; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open estimated parameter file: "<<str<<endl; - error=true; - } - - str=file_ebv; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open estimated breeding value file: "<< - str<<endl; - error=true; - } - - str=file_read; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open total read file: "<<str<<endl; - error=true; - } - - // Check if files are compatible with analysis mode. - if (k_mode==2 && !file_geno.empty() ) { - cout<<"error! use \"-km 1\" when using bimbam mean genotype "<< - "file. "<<endl; - error=true; - } - - if ((a_mode==1 || a_mode==2 || a_mode==3 || a_mode==4 || - a_mode==5 || a_mode==31) && - (file_kin.empty() && (file_ku.empty()||file_kd.empty()))) { - cout<<"error! missing relatedness file. "<<endl; - error=true; - } - - if ((a_mode==43) && file_kin.empty()) { - cout<<"error! missing relatedness file. -predict option requires "<< - "-k option to provide a relatedness file."<<endl; - error=true; - } - - if ((a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14 || - a_mode==16) && !file_cvt.empty()) { - cout<<"error! -bslmm option does not support covariates files."<< - endl; - error=true; - } - - if (a_mode==41 || a_mode==42) { - if (!file_cvt.empty() ) { - cout<<"error! 
-predict option does not support "<< - "covariates files."<<endl; - error=true; - } - if (file_epm.empty() ) { - cout<<"error! -predict option requires estimated "<< - "parameter files."<<endl; - error=true; - } - } - - if (file_beta.empty() && (a_mode==27 || a_mode==28) ) { - cout<<"error! beta effects file is required."<<endl; - error=true; - } - - return; +void PARAM::CheckParam(void) { + struct stat fileInfo; + string str; + + // Check parameters. + if (k_mode != 1 && k_mode != 2) { + cout << "error! unknown kinship/relatedness input mode: " << k_mode << endl; + error = true; + } + if (a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 && a_mode != 5 && + a_mode != 11 && a_mode != 12 && a_mode != 13 && a_mode != 14 && + a_mode != 15 && a_mode != 21 && a_mode != 22 && a_mode != 25 && + a_mode != 26 && a_mode != 27 && a_mode != 28 && a_mode != 31 && + a_mode != 41 && a_mode != 42 && a_mode != 43 && a_mode != 51 && + a_mode != 52 && a_mode != 53 && a_mode != 54 && a_mode != 61 && + a_mode != 62 && a_mode != 63 && a_mode != 66 && a_mode != 67 && + a_mode != 71) { + cout << "error! unknown analysis mode: " << a_mode + << ". make sure -gk or -eigen or -lmm or -bslmm -predict or " + << "-calccov is sepcified correctly." << endl; + error = true; + } + if (miss_level > 1) { + cout << "error! missing level needs to be between 0 and 1. " + << "current value = " << miss_level << endl; + error = true; + } + if (maf_level > 0.5) { + cout << "error! maf level needs to be between 0 and 0.5. " + << "current value = " << maf_level << endl; + error = true; + } + if (hwe_level > 1) { + cout << "error! hwe level needs to be between 0 and 1. " + << "current value = " << hwe_level << endl; + error = true; + } + if (r2_level > 1) { + cout << "error! r2 level needs to be between 0 and 1. " + << "current value = " << r2_level << endl; + error = true; + } + + if (l_max < l_min) { + cout << "error! maximum lambda value must be larger than the " + << "minimal value. 
current values = " << l_max << " and " << l_min + << endl; + error = true; + } + if (h_max < h_min) { + cout << "error! maximum h value must be larger than the minimal " + << "value. current values = " << h_max << " and " << h_min << endl; + error = true; + } + if (s_max < s_min) { + cout << "error! maximum s value must be larger than the minimal " + << "value. current values = " << s_max << " and " << s_min << endl; + error = true; + } + if (rho_max < rho_min) { + cout << "error! maximum rho value must be larger than the" + << "minimal value. current values = " << rho_max << " and " << rho_min + << endl; + error = true; + } + if (logp_max < logp_min) { + cout << "error! maximum logp value must be larger than the " + << "minimal value. current values = " << logp_max / log(10) << " and " + << logp_min / log(10) << endl; + error = true; + } + + if (h_max > 1) { + cout << "error! h values must be bewtween 0 and 1. current " + << "values = " << h_max << " and " << h_min << endl; + error = true; + } + if (rho_max > 1) { + cout << "error! rho values must be between 0 and 1. current " + << "values = " << rho_max << " and " << rho_min << endl; + error = true; + } + if (logp_max > 0) { + cout << "error! maximum logp value must be smaller than 0. " + << "current values = " << logp_max / log(10) << " and " + << logp_min / log(10) << endl; + error = true; + } + if (l_max < l_min) { + cout << "error! maximum lambda value must be larger than the " + << "minimal value. current values = " << l_max << " and " << l_min + << endl; + error = true; + } + + if (h_scale > 1.0) { + cout << "error! hscale value must be between 0 and 1. " + << "current value = " << h_scale << endl; + error = true; + } + if (rho_scale > 1.0) { + cout << "error! rscale value must be between 0 and 1. " + << "current value = " << rho_scale << endl; + error = true; + } + if (logp_scale > 1.0) { + cout << "error! pscale value must be between 0 and 1. 
" + << "current value = " << logp_scale << endl; + error = true; + } + + if (rho_max == 1 && rho_min == 1 && a_mode == 12) { + cout << "error! ridge regression does not support a rho " + << "parameter. current values = " << rho_max << " and " << rho_min + << endl; + error = true; + } + + if (window_cm < 0) { + cout << "error! windowcm values must be non-negative. " + << "current values = " << window_cm << endl; + error = true; + } + + if (window_cm == 0 && window_bp == 0 && window_ns == 0) { + window_bp = 1000000; + } + + // Check p_column, and (no need to) sort p_column into + // ascending order. + if (p_column.size() == 0) { + p_column.push_back(1); + } else { + for (size_t i = 0; i < p_column.size(); i++) { + for (size_t j = 0; j < i; j++) { + if (p_column[i] == p_column[j]) { + cout << "error! identical phenotype " + << "columns: " << p_column[i] << endl; + error = true; + } + } + } + } + + n_ph = p_column.size(); + + // Only LMM option (and one prediction option) can deal with + // multiple phenotypes and no gene expression files. + if (n_ph > 1 && a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 && + a_mode != 43) { + cout << "error! the current analysis mode " << a_mode + << " can not deal with multiple phenotypes." << endl; + error = true; + } + if (n_ph > 1 && !file_gene.empty()) { + cout << "error! multiple phenotype analysis option not " + << "allowed with gene expression files. " << endl; + error = true; + } + + if (p_nr > 1) { + cout << "error! pnr value must be between 0 and 1. current value = " << p_nr + << endl; + error = true; + } + + // check est_column + if (est_column.size() == 0) { + if (file_ebv.empty()) { + est_column.push_back(2); + est_column.push_back(5); + est_column.push_back(6); + est_column.push_back(7); + } else { + est_column.push_back(2); + est_column.push_back(0); + est_column.push_back(6); + est_column.push_back(7); + } + } + + if (est_column.size() != 4) { + cout << "error! -en not followed by four numbers. 
current number = " + << est_column.size() << endl; + error = true; + } + if (est_column[0] == 0) { + cout << "error! -en rs column can not be zero. current number = " + << est_column.size() << endl; + error = true; + } + + // Check if files are compatible with each other, and if files exist. + if (!file_bfile.empty()) { + str = file_bfile + ".bim"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .bim file: " << str << endl; + error = true; + } + str = file_bfile + ".bed"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .bed file: " << str << endl; + error = true; + } + str = file_bfile + ".fam"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .fam file: " << str << endl; + error = true; + } + } + + if (!file_oxford.empty()) { + str = file_oxford + ".bgen"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .bgen file: " << str << endl; + error = true; + } + str = file_oxford + ".sample"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .sample file: " << str << endl; + error = true; + } + } + + if ((!file_geno.empty() || !file_gene.empty())) { + str = file_pheno; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open phenotype file: " << str << endl; + error = true; + } + } + + str = file_geno; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mean genotype file: " << str << endl; + error = true; + } + + str = file_gene; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open gene expression file: " << str << endl; + error = true; + } + + str = file_cat; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open category file: " << str << endl; + error = true; + } + + str = file_mcat; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! 
fail to open mcategory file: " << str << endl; + error = true; + } + + str = file_beta; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open beta file: " << str << endl; + error = true; + } + + str = file_cor; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open correlation file: " << str << endl; + error = true; + } + + if (!file_study.empty()) { + str = file_study + ".Vq.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .Vq.txt file: " << str << endl; + error = true; + } + str = file_study + ".q.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .q.txt file: " << str << endl; + error = true; + } + str = file_study + ".size.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .size.txt file: " << str << endl; + error = true; + } + } + + if (!file_ref.empty()) { + str = file_ref + ".S.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .S.txt file: " << str << endl; + error = true; + } + str = file_ref + ".size.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .size.txt file: " << str << endl; + error = true; + } + } + + str = file_mstudy; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mstudy file: " << str << endl; + error = true; + } + + str = file_mref; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mref file: " << str << endl; + error = true; + } + + str = file_mgeno; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mgeno file: " << str << endl; + error = true; + } + + str = file_mbfile; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! 
fail to open mbfile file: " << str << endl; + error = true; + } + + size_t flag = 0; + if (!file_bfile.empty()) { + flag++; + } + if (!file_geno.empty()) { + flag++; + } + if (!file_gene.empty()) { + flag++; + } + + // WJA added. + if (!file_oxford.empty()) { + flag++; + } + + if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 && + a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 && + a_mode != 63 && a_mode != 66 && a_mode != 67) { + cout << "error! either plink binary files, or bimbam mean" + << "genotype files, or gene expression files are required." << endl; + error = true; + } + + if (file_pheno.empty() && (a_mode == 43 || a_mode == 5)) { + cout << "error! phenotype file is required." << endl; + error = true; + } + + if (a_mode == 61 || a_mode == 62) { + if (!file_beta.empty()) { + if (file_mbfile.empty() && file_bfile.empty() && file_mgeno.empty() && + file_geno.empty() && file_mref.empty() && file_ref.empty()) { + cout << "error! missing genotype file or ref/mref file." << endl; + error = true; + } + } else if (!file_pheno.empty()) { + if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) && + file_mk.empty()) { + cout << "error! missing relatedness file. " << endl; + error = true; + } + } else if ((file_mstudy.empty() && file_study.empty()) || + (file_mref.empty() && file_ref.empty())) { + cout << "error! either beta file, or phenotype files or " + << "study/ref mstudy/mref files are required." << endl; + error = true; + } + } + + if (a_mode == 63) { + if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) && + file_mk.empty()) { + cout << "error! missing relatedness file. " << endl; + error = true; + } + if (file_pheno.empty()) { + cout << "error! missing phenotype file." << endl; + error = true; + } + } + + if (a_mode == 66 || a_mode == 67) { + if (file_beta.empty() || (file_mbfile.empty() && file_bfile.empty() && + file_mgeno.empty() && file_geno.empty())) { + cout << "error! missing beta file or genotype file." 
<< endl; + error = true; + } + } + + if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) { + cout << "error! estimated parameter file also requires genotype " + << "file." << endl; + error = true; + } + if (!file_ebv.empty() && file_kin.empty()) { + cout << "error! estimated breeding value file also requires " + << "relatedness file." << endl; + error = true; + } + + if (!file_log.empty() && pheno_mean != 0) { + cout << "error! either log file or mu value can be provide." << endl; + error = true; + } + + str = file_snps; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open snps file: " << str << endl; + error = true; + } + + str = file_log; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open log file: " << str << endl; + error = true; + } + + str = file_anno; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open annotation file: " << str << endl; + error = true; + } + + str = file_kin; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open relatedness matrix file: " << str << endl; + error = true; + } + + str = file_mk; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open relatedness matrix file: " << str << endl; + error = true; + } + + str = file_cvt; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open covariates file: " << str << endl; + error = true; + } + + str = file_gxe; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open environmental covariate file: " << str << endl; + error = true; + } + + str = file_weight; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open the residual weight file: " << str << endl; + error = true; + } + + str = file_epm; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! 
fail to open estimated parameter file: " << str << endl; + error = true; + } + + str = file_ebv; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open estimated breeding value file: " << str + << endl; + error = true; + } + + str = file_read; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open total read file: " << str << endl; + error = true; + } + + // Check if files are compatible with analysis mode. + if (k_mode == 2 && !file_geno.empty()) { + cout << "error! use \"-km 1\" when using bimbam mean genotype " + << "file. " << endl; + error = true; + } + + if ((a_mode == 1 || a_mode == 2 || a_mode == 3 || a_mode == 4 || + a_mode == 5 || a_mode == 31) && + (file_kin.empty() && (file_ku.empty() || file_kd.empty()))) { + cout << "error! missing relatedness file. " << endl; + error = true; + } + + if ((a_mode == 43) && file_kin.empty()) { + cout << "error! missing relatedness file. -predict option requires " + << "-k option to provide a relatedness file." << endl; + error = true; + } + + if ((a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14 || + a_mode == 16) && + !file_cvt.empty()) { + cout << "error! -bslmm option does not support covariates files." << endl; + error = true; + } + + if (a_mode == 41 || a_mode == 42) { + if (!file_cvt.empty()) { + cout << "error! -predict option does not support " + << "covariates files." << endl; + error = true; + } + if (file_epm.empty()) { + cout << "error! -predict option requires estimated " + << "parameter files." << endl; + error = true; + } + } + + if (file_beta.empty() && (a_mode == 27 || a_mode == 28)) { + cout << "error! beta effects file is required." << endl; + error = true; + } + + return; } -void PARAM::CheckData (void) { +void PARAM::CheckData(void) { // WJA NOTE: I added this condition so that covariates can be added // through sample, probably not exactly what is wanted. 
- if(file_oxford.empty()) - { - if ((file_cvt).empty() || (indicator_cvt).size()==0) { - n_cvt=1; - } - } - - if ( (a_mode==66 || a_mode==67) && (v_pve.size()!=n_vc)) { - cout<<"error! the number of pve estimates does not equal to "<< - "the number of categories in the cat file:"<<v_pve.size()<<" "<< - n_vc<<endl; - error=true; - } - - if ( (indicator_cvt).size()!=0 && - (indicator_cvt).size()!=(indicator_idv).size()) { - error=true; - cout << "error! number of rows in the covariates file do not "<< - "match the number of individuals. "<<endl; + if (file_oxford.empty()) { + if ((file_cvt).empty() || (indicator_cvt).size() == 0) { + n_cvt = 1; + } + } + + if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) { + cout << "error! the number of pve estimates does not equal to " + << "the number of categories in the cat file:" << v_pve.size() << " " + << n_vc << endl; + error = true; + } + + if ((indicator_cvt).size() != 0 && + (indicator_cvt).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the covariates file do not " + << "match the number of individuals. " << endl; return; } - if ( (indicator_gxe).size()!=0 && (indicator_gxe).size() != - (indicator_idv).size()) { - error=true; - cout<<"error! number of rows in the gxe file do not match the number "<< - "of individuals. "<<endl; + if ((indicator_gxe).size() != 0 && + (indicator_gxe).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the gxe file do not match the number " + << "of individuals. " << endl; return; } - if ( (indicator_weight).size()!=0 && - (indicator_weight).size()!=(indicator_idv).size()) { - error=true; - cout<<"error! number of rows in the weight file do not match "<< - "the number of individuals. "<<endl; + if ((indicator_weight).size() != 0 && + (indicator_weight).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the weight file do not match " + << "the number of individuals. 
" << endl; return; } - if ( (indicator_read).size()!=0 && - (indicator_read).size()!=(indicator_idv).size()) { - error=true; - cout<<"error! number of rows in the total read file do not "<< - "match the number of individuals. "<<endl; + if ((indicator_read).size() != 0 && + (indicator_read).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the total read file do not " + << "match the number of individuals. " << endl; return; } - // Calculate ni_total and ni_test, and set indicator_idv to 0 - // whenever indicator_cvt=0, and calculate np_obs and np_miss. - ni_total=(indicator_idv).size(); - - ni_test=0; - for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) { - if (indicator_idv[i]==0) {continue;} - ni_test++; - } - - ni_cvt=0; - for (size_t i=0; i<indicator_cvt.size(); i++) { - if (indicator_cvt[i]==0) {continue;} - ni_cvt++; - } - - np_obs=0; np_miss=0; - for (size_t i=0; i<indicator_pheno.size(); i++) { - if (indicator_cvt.size()!=0) { - if (indicator_cvt[i]==0) {continue;} - } - - if (indicator_gxe.size()!=0) { - if (indicator_gxe[i]==0) {continue;} - } - - if (indicator_weight.size()!=0) { - if (indicator_weight[i]==0) {continue;} - } - - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) { - np_miss++; - } else { - np_obs++; - } - } - } - - if (ni_test==0 && file_cor.empty() && file_mstudy.empty() && - file_study.empty() && file_beta.empty() && file_bf.empty() ) { - error=true; - cout<<"error! number of analyzed individuals equals 0. "<<endl; - return; - } - - if (a_mode==43) { - if (ni_cvt==ni_test) { - error=true; - cout<<"error! no individual has missing "<< - "phenotypes."<<endl; - return; - } - if ((np_obs+np_miss)!=(ni_cvt*n_ph)) { - error=true; - cout<<"error! number of phenotypes do not match the "<< - "summation of missing and observed phenotypes."<< - endl; - return; - } - } - - // Output some information. 
- if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && - a_mode!=15 && a_mode!=27 && a_mode!=28) { - cout<<"## number of total individuals = "<<ni_total<<endl; - if (a_mode==43) { - cout<<"## number of analyzed individuals = "<<ni_cvt<<endl; - cout<<"## number of individuals with full phenotypes = "<< - ni_test<<endl; - } else { - cout<<"## number of analyzed individuals = "<<ni_test<<endl; - } - cout<<"## number of covariates = "<<n_cvt<<endl; - cout<<"## number of phenotypes = "<<n_ph<<endl; - if (a_mode==43) { - cout<<"## number of observed data = "<<np_obs<<endl; - cout<<"## number of missing data = "<<np_miss<<endl; - } - if (!file_gene.empty()) { - cout<<"## number of total genes = "<<ng_total<<endl; - } else if (file_epm.empty() && a_mode!=43 && a_mode!=5) { - cout<<"## number of total SNPs = "<<ns_total<<endl; - cout<<"## number of analyzed SNPs = "<<ns_test<<endl; - } else {} - } - - // Set d_pace to 1000 for gene expression. - if (!file_gene.empty() && d_pace==100000) { - d_pace=1000; - } - - // For case-control studies, count # cases and # controls. - int flag_cc=0; - if (a_mode==13) { - ni_case=0; - ni_control=0; - for (size_t i=0; i<indicator_idv.size(); i++) { - if (indicator_idv[i]==0) {continue;} - - if (pheno[i][0]==0) {ni_control++;} - else if (pheno[i][0]==1) {ni_case++;} - else {flag_cc=1;} - } - cout<<"## number of cases = "<<ni_case<<endl; - cout<<"## number of controls = "<<ni_control<<endl; - } - - if (flag_cc==1) {cout<<"Unexpected non-binary phenotypes for "<< - "case/control analysis. Use default (BSLMM) analysis instead."<< - endl; - a_mode=11; - } - - // Set parameters for BSLMM and check for predict. 
- if (a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14) { - if (a_mode==11) { - n_mh=1; - } - if (logp_min==0) { - logp_min=-1.0*log((double)ns_test); - } - - if (h_scale==-1) { - h_scale=min(1.0, 10.0/sqrt((double)ni_test) ); - } - if (rho_scale==-1) { - rho_scale=min(1.0, 10.0/sqrt((double)ni_test) ); - } - if (logp_scale==-1) { - logp_scale=min(1.0, 5.0/sqrt((double)ni_test) ); - } - - if (h_min==-1) {h_min=0.0;} - if (h_max==-1) {h_max=1.0;} - - if (s_max>ns_test) { - s_max=ns_test; - cout<<"s_max is re-set to the number of analyzed SNPs."<< - endl; - } - if (s_max<s_min) { - cout<<"error! maximum s value must be larger than the "<< - "minimal value. current values = "<<s_max<<" and "<< - s_min<<endl; - error=true; - } - } else if (a_mode==41 || a_mode==42) { - if (indicator_bv.size()!=0) { - if (indicator_idv.size()!=indicator_bv.size()) { - cout<<"error! number of rows in the "<< - "phenotype file does not match that in the "<< - "estimated breeding value file: "<< - indicator_idv.size()<<"\t"<<indicator_bv.size()<< - endl; - error=true; - } else { - size_t flag_bv=0; - for (size_t i=0; i<(indicator_bv).size(); ++i) { - if (indicator_idv[i]!=indicator_bv[i]) {flag_bv++;} - } - if (flag_bv!=0) { - cout<<"error! individuals with missing value in the "<< - "phenotype file does not match that in the "<< - "estimated breeding value file: "<<flag_bv<<endl; - error=true; - } - } - } - } - - if (a_mode==62 && !file_beta.empty() && mapRS2wcat.size()==0) { - cout<<"vc analysis with beta files requires -wcat file."<<endl; - error=true; - } - if (a_mode==67 && mapRS2wcat.size()==0) { - cout<<"ci analysis with beta files requires -wcat file."<<endl; - error=true; - } - - // File_mk needs to contain more than one line. - if (n_vc==1 && !file_mk.empty()) { - cout<<"error! 
-mk file should contain more than one line."<<endl; - error=true; - } - - return; -} + // Calculate ni_total and ni_test, and set indicator_idv to 0 + // whenever indicator_cvt=0, and calculate np_obs and np_miss. + ni_total = (indicator_idv).size(); + + ni_test = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ni_test++; + } + + ni_cvt = 0; + for (size_t i = 0; i < indicator_cvt.size(); i++) { + if (indicator_cvt[i] == 0) { + continue; + } + ni_cvt++; + } + + np_obs = 0; + np_miss = 0; + for (size_t i = 0; i < indicator_pheno.size(); i++) { + if (indicator_cvt.size() != 0) { + if (indicator_cvt[i] == 0) { + continue; + } + } + + if (indicator_gxe.size() != 0) { + if (indicator_gxe[i] == 0) { + continue; + } + } + + if (indicator_weight.size() != 0) { + if (indicator_weight[i] == 0) { + continue; + } + } + + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + np_miss++; + } else { + np_obs++; + } + } + } + + if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() && + file_study.empty() && file_beta.empty() && file_bf.empty()) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + return; + } + + if (a_mode == 43) { + if (ni_cvt == ni_test) { + error = true; + cout << "error! no individual has missing " + << "phenotypes." << endl; + return; + } + if ((np_obs + np_miss) != (ni_cvt * n_ph)) { + error = true; + cout << "error! number of phenotypes do not match the " + << "summation of missing and observed phenotypes." << endl; + return; + } + } + + // Output some information. 
+ if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && + a_mode != 15 && a_mode != 27 && a_mode != 28) { + cout << "## number of total individuals = " << ni_total << endl; + if (a_mode == 43) { + cout << "## number of analyzed individuals = " << ni_cvt << endl; + cout << "## number of individuals with full phenotypes = " << ni_test + << endl; + } else { + cout << "## number of analyzed individuals = " << ni_test << endl; + } + cout << "## number of covariates = " << n_cvt << endl; + cout << "## number of phenotypes = " << n_ph << endl; + if (a_mode == 43) { + cout << "## number of observed data = " << np_obs << endl; + cout << "## number of missing data = " << np_miss << endl; + } + if (!file_gene.empty()) { + cout << "## number of total genes = " << ng_total << endl; + } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) { + cout << "## number of total SNPs = " << ns_total << endl; + cout << "## number of analyzed SNPs = " << ns_test << endl; + } else { + } + } + + // Set d_pace to 1000 for gene expression. + if (!file_gene.empty() && d_pace == 100000) { + d_pace = 1000; + } + + // For case-control studies, count # cases and # controls. + int flag_cc = 0; + if (a_mode == 13) { + ni_case = 0; + ni_control = 0; + for (size_t i = 0; i < indicator_idv.size(); i++) { + if (indicator_idv[i] == 0) { + continue; + } + + if (pheno[i][0] == 0) { + ni_control++; + } else if (pheno[i][0] == 1) { + ni_case++; + } else { + flag_cc = 1; + } + } + cout << "## number of cases = " << ni_case << endl; + cout << "## number of controls = " << ni_control << endl; + } + + if (flag_cc == 1) { + cout << "Unexpected non-binary phenotypes for " + << "case/control analysis. Use default (BSLMM) analysis instead." + << endl; + a_mode = 11; + } + + // Set parameters for BSLMM and check for predict. 
+ if (a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14) { + if (a_mode == 11) { + n_mh = 1; + } + if (logp_min == 0) { + logp_min = -1.0 * log((double)ns_test); + } + + if (h_scale == -1) { + h_scale = min(1.0, 10.0 / sqrt((double)ni_test)); + } + if (rho_scale == -1) { + rho_scale = min(1.0, 10.0 / sqrt((double)ni_test)); + } + if (logp_scale == -1) { + logp_scale = min(1.0, 5.0 / sqrt((double)ni_test)); + } + + if (h_min == -1) { + h_min = 0.0; + } + if (h_max == -1) { + h_max = 1.0; + } + + if (s_max > ns_test) { + s_max = ns_test; + cout << "s_max is re-set to the number of analyzed SNPs." << endl; + } + if (s_max < s_min) { + cout << "error! maximum s value must be larger than the " + << "minimal value. current values = " << s_max << " and " << s_min + << endl; + error = true; + } + } else if (a_mode == 41 || a_mode == 42) { + if (indicator_bv.size() != 0) { + if (indicator_idv.size() != indicator_bv.size()) { + cout << "error! number of rows in the " + << "phenotype file does not match that in the " + << "estimated breeding value file: " << indicator_idv.size() + << "\t" << indicator_bv.size() << endl; + error = true; + } else { + size_t flag_bv = 0; + for (size_t i = 0; i < (indicator_bv).size(); ++i) { + if (indicator_idv[i] != indicator_bv[i]) { + flag_bv++; + } + } + if (flag_bv != 0) { + cout << "error! individuals with missing value in the " + << "phenotype file does not match that in the " + << "estimated breeding value file: " << flag_bv << endl; + error = true; + } + } + } + } -void PARAM::PrintSummary () { - if (n_ph==1) { - cout<<"pve estimate ="<<pve_null<<endl; - cout<<"se(pve) ="<<pve_se_null<<endl; - } else { + if (a_mode == 62 && !file_beta.empty() && mapRS2wcat.size() == 0) { + cout << "vc analysis with beta files requires -wcat file." << endl; + error = true; + } + if (a_mode == 67 && mapRS2wcat.size() == 0) { + cout << "ci analysis with beta files requires -wcat file." 
<< endl; + error = true; + } + + // File_mk needs to contain more than one line. + if (n_vc == 1 && !file_mk.empty()) { + cout << "error! -mk file should contain more than one line." << endl; + error = true; + } + + return; +} - } - return; +void PARAM::PrintSummary() { + if (n_ph == 1) { + cout << "pve estimate =" << pve_null << endl; + cout << "se(pve) =" << pve_se_null << endl; + } else { + } + return; } -void PARAM::ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { - string file_str; - - if (!file_bfile.empty()) { - file_str=file_bfile+".bed"; - if (ReadFile_bed (file_str, indicator_idv, indicator_snp, - UtX, K, calc_K)==false) { - error=true; - } - } - else { - if (ReadFile_geno (file_geno, indicator_idv, indicator_snp, - UtX, K, calc_K)==false) { - error=true; - } - } - - return; +void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { + string file_str; + + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (ReadFile_bed(file_str, indicator_idv, indicator_snp, UtX, K, calc_K) == + false) { + error = true; + } + } else { + if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K, + calc_K) == false) { + error = true; + } + } + + return; } -void PARAM::ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, - const bool calc_K) { - string file_str; - - if (!file_bfile.empty()) { - file_str=file_bfile+".bed"; - if (ReadFile_bed (file_str, indicator_idv, indicator_snp, - Xt, K, calc_K, ni_test, ns_test)==false) { - error=true; - } - } else { - if (ReadFile_geno (file_geno, indicator_idv, indicator_snp, - Xt, K, calc_K, ni_test, ns_test)==false) { - error=true; - } - } - - return; +void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K, + const bool calc_K) { + string file_str; + + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (ReadFile_bed(file_str, indicator_idv, indicator_snp, Xt, K, calc_K, + ni_test, ns_test) == false) { + error = true; + } + } 
else { + if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K, + ni_test, ns_test) == false) { + error = true; + } + } + + return; } -void PARAM::CalcKin (gsl_matrix *matrix_kin) { - string file_str; - - gsl_matrix_set_zero (matrix_kin); - - if (!file_bfile.empty() ) { - file_str=file_bfile+".bed"; - if (PlinkKin (file_str, indicator_snp, a_mode-20, d_pace, - matrix_kin)==false) { - error=true; - } - } - else if (!file_oxford.empty() ) { - file_str=file_oxford+".bgen"; - if (bgenKin (file_str, indicator_snp, a_mode-20, d_pace, - matrix_kin)==false) { - error=true; - } - } - else { - file_str=file_geno; - if (BimbamKin (file_str, indicator_snp, a_mode-20, d_pace, - matrix_kin)==false) { - error=true; - } - } - - return; +void PARAM::CalcKin(gsl_matrix *matrix_kin) { + string file_str; + + gsl_matrix_set_zero(matrix_kin); + + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == + false) { + error = true; + } + } else if (!file_oxford.empty()) { + file_str = file_oxford + ".bgen"; + if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == + false) { + error = true; + } + } else { + file_str = file_geno; + if (BimbamKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == + false) { + error = true; + } + } + + return; } // From an existing n by nd A and K matrices, compute the d by d S // matrix (which is not necessary symmetric). 
-void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, - gsl_matrix *S) { - size_t n_vc=S->size1, ni_test=A->size1; +void compAKtoS(const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, + gsl_matrix *S) { + size_t n_vc = S->size1, ni_test = A->size1; double di, dj, tr_AK, sum_A, sum_K, s_A, s_K, sum_AK, tr_A, tr_K, d; - for (size_t i=0; i<n_vc; i++) { - for (size_t j=0; j<n_vc; j++) { - tr_AK=0; sum_A=0; sum_K=0; sum_AK=0; tr_A=0; tr_K=0; - for (size_t l=0; l<ni_test; l++) { - s_A=0; s_K=0; - for (size_t k=0; k<ni_test; k++) { - di=gsl_matrix_get(A, l, k+ni_test*i); - dj=gsl_matrix_get(K, l, k+ni_test*j); - s_A+=di; s_K+=dj; - - tr_AK+=di*dj; sum_A+=di; sum_K+=dj; - if (l==k) {tr_A+=di; tr_K+=dj;} - } - sum_AK+=s_A*s_K; - } - - sum_A/=(double)ni_test; - sum_K/=(double)ni_test; - sum_AK/=(double)ni_test; - tr_A-=sum_A; - tr_K-=sum_K; - d=tr_AK-2*sum_AK+sum_A*sum_K; - - if (tr_A==0 || tr_K==0) { - d=0; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = 0; j < n_vc; j++) { + tr_AK = 0; + sum_A = 0; + sum_K = 0; + sum_AK = 0; + tr_A = 0; + tr_K = 0; + for (size_t l = 0; l < ni_test; l++) { + s_A = 0; + s_K = 0; + for (size_t k = 0; k < ni_test; k++) { + di = gsl_matrix_get(A, l, k + ni_test * i); + dj = gsl_matrix_get(K, l, k + ni_test * j); + s_A += di; + s_K += dj; + + tr_AK += di * dj; + sum_A += di; + sum_K += dj; + if (l == k) { + tr_A += di; + tr_K += dj; + } + } + sum_AK += s_A * s_K; + } + + sum_A /= (double)ni_test; + sum_K /= (double)ni_test; + sum_AK /= (double)ni_test; + tr_A -= sum_A; + tr_K -= sum_K; + d = tr_AK - 2 * sum_AK + sum_A * sum_K; + + if (tr_A == 0 || tr_K == 0) { + d = 0; } else { - d=d/(tr_A*tr_K)-1/(double)(ni_test-n_cvt); + d = d / (tr_A * tr_K) - 1 / (double)(ni_test - n_cvt); } - gsl_matrix_set (S, i, j, d); + gsl_matrix_set(S, i, j, d); } } @@ -1340,187 +1375,195 @@ void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, // Copied from lmm.cpp; is used in the following function 
compKtoV // map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1 -size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) { - if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) { - cout<<"error in GetabIndex."<<endl; - return 0; - } - size_t index; - size_t l, h; - if (b>a) {l=a; h=b;} else {l=b; h=a;} - - size_t n=n_cvt+2; - index=(2*n-l+2)*(l-1)/2+h-l; - - return index; +size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt) { + if (a > n_cvt + 2 || b > n_cvt + 2 || a <= 0 || b <= 0) { + cout << "error in GetabIndex." << endl; + return 0; + } + size_t index; + size_t l, h; + if (b > a) { + l = a; + h = b; + } else { + l = b; + h = a; + } + + size_t n = n_cvt + 2; + index = (2 * n - l + 2) * (l - 1) / 2 + h - l; + + return index; } // From an existing n by nd (centered) G matrix, compute the d+1 by // d*(d-1)/2*(d+1) Q matrix where inside i'th d+1 by d+1 matrix, each // element is tr(KiKlKjKm)-r*tr(KmKiKl)-r*tr(KlKjKm)+r^2*tr(KlKm), // where r=n/(n-1) -void compKtoV (const gsl_matrix *G, gsl_matrix *V) { - size_t n_vc=G->size2/G->size1, ni_test=G->size1; +void compKtoV(const gsl_matrix *G, gsl_matrix *V) { + size_t n_vc = G->size2 / G->size1, ni_test = G->size1; - gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, (n_vc*(n_vc+1))/2*ni_test); - gsl_vector *trKiKj=gsl_vector_alloc( n_vc*(n_vc+1)/2 ); - gsl_vector *trKi=gsl_vector_alloc(n_vc); + gsl_matrix *KiKj = + gsl_matrix_alloc(ni_test, (n_vc * (n_vc + 1)) / 2 * ni_test); + gsl_vector *trKiKj = gsl_vector_alloc(n_vc * (n_vc + 1) / 2); + gsl_vector *trKi = gsl_vector_alloc(n_vc); - double d, tr, r=(double)ni_test/(double)(ni_test-1); + double d, tr, r = (double)ni_test / (double)(ni_test - 1); size_t t, t_il, t_jm, t_lm, t_im, t_jl, t_ij; // Compute KiKj for all pairs of i and j (not including the identity // matrix). 
- t=0; - for (size_t i=0; i<n_vc; i++) { - gsl_matrix_const_view Ki= - gsl_matrix_const_submatrix(G, 0, i*ni_test, ni_test, ni_test); - for (size_t j=i; j<n_vc; j++) { - gsl_matrix_const_view Kj= - gsl_matrix_const_submatrix(G, 0, j*ni_test, ni_test, ni_test); - gsl_matrix_view KiKj_sub= - gsl_matrix_submatrix (KiKj, 0, t*ni_test, ni_test, ni_test); - eigenlib_dgemm ("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0, - &KiKj_sub.matrix); + t = 0; + for (size_t i = 0; i < n_vc; i++) { + gsl_matrix_const_view Ki = + gsl_matrix_const_submatrix(G, 0, i * ni_test, ni_test, ni_test); + for (size_t j = i; j < n_vc; j++) { + gsl_matrix_const_view Kj = + gsl_matrix_const_submatrix(G, 0, j * ni_test, ni_test, ni_test); + gsl_matrix_view KiKj_sub = + gsl_matrix_submatrix(KiKj, 0, t * ni_test, ni_test, ni_test); + eigenlib_dgemm("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0, + &KiKj_sub.matrix); t++; } } // Compute trKi, trKiKj. - t=0; - for (size_t i=0; i<n_vc; i++) { - for (size_t j=i; j<n_vc; j++) { - tr=0; - for (size_t k=0; k<ni_test; k++) { - tr+=gsl_matrix_get (KiKj, k, t*ni_test+k); + t = 0; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = i; j < n_vc; j++) { + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + tr += gsl_matrix_get(KiKj, k, t * ni_test + k); } - gsl_vector_set (trKiKj, t, tr); + gsl_vector_set(trKiKj, t, tr); t++; } - tr=0; - for (size_t k=0; k<ni_test; k++) { - tr+=gsl_matrix_get (G, k, i*ni_test+k); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + tr += gsl_matrix_get(G, k, i * ni_test + k); } - gsl_vector_set (trKi, i, tr); + gsl_vector_set(trKi, i, tr); } // Compute V. 
- for (size_t i=0; i<n_vc; i++) { - for (size_t j=i; j<n_vc; j++) { - t_ij=GetabIndex (i+1, j+1, n_vc-2); - for (size_t l=0; l<n_vc+1; l++) { - for (size_t m=0; m<n_vc+1; m++) { - if (l!=n_vc && m!=n_vc) { - t_il=GetabIndex (i+1, l+1, n_vc-2); - t_jm=GetabIndex (j+1, m+1, n_vc-2); - t_lm=GetabIndex (l+1, m+1, n_vc-2); - tr=0; - for (size_t k=0; k<ni_test; k++) { - gsl_vector_const_view KiKl_row= - gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test); - gsl_vector_const_view KiKl_col= - gsl_matrix_const_column (KiKj, t_il*ni_test+k); - gsl_vector_const_view KjKm_row= - gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test); - gsl_vector_const_view KjKm_col= - gsl_matrix_const_column (KiKj, t_jm*ni_test+k); - - gsl_vector_const_view Kl_row= - gsl_matrix_const_subrow (G, k, l*ni_test, ni_test); - gsl_vector_const_view Km_row= - gsl_matrix_const_subrow (G, k, m*ni_test, ni_test); - - if (i<=l && j<=m) { - gsl_blas_ddot (&KiKl_row.vector, &KjKm_col.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d); - tr-=r*d; - } else if (i<=l && j>m) { - gsl_blas_ddot (&KiKl_row.vector, &KjKm_row.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d); - tr-=r*d; - } else if (i>l && j<=m) { - gsl_blas_ddot (&KiKl_col.vector, &KjKm_col.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d); - tr-=r*d; - } else { - gsl_blas_ddot (&KiKl_col.vector, &KjKm_row.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d); - tr-=r*d; - } - } - - tr+=r*r*gsl_vector_get (trKiKj, t_lm); - } else if (l!=n_vc && m==n_vc) { - t_il=GetabIndex (i+1, l+1, n_vc-2); - t_jl=GetabIndex (j+1, l+1, n_vc-2); - tr=0; - for (size_t k=0; k<ni_test; k++) { - 
gsl_vector_const_view KiKl_row= - gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test); - gsl_vector_const_view KiKl_col= - gsl_matrix_const_column (KiKj, t_il*ni_test+k); - gsl_vector_const_view Kj_row= - gsl_matrix_const_subrow (G, k, j*ni_test, ni_test); - - if (i<=l) { - gsl_blas_ddot (&KiKl_row.vector, &Kj_row.vector, &d); - tr+=d; - } else { - gsl_blas_ddot (&KiKl_col.vector, &Kj_row.vector, &d); - tr+=d; - } - } - tr+=-r*gsl_vector_get (trKiKj, t_il) - - r*gsl_vector_get (trKiKj, t_jl)+r*r*gsl_vector_get (trKi, l); - } else if (l==n_vc && m!=n_vc) { - t_jm=GetabIndex (j+1, m+1, n_vc-2); - t_im=GetabIndex (i+1, m+1, n_vc-2); - tr=0; - for (size_t k=0; k<ni_test; k++) { - gsl_vector_const_view KjKm_row= - gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test); - gsl_vector_const_view KjKm_col= - gsl_matrix_const_column (KiKj, t_jm*ni_test+k); - gsl_vector_const_view Ki_row= - gsl_matrix_const_subrow (G, k, i*ni_test, ni_test); - - if (j<=m) { - gsl_blas_ddot (&KjKm_row.vector, &Ki_row.vector, &d); - tr+=d; - } else { - gsl_blas_ddot (&KjKm_col.vector, &Ki_row.vector, &d); - tr+=d; - } - } - tr+=-r*gsl_vector_get (trKiKj, t_im) - - r*gsl_vector_get (trKiKj, t_jm)+r*r*gsl_vector_get (trKi, m); - } else { - tr=gsl_vector_get (trKiKj, t_ij) - - r*gsl_vector_get (trKi, i) - - r*gsl_vector_get (trKi, j)+r*r*(double)(ni_test-1); - } - - gsl_matrix_set (V, l, t_ij*(n_vc+1)+m, tr); - } - } - } - } - - gsl_matrix_scale (V, 1.0/pow((double)ni_test, 2) ); + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = i; j < n_vc; j++) { + t_ij = GetabIndex(i + 1, j + 1, n_vc - 2); + for (size_t l = 0; l < n_vc + 1; l++) { + for (size_t m = 0; m < n_vc + 1; m++) { + if (l != n_vc && m != n_vc) { + t_il = GetabIndex(i + 1, l + 1, n_vc - 2); + t_jm = GetabIndex(j + 1, m + 1, n_vc - 2); + t_lm = GetabIndex(l + 1, m + 1, n_vc - 2); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + gsl_vector_const_view KiKl_row = + gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test); + 
gsl_vector_const_view KiKl_col = + gsl_matrix_const_column(KiKj, t_il * ni_test + k); + gsl_vector_const_view KjKm_row = + gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test); + gsl_vector_const_view KjKm_col = + gsl_matrix_const_column(KiKj, t_jm * ni_test + k); + + gsl_vector_const_view Kl_row = + gsl_matrix_const_subrow(G, k, l * ni_test, ni_test); + gsl_vector_const_view Km_row = + gsl_matrix_const_subrow(G, k, m * ni_test, ni_test); + + if (i <= l && j <= m) { + gsl_blas_ddot(&KiKl_row.vector, &KjKm_col.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d); + tr -= r * d; + } else if (i <= l && j > m) { + gsl_blas_ddot(&KiKl_row.vector, &KjKm_row.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d); + tr -= r * d; + } else if (i > l && j <= m) { + gsl_blas_ddot(&KiKl_col.vector, &KjKm_col.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d); + tr -= r * d; + } else { + gsl_blas_ddot(&KiKl_col.vector, &KjKm_row.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d); + tr -= r * d; + } + } + + tr += r * r * gsl_vector_get(trKiKj, t_lm); + } else if (l != n_vc && m == n_vc) { + t_il = GetabIndex(i + 1, l + 1, n_vc - 2); + t_jl = GetabIndex(j + 1, l + 1, n_vc - 2); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + gsl_vector_const_view KiKl_row = + gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test); + gsl_vector_const_view KiKl_col = + gsl_matrix_const_column(KiKj, t_il * ni_test + k); + gsl_vector_const_view Kj_row = + gsl_matrix_const_subrow(G, k, j * ni_test, ni_test); + + if (i <= l) { + gsl_blas_ddot(&KiKl_row.vector, &Kj_row.vector, &d); + tr += d; + } else { + 
gsl_blas_ddot(&KiKl_col.vector, &Kj_row.vector, &d); + tr += d; + } + } + tr += -r * gsl_vector_get(trKiKj, t_il) - + r * gsl_vector_get(trKiKj, t_jl) + + r * r * gsl_vector_get(trKi, l); + } else if (l == n_vc && m != n_vc) { + t_jm = GetabIndex(j + 1, m + 1, n_vc - 2); + t_im = GetabIndex(i + 1, m + 1, n_vc - 2); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + gsl_vector_const_view KjKm_row = + gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test); + gsl_vector_const_view KjKm_col = + gsl_matrix_const_column(KiKj, t_jm * ni_test + k); + gsl_vector_const_view Ki_row = + gsl_matrix_const_subrow(G, k, i * ni_test, ni_test); + + if (j <= m) { + gsl_blas_ddot(&KjKm_row.vector, &Ki_row.vector, &d); + tr += d; + } else { + gsl_blas_ddot(&KjKm_col.vector, &Ki_row.vector, &d); + tr += d; + } + } + tr += -r * gsl_vector_get(trKiKj, t_im) - + r * gsl_vector_get(trKiKj, t_jm) + + r * r * gsl_vector_get(trKi, m); + } else { + tr = gsl_vector_get(trKiKj, t_ij) - r * gsl_vector_get(trKi, i) - + r * gsl_vector_get(trKi, j) + r * r * (double)(ni_test - 1); + } + + gsl_matrix_set(V, l, t_ij * (n_vc + 1) + m, tr); + } + } + } + } + + gsl_matrix_scale(V, 1.0 / pow((double)ni_test, 2)); gsl_matrix_free(KiKj); gsl_vector_free(trKiKj); @@ -1530,21 +1573,21 @@ void compKtoV (const gsl_matrix *G, gsl_matrix *V) { } // Perform Jacknife sampling for variance of S. 
-void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, - const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) { - size_t n_vc=Svar->size1, ni_test=A->size1, n_cvt=W->size2; +void JackknifeAKtoS(const gsl_matrix *W, const gsl_matrix *A, + const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) { + size_t n_vc = Svar->size1, ni_test = A->size1, n_cvt = W->size2; - vector<vector<vector<double> > > trAK, sumAK; - vector<vector<double> > sumA, sumK, trA, trK, sA, sK; + vector<vector<vector<double>>> trAK, sumAK; + vector<vector<double>> sumA, sumK, trA, trK, sA, sK; vector<double> vec_tmp; double di, dj, d, m, v; // Initialize and set all elements to zero. - for (size_t i=0; i<ni_test; i++) { + for (size_t i = 0; i < ni_test; i++) { vec_tmp.push_back(0); } - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { sumA.push_back(vec_tmp); sumK.push_back(vec_tmp); trA.push_back(vec_tmp); @@ -1553,82 +1596,93 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, sK.push_back(vec_tmp); } - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { trAK.push_back(sumK); sumAK.push_back(sumK); } // Run jackknife. 
- for (size_t i=0; i<n_vc; i++) { - for (size_t l=0; l<ni_test; l++) { - for (size_t k=0; k<ni_test; k++) { - di=gsl_matrix_get(A, l, k+ni_test*i); - dj=gsl_matrix_get(K, l, k+ni_test*i); - - for (size_t t=0; t<ni_test; t++) { - if (t==l || t==k) {continue;} - sumA[i][t]+=di; - sumK[i][t]+=dj; - if (l==k) {trA[i][t]+=di; trK[i][t]+=dj;} - } - sA[i][l]+=di; - sK[i][l]+=dj; + for (size_t i = 0; i < n_vc; i++) { + for (size_t l = 0; l < ni_test; l++) { + for (size_t k = 0; k < ni_test; k++) { + di = gsl_matrix_get(A, l, k + ni_test * i); + dj = gsl_matrix_get(K, l, k + ni_test * i); + + for (size_t t = 0; t < ni_test; t++) { + if (t == l || t == k) { + continue; + } + sumA[i][t] += di; + sumK[i][t] += dj; + if (l == k) { + trA[i][t] += di; + trK[i][t] += dj; + } + } + sA[i][l] += di; + sK[i][l] += dj; } } - for (size_t t=0; t<ni_test; t++) { - sumA[i][t]/=(double)(ni_test-1); - sumK[i][t]/=(double)(ni_test-1); + for (size_t t = 0; t < ni_test; t++) { + sumA[i][t] /= (double)(ni_test - 1); + sumK[i][t] /= (double)(ni_test - 1); } } - for (size_t i=0; i<n_vc; i++) { - for (size_t j=0; j<n_vc; j++) { - for (size_t l=0; l<ni_test; l++) { - for (size_t k=0; k<ni_test; k++) { - di=gsl_matrix_get(A, l, k+ni_test*i); - dj=gsl_matrix_get(K, l, k+ni_test*j); - d=di*dj; - - for (size_t t=0; t<ni_test; t++) { - if (t==l || t==k) {continue;} - trAK[i][j][t]+=d; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = 0; j < n_vc; j++) { + for (size_t l = 0; l < ni_test; l++) { + for (size_t k = 0; k < ni_test; k++) { + di = gsl_matrix_get(A, l, k + ni_test * i); + dj = gsl_matrix_get(K, l, k + ni_test * j); + d = di * dj; + + for (size_t t = 0; t < ni_test; t++) { + if (t == l || t == k) { + continue; + } + trAK[i][j][t] += d; } - } + } - for (size_t t=0; t<ni_test; t++) { - if (t==l) {continue;} - di=gsl_matrix_get(A, l, t+ni_test*i); - dj=gsl_matrix_get(K, l, t+ni_test*j); + for (size_t t = 0; t < ni_test; t++) { + if (t == l) { + continue; + } + di = gsl_matrix_get(A, l, t + 
ni_test * i); + dj = gsl_matrix_get(K, l, t + ni_test * j); - sumAK[i][j][t]+=(sA[i][l]-di)*(sK[j][l]-dj); - } + sumAK[i][j][t] += (sA[i][l] - di) * (sK[j][l] - dj); + } } - for (size_t t=0; t<ni_test; t++) { - sumAK[i][j][t]/=(double)(ni_test-1); + for (size_t t = 0; t < ni_test; t++) { + sumAK[i][j][t] /= (double)(ni_test - 1); } - m=0; v=0; - for (size_t t=0; t<ni_test; t++) { - d=trAK[i][j][t]-2*sumAK[i][j][t]+sumA[i][t]*sumK[j][t]; - if ( (trA[i][t]-sumA[i][t])==0 || (trK[j][t]-sumK[j][t])==0) { - d=0; - } else { - d/=(trA[i][t]-sumA[i][t])*(trK[j][t]-sumK[j][t]); - d-=1/(double)(ni_test-n_cvt-1); - } - m+=d; v+=d*d; + m = 0; + v = 0; + for (size_t t = 0; t < ni_test; t++) { + d = trAK[i][j][t] - 2 * sumAK[i][j][t] + sumA[i][t] * sumK[j][t]; + if ((trA[i][t] - sumA[i][t]) == 0 || (trK[j][t] - sumK[j][t]) == 0) { + d = 0; + } else { + d /= (trA[i][t] - sumA[i][t]) * (trK[j][t] - sumK[j][t]); + d -= 1 / (double)(ni_test - n_cvt - 1); + } + m += d; + v += d * d; } - m/=(double)ni_test; - v/=(double)ni_test; - v-=m*m; - v*=(double)(ni_test-1); - gsl_matrix_set (Svar, i, j, v); - if (n_cvt==1) { - d=gsl_matrix_get (S, i, j); - d=(double)ni_test*d-(double)(ni_test-1)*m; - gsl_matrix_set (S, i, j, d); + m /= (double)ni_test; + v /= (double)ni_test; + v -= m * m; + v *= (double)(ni_test - 1); + gsl_matrix_set(Svar, i, j, v); + if (n_cvt == 1) { + d = gsl_matrix_get(S, i, j); + d = (double)ni_test * d - (double)(ni_test - 1) * m; + gsl_matrix_set(S, i, j, d); } } } @@ -1638,561 +1692,590 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, // Compute the d by d S matrix with its d by d variance matrix of // Svar, and the d+1 by d(d+1) matrix of Q for V(q). 
-void PARAM::CalcS (const map<string, double> &mapRS2wA, - const map<string, double> &mapRS2wK, - const gsl_matrix *W, gsl_matrix *A, - gsl_matrix *K, gsl_matrix *S, - gsl_matrix *Svar, gsl_vector *ns) { +void PARAM::CalcS(const map<string, double> &mapRS2wA, + const map<string, double> &mapRS2wK, const gsl_matrix *W, + gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar, + gsl_vector *ns) { string file_str; - gsl_matrix_set_zero (S); - gsl_matrix_set_zero (Svar); - gsl_vector_set_zero (ns); + gsl_matrix_set_zero(S); + gsl_matrix_set_zero(Svar); + gsl_vector_set_zero(ns); // Compute the kinship matrix G for multiple categories; these // matrices are not centered, for convienence of Jacknife sampling. - if (!file_bfile.empty() ) { - file_str=file_bfile+".bed"; - if (mapRS2wA.size()==0) { - if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, - mapRS2cat, snpInfo, W, K, ns)==false) { - error=true; + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (mapRS2wA.size() == 0) { + if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, + mapRS2cat, snpInfo, W, K, ns) == false) { + error = true; } } else { - if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, - mapRS2cat, snpInfo, W, A, ns)==false) { - error=true; + if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, + mapRS2cat, snpInfo, W, A, ns) == false) { + error = true; } } } else if (!file_geno.empty()) { - file_str=file_geno; - if (mapRS2wA.size()==0) { - if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, - mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) { - error=true; + file_str = file_geno; + if (mapRS2wA.size() == 0) { + if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, + mapRS2cat, snpInfo, W, K, ns) == false) { + error = true; } } else { - if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, - mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) { - error=true; + if 
(BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, + mapRS2cat, snpInfo, W, A, ns) == false) { + error = true; } } - } else if (!file_mbfile.empty() ){ - if (mapRS2wA.size()==0) { - if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, - mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) { - error=true; + } else if (!file_mbfile.empty()) { + if (mapRS2wA.size() == 0) { + if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp, + mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) { + error = true; } } else { - if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, - mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) { - error=true; + if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp, + mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) { + error = true; } } } else if (!file_mgeno.empty()) { - if (mapRS2wA.size()==0) { - if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, - mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) { - error=true; + if (mapRS2wA.size() == 0) { + if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp, + mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) { + error = true; } } else { - if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, - mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) { - error=true; + if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp, + mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) { + error = true; } } } - if (mapRS2wA.size()==0) { - gsl_matrix_memcpy (A, K); + if (mapRS2wA.size() == 0) { + gsl_matrix_memcpy(A, K); } // Center and scale every kinship matrix inside G. 
- for (size_t i=0; i<n_vc; i++) { - gsl_matrix_view Ksub=gsl_matrix_submatrix(K,0,i*ni_test,ni_test,ni_test); + for (size_t i = 0; i < n_vc; i++) { + gsl_matrix_view Ksub = + gsl_matrix_submatrix(K, 0, i * ni_test, ni_test, ni_test); CenterMatrix(&Ksub.matrix); ScaleMatrix(&Ksub.matrix); - gsl_matrix_view Asub=gsl_matrix_submatrix(A,0,i*ni_test,ni_test,ni_test); + gsl_matrix_view Asub = + gsl_matrix_submatrix(A, 0, i * ni_test, ni_test, ni_test); CenterMatrix(&Asub.matrix); ScaleMatrix(&Asub.matrix); } // Cased on G, compute S. - compAKtoS (A, K, W->size2, S); + compAKtoS(A, K, W->size2, S); // Compute Svar and update S with Jacknife. - JackknifeAKtoS (W, A, K, S, Svar); + JackknifeAKtoS(W, A, K, S, Svar); return; } -void PARAM::WriteVector (const gsl_vector *q, const gsl_vector *s, - const size_t n_total, const string suffix) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } +void PARAM::WriteVector(const gsl_vector *q, const gsl_vector *s, + const size_t n_total, const string suffix) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } - outfile.precision(10); + outfile.precision(10); - for (size_t i=0; i<q->size; ++i) { - outfile<<gsl_vector_get (q, i)<<endl; - } + for (size_t i = 0; i < q->size; ++i) { + outfile << gsl_vector_get(q, i) << endl; + } - for (size_t i=0; i<s->size; ++i) { - outfile<<gsl_vector_get (s, i)<<endl; - } + for (size_t i = 0; i < s->size; ++i) { + outfile << gsl_vector_get(s, i) << endl; + } - outfile<<n_total<<endl; + outfile << n_total << endl; - outfile.close(); - outfile.clear(); - return; + outfile.close(); + 
outfile.clear(); + return; } -void PARAM::WriteVar (const string suffix) { +void PARAM::WriteVar(const string suffix) { string file_str, rs; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt.gz"; - - ogzstream outfile (file_str.c_str(), ogzstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - outfile.precision(10); - - if (mindicator_snp.size()!=0) { - for (size_t t=0; t<mindicator_snp.size(); t++) { - indicator_snp=mindicator_snp[t]; - for (size_t i=0; i<indicator_snp.size(); i++) { - if (indicator_snp[i]==0) {continue;} - rs=snpInfo[i].rs_number; - outfile<<rs<<endl; - } - } - } else { - for (size_t i=0; i<indicator_snp.size(); i++) { - if (indicator_snp[i]==0) {continue;} - rs=snpInfo[i].rs_number; - outfile<<rs<<endl; - } - } - - outfile.close(); - outfile.clear(); - return; -} + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt.gz"; + + ogzstream outfile(file_str.c_str(), ogzstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + outfile.precision(10); + + if (mindicator_snp.size() != 0) { + for (size_t t = 0; t < mindicator_snp.size(); t++) { + indicator_snp = mindicator_snp[t]; + for (size_t i = 0; i < indicator_snp.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } + rs = snpInfo[i].rs_number; + outfile << rs << endl; + } + } + } else { + for (size_t i = 0; i < indicator_snp.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } + rs = snpInfo[i].rs_number; + outfile << rs << endl; + } + } -void PARAM::WriteMatrix (const gsl_matrix *matrix_U, const string suffix) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - outfile.precision(10); - - 
for (size_t i=0; i<matrix_U->size1; ++i) { - for (size_t j=0; j<matrix_U->size2; ++j) { - outfile<<gsl_matrix_get (matrix_U, i, j)<<"\t"; - } - outfile<<endl; - } - - outfile.close(); - outfile.clear(); - return; + outfile.close(); + outfile.clear(); + return; } -void PARAM::WriteVector (const gsl_vector *vector_D, const string suffix) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt"; +void PARAM::WriteMatrix(const gsl_matrix *matrix_U, const string suffix) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + outfile.precision(10); - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } + for (size_t i = 0; i < matrix_U->size1; ++i) { + for (size_t j = 0; j < matrix_U->size2; ++j) { + outfile << gsl_matrix_get(matrix_U, i, j) << "\t"; + } + outfile << endl; + } + + outfile.close(); + outfile.clear(); + return; +} + +void PARAM::WriteVector(const gsl_vector *vector_D, const string suffix) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } - outfile.precision(10); + outfile.precision(10); - for (size_t i=0; i<vector_D->size; ++i) { - outfile<<gsl_vector_get (vector_D, i)<<endl; - } + for (size_t i = 0; i < vector_D->size; ++i) { + outfile << gsl_vector_get(vector_D, i) << endl; + } - outfile.close(); - outfile.clear(); - return; + outfile.close(); + outfile.clear(); + return; } -void PARAM::CheckCvt () { - if (indicator_cvt.size()==0) {return;} - - size_t ci_test=0; - - gsl_matrix 
*W=gsl_matrix_alloc (ni_test, n_cvt); - - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;} - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - ci_test++; - } - - size_t flag_ipt=0; - double v_min, v_max; - set<size_t> set_remove; - - // Check if any columns is an intercept. - for (size_t i=0; i<W->size2; i++) { - gsl_vector_view w_col=gsl_matrix_column (W, i); - gsl_vector_minmax (&w_col.vector, &v_min, &v_max); - if (v_min==v_max) {flag_ipt=1; set_remove.insert (i);} - } - - // Add an intecept term if needed. - if (n_cvt==set_remove.size()) { - indicator_cvt.clear(); - n_cvt=1; - } else if (flag_ipt==0) { - cout<<"no intecept term is found in the cvt file. "<< - "a column of 1s is added."<<endl; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_cvt[i]==0) { - continue; - } - cvt[i].push_back(1.0); - } - - n_cvt++; - } else {} - - gsl_matrix_free(W); - - return; +void PARAM::CheckCvt() { + if (indicator_cvt.size() == 0) { + return; + } + + size_t ci_test = 0; + + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) { + continue; + } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + ci_test++; + } + + size_t flag_ipt = 0; + double v_min, v_max; + set<size_t> set_remove; + + // Check if any columns is an intercept. + for (size_t i = 0; i < W->size2; i++) { + gsl_vector_view w_col = gsl_matrix_column(W, i); + gsl_vector_minmax(&w_col.vector, &v_min, &v_max); + if (v_min == v_max) { + flag_ipt = 1; + set_remove.insert(i); + } + } + + // Add an intecept term if needed. + if (n_cvt == set_remove.size()) { + indicator_cvt.clear(); + n_cvt = 1; + } else if (flag_ipt == 0) { + cout << "no intecept term is found in the cvt file. 
" + << "a column of 1s is added." << endl; + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) { + continue; + } + cvt[i].push_back(1.0); + } + + n_cvt++; + } else { + } + + gsl_matrix_free(W); + + return; } // Post-process phentoypes and covariates. -void PARAM::ProcessCvtPhen () { - - // Convert indicator_pheno to indicator_idv. - int k=1; - indicator_idv.clear(); - for (size_t i=0; i<indicator_pheno.size(); i++) { - k=1; - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) {k=0;} - } - indicator_idv.push_back(k); - } - - // Remove individuals with missing covariates. - if ((indicator_cvt).size()!=0) { - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_cvt[i]; - } - } - - // Remove individuals with missing gxe variables. - if ((indicator_gxe).size()!=0) { - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_gxe[i]; - } - } - - // Remove individuals with missing residual weights. - if ((indicator_weight).size()!=0) { - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_weight[i]; - } - } - - // Obtain ni_test. - ni_test=0; - for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) { - if (indicator_idv[i]==0) {continue;} - ni_test++; - } - - // If subsample number is set, perform a random sub-sampling - // to determine the subsampled ids. - if (ni_subsample!=0) { - if (ni_test<ni_subsample) { - cout<<"error! number of subsamples is less than number of"<< - "analyzed individuals. "<<endl; - } else { - - // Set up random environment. 
- gsl_rng_env_setup(); - gsl_rng *gsl_r; - const gsl_rng_type * gslType; - gslType = gsl_rng_default; - if (randseed<0) { - time_t rawtime; - time (&rawtime); - tm * ptm = gmtime (&rawtime); - - randseed = (unsigned) - (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - - // From ni_test, sub-sample ni_subsample. - vector<size_t> a, b; - for (size_t i=0; i<ni_subsample; i++) { - a.push_back(0); - } - for (size_t i=0; i<ni_test; i++) { - b.push_back(i); - } - - gsl_ran_choose (gsl_r, static_cast<void*>(&a[0]), ni_subsample, - static_cast<void*>(&b[0]),ni_test,sizeof (size_t)); - - // Re-set indicator_idv and ni_test. - int j=0; - for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) { - if (indicator_idv[i]==0) {continue;} - if(find(a.begin(), a.end(), j) == a.end()) { - indicator_idv[i]=0; - } - j++; - } - ni_test=ni_subsample; - } - } - - // Check ni_test. - if (ni_test==0 && a_mode!=15) { - error=true; - cout<<"error! number of analyzed individuals equals 0. "<<endl; - return; - } - - // Check covariates to see if they are correlated with each - // other, and to see if the intercept term is included. - // After getting ni_test. - // Add or remove covariates. - if (indicator_cvt.size()!=0) { - CheckCvt(); - } else { - vector<double> cvt_row; - cvt_row.push_back(1); - - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_cvt.push_back(1); - cvt.push_back(cvt_row); - } - } - - return; +void PARAM::ProcessCvtPhen() { + + // Convert indicator_pheno to indicator_idv. + int k = 1; + indicator_idv.clear(); + for (size_t i = 0; i < indicator_pheno.size(); i++) { + k = 1; + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + k = 0; + } + } + indicator_idv.push_back(k); + } + + // Remove individuals with missing covariates. 
+ if ((indicator_cvt).size() != 0) { + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_cvt[i]; + } + } + + // Remove individuals with missing gxe variables. + if ((indicator_gxe).size() != 0) { + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_gxe[i]; + } + } + + // Remove individuals with missing residual weights. + if ((indicator_weight).size() != 0) { + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_weight[i]; + } + } + + // Obtain ni_test. + ni_test = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ni_test++; + } + + // If subsample number is set, perform a random sub-sampling + // to determine the subsampled ids. + if (ni_subsample != 0) { + if (ni_test < ni_subsample) { + cout << "error! number of subsamples is less than number of" + << "analyzed individuals. " << endl; + } else { + + // Set up random environment. + gsl_rng_env_setup(); + gsl_rng *gsl_r; + const gsl_rng_type *gslType; + gslType = gsl_rng_default; + if (randseed < 0) { + time_t rawtime; + time(&rawtime); + tm *ptm = gmtime(&rawtime); + + randseed = (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + + ptm->tm_sec); + } + gsl_r = gsl_rng_alloc(gslType); + gsl_rng_set(gsl_r, randseed); + + // From ni_test, sub-sample ni_subsample. + vector<size_t> a, b; + for (size_t i = 0; i < ni_subsample; i++) { + a.push_back(0); + } + for (size_t i = 0; i < ni_test; i++) { + b.push_back(i); + } + + gsl_ran_choose(gsl_r, static_cast<void *>(&a[0]), ni_subsample, + static_cast<void *>(&b[0]), ni_test, sizeof(size_t)); + + // Re-set indicator_idv and ni_test. 
+ int j = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + if (find(a.begin(), a.end(), j) == a.end()) { + indicator_idv[i] = 0; + } + j++; + } + ni_test = ni_subsample; + } + } + + // Check ni_test. + if (ni_test == 0 && a_mode != 15) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + return; + } + + // Check covariates to see if they are correlated with each + // other, and to see if the intercept term is included. + // After getting ni_test. + // Add or remove covariates. + if (indicator_cvt.size() != 0) { + CheckCvt(); + } else { + vector<double> cvt_row; + cvt_row.push_back(1); + + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_cvt.push_back(1); + cvt.push_back(cvt_row); + } + } + + return; } -void PARAM::CopyCvt (gsl_matrix *W) { - size_t ci_test=0; +void PARAM::CopyCvt(gsl_matrix *W) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;} - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) { + continue; + } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + ci_test++; + } - return; + return; } -void PARAM::CopyGxe (gsl_vector *env) { - size_t ci_test=0; +void PARAM::CopyGxe(gsl_vector *env) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_gxe[i]==0) {continue;} - gsl_vector_set (env, ci_test, gxe[i]); - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_gxe[i] == 0) { + continue; + } + gsl_vector_set(env, ci_test, gxe[i]); + ci_test++; + } - return; + 
return; } -void PARAM::CopyWeight (gsl_vector *w) { - size_t ci_test=0; +void PARAM::CopyWeight(gsl_vector *w) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_weight[i]==0) {continue;} - gsl_vector_set (w, ci_test, weight[i]); - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_weight[i] == 0) { + continue; + } + gsl_vector_set(w, ci_test, weight[i]); + ci_test++; + } - return; + return; } // If flag=0, then use indicator_idv to load W and Y; // else, use indicator_cvt to load them. -void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag) { - size_t ci_test=0; +void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (flag==0) { - if (indicator_idv[i]==0) {continue;} - } else { - if (indicator_cvt[i]==0) {continue;} - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (flag == 0) { + if (indicator_idv[i] == 0) { + continue; + } + } else { + if (indicator_cvt[i] == 0) { + continue; + } + } - gsl_vector_set (y, ci_test, (pheno)[i][0]); + gsl_vector_set(y, ci_test, (pheno)[i][0]); - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - ci_test++; - } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + ci_test++; + } - return; + return; } // If flag=0, then use indicator_idv to load W and Y; // else, use indicator_cvt to load them. 
-void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag) { - size_t ci_test=0; - - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (flag==0) { - if (indicator_idv[i]==0) {continue;} - } else { - if (indicator_cvt[i]==0) {continue;} - } - - for (size_t j=0; j<n_ph; ++j) { - gsl_matrix_set (Y, ci_test, j, (pheno)[i][j]); - } - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - - ci_test++; - } - - return; +void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag) { + size_t ci_test = 0; + + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (flag == 0) { + if (indicator_idv[i] == 0) { + continue; + } + } else { + if (indicator_cvt[i] == 0) { + continue; + } + } + + for (size_t j = 0; j < n_ph; ++j) { + gsl_matrix_set(Y, ci_test, j, (pheno)[i][j]); + } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + + ci_test++; + } + + return; } -void PARAM::CopyRead (gsl_vector *log_N) { - size_t ci_test=0; +void PARAM::CopyRead(gsl_vector *log_N) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0) {continue;} - gsl_vector_set (log_N, ci_test, log(vec_read[i]) ); - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + gsl_vector_set(log_N, ci_test, log(vec_read[i])); + ci_test++; + } - return; + return; } -void PARAM::ObtainWeight (const set<string> &setSnps_beta, - map<string, double> &mapRS2wK) { +void PARAM::ObtainWeight(const set<string> &setSnps_beta, + map<string, double> &mapRS2wK) { mapRS2wK.clear(); vector<double> wsum, wcount; - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { wsum.push_back(0.0); wcount.push_back(0.0); } string rs; - if (msnpInfo.size()==0) { - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} - - rs=snpInfo[i].rs_number; 
- if ( (setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) && - (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) && - (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) && - (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) { - if (mapRS2wsnp.size()!=0) { - mapRS2wK[rs]=mapRS2wsnp[rs]; - if (mapRS2cat.size()==0) { - wsum[0]+=mapRS2wsnp[rs]; - } else { - wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs]; - } - wcount[0]++; - } else { - mapRS2wK[rs]=1; - } + if (msnpInfo.size() == 0) { + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; } + rs = snpInfo[i].rs_number; + if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) && + (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) && + (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) && + (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) { + if (mapRS2wsnp.size() != 0) { + mapRS2wK[rs] = mapRS2wsnp[rs]; + if (mapRS2cat.size() == 0) { + wsum[0] += mapRS2wsnp[rs]; + } else { + wsum[mapRS2cat[rs]] += mapRS2wsnp[rs]; + } + wcount[0]++; + } else { + mapRS2wK[rs] = 1; + } + } } } else { - for (size_t t=0; t<msnpInfo.size(); t++) { - snpInfo=msnpInfo[t]; - indicator_snp=mindicator_snp[t]; - - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} - - rs=snpInfo[i].rs_number; - if ((setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) && - (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) && - (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) && - (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) { - if (mapRS2wsnp.size()!=0) { - mapRS2wK[rs]=mapRS2wsnp[rs]; - if (mapRS2cat.size()==0) { - wsum[0]+=mapRS2wsnp[rs]; - } else { - wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs]; - } - wcount[0]++; - } else { - mapRS2wK[rs]=1; - } - } - } - } - } - - if (mapRS2wsnp.size()!=0) { - for (size_t i=0; i<n_vc; i++) { - wsum[i]/=wcount[i]; - } - - for (map<string, double>::iterator it=mapRS2wK.begin(); - it!=mapRS2wK.end(); - ++it) { - if (mapRS2cat.size()==0) { - 
it->second/=wsum[0]; + for (size_t t = 0; t < msnpInfo.size(); t++) { + snpInfo = msnpInfo[t]; + indicator_snp = mindicator_snp[t]; + + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } + + rs = snpInfo[i].rs_number; + if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) && + (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) && + (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) && + (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) { + if (mapRS2wsnp.size() != 0) { + mapRS2wK[rs] = mapRS2wsnp[rs]; + if (mapRS2cat.size() == 0) { + wsum[0] += mapRS2wsnp[rs]; + } else { + wsum[mapRS2cat[rs]] += mapRS2wsnp[rs]; + } + wcount[0]++; + } else { + mapRS2wK[rs] = 1; + } + } + } + } + } + + if (mapRS2wsnp.size() != 0) { + for (size_t i = 0; i < n_vc; i++) { + wsum[i] /= wcount[i]; + } + + for (map<string, double>::iterator it = mapRS2wK.begin(); + it != mapRS2wK.end(); ++it) { + if (mapRS2cat.size() == 0) { + it->second /= wsum[0]; } else { - it->second/=wsum[mapRS2cat[it->first]]; + it->second /= wsum[mapRS2cat[it->first]]; } } } @@ -2201,54 +2284,52 @@ void PARAM::ObtainWeight (const set<string> &setSnps_beta, // If pve_flag=0 then do not change pve; pve_flag==1, then change pve // to 0 if pve < 0 and pve to 1 if pve > 1. 
-void PARAM::UpdateWeight (const size_t pve_flag, - const map<string, double> &mapRS2wK, - const size_t ni_test, const gsl_vector *ns, - map<string, double> &mapRS2wA) { +void PARAM::UpdateWeight(const size_t pve_flag, + const map<string, double> &mapRS2wK, + const size_t ni_test, const gsl_vector *ns, + map<string, double> &mapRS2wA) { double d; vector<double> wsum, wcount; - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { wsum.push_back(0.0); wcount.push_back(0.0); } - for (map<string, double>::const_iterator it=mapRS2wK.begin(); - it!=mapRS2wK.end(); - ++it) { - d=1; - for (size_t i=0; i<n_vc; i++) { - if (v_pve[i]>=1 && pve_flag==1) { - d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i]; - } else if (v_pve[i]<=0 && pve_flag==1) { - d+=0; + for (map<string, double>::const_iterator it = mapRS2wK.begin(); + it != mapRS2wK.end(); ++it) { + d = 1; + for (size_t i = 0; i < n_vc; i++) { + if (v_pve[i] >= 1 && pve_flag == 1) { + d += (double)ni_test / gsl_vector_get(ns, i) * mapRS2wcat[it->first][i]; + } else if (v_pve[i] <= 0 && pve_flag == 1) { + d += 0; } else { - d+=(double)ni_test/gsl_vector_get(ns, i)* - mapRS2wcat[it->first][i]*v_pve[i]; + d += (double)ni_test / gsl_vector_get(ns, i) * + mapRS2wcat[it->first][i] * v_pve[i]; } } - mapRS2wA[it->first]=1/(d*d); + mapRS2wA[it->first] = 1 / (d * d); - if (mapRS2cat.size()==0) { - wsum[0]+=mapRS2wA[it->first]; + if (mapRS2cat.size() == 0) { + wsum[0] += mapRS2wA[it->first]; wcount[0]++; } else { - wsum[mapRS2cat[it->first]]+=mapRS2wA[it->first]; + wsum[mapRS2cat[it->first]] += mapRS2wA[it->first]; wcount[mapRS2cat[it->first]]++; } } - for (size_t i=0; i<n_vc; i++) { - wsum[i]/=wcount[i]; + for (size_t i = 0; i < n_vc; i++) { + wsum[i] /= wcount[i]; } - for (map<string, double>::iterator it=mapRS2wA.begin(); - it!=mapRS2wA.end(); - ++it) { - if (mapRS2cat.size()==0) { - it->second/=wsum[0]; + for (map<string, double>::iterator it = mapRS2wA.begin(); + it != mapRS2wA.end(); ++it) { + 
if (mapRS2cat.size() == 0) { + it->second /= wsum[0]; } else { - it->second/=wsum[mapRS2cat[it->first]]; + it->second /= wsum[mapRS2cat[it->first]]; } } return; @@ -2256,61 +2337,64 @@ void PARAM::UpdateWeight (const size_t pve_flag, // This function updates indicator_snp, and save z-scores and other // values into vectors. -void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA, - const map<string, string> &mapRS2A1, - const map<string, double> &mapRS2z, - gsl_vector *w, gsl_vector *z, - vector<size_t> &vec_cat) { - gsl_vector_set_zero (w); - gsl_vector_set_zero (z); +void PARAM::UpdateSNPnZ(const map<string, double> &mapRS2wA, + const map<string, string> &mapRS2A1, + const map<string, double> &mapRS2z, gsl_vector *w, + gsl_vector *z, vector<size_t> &vec_cat) { + gsl_vector_set_zero(w); + gsl_vector_set_zero(z); vec_cat.clear(); string rs, a1; - size_t c=0; - if (msnpInfo.size()==0) { - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} - - rs=snpInfo[i].rs_number; - a1=snpInfo[i].a_minor; - - if (mapRS2wA.count(rs)!=0) { - if (a1==mapRS2A1.at(rs)) { - gsl_vector_set (z, c, mapRS2z.at(rs) ); - } else { - gsl_vector_set (z, c, -1*mapRS2z.at(rs) ); - } - vec_cat.push_back(mapRS2cat.at(rs) ); - gsl_vector_set (w, c, mapRS2wA.at(rs) ); - - c++; - } else { - indicator_snp[i]=0; + size_t c = 0; + if (msnpInfo.size() == 0) { + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; } - } - } else { - for (size_t t=0; t<msnpInfo.size(); t++) { - snpInfo=msnpInfo[t]; - for (size_t i=0; i<snpInfo.size(); i++) { - if (mindicator_snp[t][i]==0) {continue;} + rs = snpInfo[i].rs_number; + a1 = snpInfo[i].a_minor; - rs=snpInfo[i].rs_number; - a1=snpInfo[i].a_minor; + if (mapRS2wA.count(rs) != 0) { + if (a1 == mapRS2A1.at(rs)) { + gsl_vector_set(z, c, mapRS2z.at(rs)); + } else { + gsl_vector_set(z, c, -1 * mapRS2z.at(rs)); + } + vec_cat.push_back(mapRS2cat.at(rs)); + gsl_vector_set(w, c, mapRS2wA.at(rs)); - if 
(mapRS2wA.count(rs)!=0) { - if (a1==mapRS2A1.at(rs)) { - gsl_vector_set (z, c, mapRS2z.at(rs) ); - } else { - gsl_vector_set (z, c, -1*mapRS2z.at(rs) ); - } - vec_cat.push_back(mapRS2cat.at(rs) ); - gsl_vector_set (w, c, mapRS2wA.at(rs) ); + c++; + } else { + indicator_snp[i] = 0; + } + } + } else { + for (size_t t = 0; t < msnpInfo.size(); t++) { + snpInfo = msnpInfo[t]; + + for (size_t i = 0; i < snpInfo.size(); i++) { + if (mindicator_snp[t][i] == 0) { + continue; + } + + rs = snpInfo[i].rs_number; + a1 = snpInfo[i].a_minor; + + if (mapRS2wA.count(rs) != 0) { + if (a1 == mapRS2A1.at(rs)) { + gsl_vector_set(z, c, mapRS2z.at(rs)); + } else { + gsl_vector_set(z, c, -1 * mapRS2z.at(rs)); + } + vec_cat.push_back(mapRS2cat.at(rs)); + gsl_vector_set(w, c, mapRS2wA.at(rs)); - c++; - } else { - mindicator_snp[t][i]=0; - } + c++; + } else { + mindicator_snp[t][i] = 0; + } } } } @@ -2320,30 +2404,34 @@ void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA, // This function updates indicator_snp, and save z-scores and other // values into vectors. 
-void PARAM::UpdateSNP (const map<string, double> &mapRS2wA) { +void PARAM::UpdateSNP(const map<string, double> &mapRS2wA) { string rs; - if (msnpInfo.size()==0) { - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} + if (msnpInfo.size() == 0) { + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } - rs=snpInfo[i].rs_number; + rs = snpInfo[i].rs_number; - if (mapRS2wA.count(rs)==0) { - indicator_snp[i]=0; + if (mapRS2wA.count(rs) == 0) { + indicator_snp[i] = 0; } } } else { - for (size_t t=0; t<msnpInfo.size(); t++) { - snpInfo=msnpInfo[t]; + for (size_t t = 0; t < msnpInfo.size(); t++) { + snpInfo = msnpInfo[t]; - for (size_t i=0; i<mindicator_snp[t].size(); i++) { - if (mindicator_snp[t][i]==0) {continue;} + for (size_t i = 0; i < mindicator_snp[t].size(); i++) { + if (mindicator_snp[t][i] == 0) { + continue; + } - rs=snpInfo[i].rs_number; + rs = snpInfo[i].rs_number; - if (mapRS2wA.count(rs)==0) { - mindicator_snp[t][i]=0; - } + if (mapRS2wA.count(rs) == 0) { + mindicator_snp[t][i] = 0; + } } } } diff --git a/src/param.h b/src/param.h index f58da53..33e2431 100644 --- a/src/param.h +++ b/src/param.h @@ -19,340 +19,336 @@ #ifndef __PARAM_H__ #define __PARAM_H__ -#include <vector> +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" #include <map> #include <set> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" +#include <vector> using namespace std; class SNPINFO { public: - string chr; - string rs_number; - double cM; - long int base_position; - string a_minor; - string a_major; - size_t n_miss; - double missingness; - double maf; - size_t n_idv; // Number of non-missing individuals. - size_t n_nb; // Number of neighbours on the right hand side. - size_t file_position; // SNP location in file. 
+ string chr; + string rs_number; + double cM; + long int base_position; + string a_minor; + string a_major; + size_t n_miss; + double missingness; + double maf; + size_t n_idv; // Number of non-missing individuals. + size_t n_nb; // Number of neighbours on the right hand side. + size_t file_position; // SNP location in file. }; // Results for LMM. class SUMSTAT { public: - double beta; // REML estimator for beta. - double se; // SE for beta. - double lambda_remle; // REML estimator for lambda. - double lambda_mle; // MLE estimator for lambda. - double p_wald; // p value from a Wald test. - double p_lrt; // p value from a likelihood ratio test. - double p_score; // p value from a score test. + double beta; // REML estimator for beta. + double se; // SE for beta. + double lambda_remle; // REML estimator for lambda. + double lambda_mle; // MLE estimator for lambda. + double p_wald; // p value from a Wald test. + double p_lrt; // p value from a likelihood ratio test. + double p_score; // p value from a score test. }; // Results for mvLMM. class MPHSUMSTAT { public: - vector<double> v_beta; // REML estimator for beta. - double p_wald; // p value from a Wald test. - double p_lrt; // p value from a likelihood ratio test. - double p_score; // p value from a score test. - vector<double> v_Vg; // Estimator for Vg, right half. - vector<double> v_Ve; // Estimator for Ve, right half. - vector<double> v_Vbeta; // Estimator for Vbeta, right half. + vector<double> v_beta; // REML estimator for beta. + double p_wald; // p value from a Wald test. + double p_lrt; // p value from a likelihood ratio test. + double p_score; // p value from a score test. + vector<double> v_Vg; // Estimator for Vg, right half. + vector<double> v_Ve; // Estimator for Ve, right half. + vector<double> v_Vbeta; // Estimator for Vbeta, right half. }; // Hyper-parameters for BSLMM. 
class HYPBSLMM { public: - double h; - double pve; - double rho; - double pge; - double logp; - size_t n_gamma; + double h; + double pve; + double rho; + double pge; + double logp; + size_t n_gamma; }; // Header class. class HEADER { public: - size_t rs_col; - size_t chr_col; - size_t pos_col; - size_t cm_col; - size_t a1_col; - size_t a0_col; - size_t z_col; - size_t beta_col; - size_t sebeta_col; - size_t chisq_col; - size_t p_col; - size_t n_col; - size_t nmis_col; - size_t nobs_col; - size_t ncase_col; - size_t ncontrol_col; - size_t af_col; - size_t var_col; - size_t ws_col; - size_t cor_col; - size_t coln; // Number of columns. - set<size_t> catc_col; - set<size_t> catd_col; + size_t rs_col; + size_t chr_col; + size_t pos_col; + size_t cm_col; + size_t a1_col; + size_t a0_col; + size_t z_col; + size_t beta_col; + size_t sebeta_col; + size_t chisq_col; + size_t p_col; + size_t n_col; + size_t nmis_col; + size_t nobs_col; + size_t ncase_col; + size_t ncontrol_col; + size_t af_col; + size_t var_col; + size_t ws_col; + size_t cor_col; + size_t coln; // Number of columns. + set<size_t> catc_col; + set<size_t> catd_col; }; class PARAM { public: - // IO-related parameters. - bool mode_silence; - int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests - int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; - vector<size_t> p_column; // Which phenotype column needs analysis. - size_t d_pace; // Display pace - - string file_bfile, file_mbfile; - string file_geno, file_mgeno; - string file_pheno; - string file_anno; // Optional. - string file_gxe; // Optional. - string file_cvt; // Optional. 
- string file_cat, file_mcat; - string file_catc, file_mcatc; - string file_var; - string file_beta; - string file_cor; - string file_kin, file_mk; - string file_ku, file_kd; - string file_study, file_mstudy; - string file_ref, file_mref; - string file_weight, file_wsnp, file_wcat; - string file_out; - string file_bf, file_hyp; - string path_out; - - string file_epm; // Estimated parameter file. - string file_ebv; // Estimated breeding value file. - string file_log; // Log file containing mean estimate. - string file_read; // File containing total number of reads. - string file_gene; // Gene expression file. - string file_snps; // File containing analyzed SNPs or genes. - - // WJA added. - string file_oxford; - - // QC-related parameters. - double miss_level; - double maf_level; - double hwe_level; - double r2_level; - - // LMM-related parameters. - double l_min; - double l_max; - size_t n_region; - double l_mle_null, l_remle_null; - double logl_mle_H0, logl_remle_H0; - double pve_null, pve_se_null, pve_total, se_pve_total; - double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null; - vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null; - vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null; - vector<double> VVe_mle_null; - vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null; - vector<double> se_beta_mle_null; - double p_nr; - double em_prec, nr_prec; - size_t em_iter, nr_iter; - size_t crt; - double pheno_mean; // Phenotype mean from BSLMM fitting or prediction. - - // For fitting multiple variance components. - // The first 3 are of size (n_vc), and the next 2 are of size n_vc+1. - bool noconstrain; - vector<double> v_traceG; - vector<double> v_pve; - vector<double> v_se_pve; - - vector<double> v_sigma2; - vector<double> v_se_sigma2; - vector<double> v_enrich; - vector<double> v_se_enrich; - vector<double> v_beta; - vector<double> v_se_beta; - - // BSLMM/MCMC-related parameters. 
- double h_min, h_max, h_scale; // Priors for h. - double rho_min, rho_max, rho_scale; // Priors for rho. - double logp_min, logp_max, logp_scale; // Priors for log(pi). - size_t h_ngrid, rho_ngrid; - size_t s_min, s_max; // Min & max. number of gammas. - size_t w_step; // # warm up/burn in iter. - size_t s_step; // # sampling iterations. - size_t r_pace; // Record pace. - size_t w_pace; // Write pace. - size_t n_accept; // Number of acceptance. - size_t n_mh; // # MH steps in each iter. - double geo_mean; // Mean of geometric dist. - long int randseed; - double trace_G; - - HYPBSLMM cHyp_initial; - - // VARCOV-related parameters. - double window_cm; - size_t window_bp; - size_t window_ns; - - // vc-related parameters. - size_t n_block; - - // Summary statistics. - bool error; - - // Number of individuals. - size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; - - // Number of observed and missing phenotypes. - size_t np_obs, np_miss; - - // Number of SNPs. - size_t ns_total, ns_test, ns_study, ns_ref; - - size_t ng_total, ng_test; // Number of genes. - size_t ni_control, ni_case; // Number of controls and number of cases. - size_t ni_subsample; // Number of subsampled individuals. - size_t n_cvt; // Number of covariates. - size_t n_cat; // Number of continuous categories. - size_t n_ph; // Number of phenotypes. - size_t n_vc; // Number of variance components - // (including the diagonal matrix). - double time_total; // Record total time. - double time_G; // Time spent on reading files the - // second time and calculate K. - double time_eigen; // Time spent on eigen-decomposition. - double time_UtX; // Time spent on calculating UX and Uy. - double time_UtZ; // Time calculating UtZ for probit BSLMM. - double time_opt; // Time on optimization iterations/MCMC. - double time_Omega; // Time spent on calculating Omega. - double time_hyp; // Time sampling hyperparameters in PMM. - double time_Proposal; // Time spent on constructing the - // proposal distribution (i.e. 
the - // initial LMM or LM analysis). - - // Data. - // Vector recording all phenotypes (NA replaced with -9). - vector<vector<double> > pheno; - - // Vector recording all covariates (NA replaced with -9). - vector<vector<double> > cvt; - - // Vector recording all covariates (NA replaced with -9). - vector<double> gxe; - - // Vector recording weights for the individuals, which is - // useful for animal breeding studies. - vector<double> weight; - - // Matrix recording when a phenotype is missing for an - // individual; 0 missing, 1 available. - vector<vector<int> > indicator_pheno; - - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis - vector<int> indicator_idv; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector< vector<int> > mindicator_snp; - - // Indicator for covariates: 0 missing, 1 available for - // analysis. - vector<int> indicator_cvt; - - // Indicator for gxe: 0 missing, 1 available for analysis. - vector<int> indicator_gxe; - - // Indicator for weight: 0 missing, 1 available for analysis. - vector<int> indicator_weight; - - // Indicator for estimated breeding value file: 0 missing, 1 - // available for analysis. - vector<int> indicator_bv; - - // Indicator for read file: 0 missing, 1 available for analysis. - vector<int> indicator_read; - vector<double> vec_read; // Total number of reads. - vector<double> vec_bv; // Breeding values. - vector<size_t> est_column; - - map<string, int> mapID2num; // Map small ID to number, 0 to n-1. - map<string, string> mapRS2chr; // Map rs# to chromosome location. - map<string, long int> mapRS2bp; // Map rs# to base position. - map<string, double> mapRS2cM; // Map rs# to cM. - map<string, double> mapRS2est; // Map rs# to parameters. 
- map<string, size_t> mapRS2cat; // Map rs# to category number. - map<string, vector<double> > mapRS2catc; // Map rs# to cont. cat's. - map<string, double> mapRS2wsnp; // Map rs# to SNP weights. - map<string, vector<double> > mapRS2wcat; // Map rs# to SNP cat weights. - - vector<SNPINFO> snpInfo; // Record SNP information. - vector< vector<SNPINFO> > msnpInfo; // Record SNP information. - set<string> setSnps; // Set of snps for analysis. - - // Constructor. - PARAM(); - - // Functions. - void ReadFiles (); - void CheckParam (); - void CheckData (); - void PrintSummary (); - void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, - const bool calc_K); - void ReadGenotypes (vector<vector<unsigned char> > &Xt, - gsl_matrix *K, const bool calc_K); - void CheckCvt (); - void CopyCvt (gsl_matrix *W); - void CopyA (size_t flag, gsl_matrix *A); - void CopyGxe (gsl_vector *gxe); - void CopyWeight (gsl_vector *w); - void ProcessCvtPhen(); - void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag); - void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag); - void CalcKin (gsl_matrix *matrix_kin); - void CalcS (const map<string, double> &mapRS2wA, - const map<string, double> &mapRS2wK, - const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K, - gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns); - void WriteVector (const gsl_vector *q, const gsl_vector *s, - const size_t n_total, const string suffix); - void WriteVar (const string suffix); - void WriteMatrix (const gsl_matrix *matrix_U, const string suffix); - void WriteVector (const gsl_vector *vector_D, const string suffix); - void CopyRead (gsl_vector *log_N); - void ObtainWeight (const set<string> &setSnps_beta, map<string, - double> &mapRS2wK); - void UpdateWeight (const size_t pve_flag, - const map<string,double> &mapRS2wK, - const size_t ni_test, const gsl_vector *ns, - map<string, double> &mapRS2wA); - void UpdateSNPnZ (const map<string, double> &mapRS2wA, - const map<string, string> &mapRS2A1, - const map<string, 
double> &mapRS2z, - gsl_vector *w, gsl_vector *z, - vector<size_t> &vec_cat); - void UpdateSNP (const map<string, double> &mapRS2wA); + // IO-related parameters. + bool mode_silence; + int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests + int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; + vector<size_t> p_column; // Which phenotype column needs analysis. + size_t d_pace; // Display pace + + string file_bfile, file_mbfile; + string file_geno, file_mgeno; + string file_pheno; + string file_anno; // Optional. + string file_gxe; // Optional. + string file_cvt; // Optional. + string file_cat, file_mcat; + string file_catc, file_mcatc; + string file_var; + string file_beta; + string file_cor; + string file_kin, file_mk; + string file_ku, file_kd; + string file_study, file_mstudy; + string file_ref, file_mref; + string file_weight, file_wsnp, file_wcat; + string file_out; + string file_bf, file_hyp; + string path_out; + + string file_epm; // Estimated parameter file. + string file_ebv; // Estimated breeding value file. + string file_log; // Log file containing mean estimate. + string file_read; // File containing total number of reads. + string file_gene; // Gene expression file. + string file_snps; // File containing analyzed SNPs or genes. + + // WJA added. + string file_oxford; + + // QC-related parameters. + double miss_level; + double maf_level; + double hwe_level; + double r2_level; + + // LMM-related parameters. 
+ double l_min; + double l_max; + size_t n_region; + double l_mle_null, l_remle_null; + double logl_mle_H0, logl_remle_H0; + double pve_null, pve_se_null, pve_total, se_pve_total; + double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null; + vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null; + vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null; + vector<double> VVe_mle_null; + vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null; + vector<double> se_beta_mle_null; + double p_nr; + double em_prec, nr_prec; + size_t em_iter, nr_iter; + size_t crt; + double pheno_mean; // Phenotype mean from BSLMM fitting or prediction. + + // For fitting multiple variance components. + // The first 3 are of size (n_vc), and the next 2 are of size n_vc+1. + bool noconstrain; + vector<double> v_traceG; + vector<double> v_pve; + vector<double> v_se_pve; + + vector<double> v_sigma2; + vector<double> v_se_sigma2; + vector<double> v_enrich; + vector<double> v_se_enrich; + vector<double> v_beta; + vector<double> v_se_beta; + + // BSLMM/MCMC-related parameters. + double h_min, h_max, h_scale; // Priors for h. + double rho_min, rho_max, rho_scale; // Priors for rho. + double logp_min, logp_max, logp_scale; // Priors for log(pi). + size_t h_ngrid, rho_ngrid; + size_t s_min, s_max; // Min & max. number of gammas. + size_t w_step; // # warm up/burn in iter. + size_t s_step; // # sampling iterations. + size_t r_pace; // Record pace. + size_t w_pace; // Write pace. + size_t n_accept; // Number of acceptance. + size_t n_mh; // # MH steps in each iter. + double geo_mean; // Mean of geometric dist. + long int randseed; + double trace_G; + + HYPBSLMM cHyp_initial; + + // VARCOV-related parameters. + double window_cm; + size_t window_bp; + size_t window_ns; + + // vc-related parameters. + size_t n_block; + + // Summary statistics. + bool error; + + // Number of individuals. 
+ size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; + + // Number of observed and missing phenotypes. + size_t np_obs, np_miss; + + // Number of SNPs. + size_t ns_total, ns_test, ns_study, ns_ref; + + size_t ng_total, ng_test; // Number of genes. + size_t ni_control, ni_case; // Number of controls and number of cases. + size_t ni_subsample; // Number of subsampled individuals. + size_t n_cvt; // Number of covariates. + size_t n_cat; // Number of continuous categories. + size_t n_ph; // Number of phenotypes. + size_t n_vc; // Number of variance components + // (including the diagonal matrix). + double time_total; // Record total time. + double time_G; // Time spent on reading files the + // second time and calculate K. + double time_eigen; // Time spent on eigen-decomposition. + double time_UtX; // Time spent on calculating UX and Uy. + double time_UtZ; // Time calculating UtZ for probit BSLMM. + double time_opt; // Time on optimization iterations/MCMC. + double time_Omega; // Time spent on calculating Omega. + double time_hyp; // Time sampling hyperparameters in PMM. + double time_Proposal; // Time spent on constructing the + // proposal distribution (i.e. the + // initial LMM or LM analysis). + + // Data. + // Vector recording all phenotypes (NA replaced with -9). + vector<vector<double>> pheno; + + // Vector recording all covariates (NA replaced with -9). + vector<vector<double>> cvt; + + // Vector recording all covariates (NA replaced with -9). + vector<double> gxe; + + // Vector recording weights for the individuals, which is + // useful for animal breeding studies. + vector<double> weight; + + // Matrix recording when a phenotype is missing for an + // individual; 0 missing, 1 available. 
+ vector<vector<int>> indicator_pheno; + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<vector<int>> mindicator_snp; + + // Indicator for covariates: 0 missing, 1 available for + // analysis. + vector<int> indicator_cvt; + + // Indicator for gxe: 0 missing, 1 available for analysis. + vector<int> indicator_gxe; + + // Indicator for weight: 0 missing, 1 available for analysis. + vector<int> indicator_weight; + + // Indicator for estimated breeding value file: 0 missing, 1 + // available for analysis. + vector<int> indicator_bv; + + // Indicator for read file: 0 missing, 1 available for analysis. + vector<int> indicator_read; + vector<double> vec_read; // Total number of reads. + vector<double> vec_bv; // Breeding values. + vector<size_t> est_column; + + map<string, int> mapID2num; // Map small ID to number, 0 to n-1. + map<string, string> mapRS2chr; // Map rs# to chromosome location. + map<string, long int> mapRS2bp; // Map rs# to base position. + map<string, double> mapRS2cM; // Map rs# to cM. + map<string, double> mapRS2est; // Map rs# to parameters. + map<string, size_t> mapRS2cat; // Map rs# to category number. + map<string, vector<double>> mapRS2catc; // Map rs# to cont. cat's. + map<string, double> mapRS2wsnp; // Map rs# to SNP weights. + map<string, vector<double>> mapRS2wcat; // Map rs# to SNP cat weights. + + vector<SNPINFO> snpInfo; // Record SNP information. + vector<vector<SNPINFO>> msnpInfo; // Record SNP information. + set<string> setSnps; // Set of snps for analysis. + + // Constructor. + PARAM(); + + // Functions. 
+ void ReadFiles(); + void CheckParam(); + void CheckData(); + void PrintSummary(); + void ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); + void ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K, + const bool calc_K); + void CheckCvt(); + void CopyCvt(gsl_matrix *W); + void CopyA(size_t flag, gsl_matrix *A); + void CopyGxe(gsl_vector *gxe); + void CopyWeight(gsl_vector *w); + void ProcessCvtPhen(); + void CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag); + void CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag); + void CalcKin(gsl_matrix *matrix_kin); + void CalcS(const map<string, double> &mapRS2wA, + const map<string, double> &mapRS2wK, const gsl_matrix *W, + gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar, + gsl_vector *ns); + void WriteVector(const gsl_vector *q, const gsl_vector *s, + const size_t n_total, const string suffix); + void WriteVar(const string suffix); + void WriteMatrix(const gsl_matrix *matrix_U, const string suffix); + void WriteVector(const gsl_vector *vector_D, const string suffix); + void CopyRead(gsl_vector *log_N); + void ObtainWeight(const set<string> &setSnps_beta, + map<string, double> &mapRS2wK); + void UpdateWeight(const size_t pve_flag, const map<string, double> &mapRS2wK, + const size_t ni_test, const gsl_vector *ns, + map<string, double> &mapRS2wA); + void UpdateSNPnZ(const map<string, double> &mapRS2wA, + const map<string, string> &mapRS2A1, + const map<string, double> &mapRS2z, gsl_vector *w, + gsl_vector *z, vector<size_t> &vec_cat); + void UpdateSNP(const map<string, double> &mapRS2wA); }; -size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt); +size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt); #endif - diff --git a/src/prdt.cpp b/src/prdt.cpp index b29d150..3e7c004 100644 --- a/src/prdt.cpp +++ b/src/prdt.cpp @@ -16,527 +16,537 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> -#include <sstream> +#include "gsl/gsl_blas.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" +#include <bitset> +#include <cmath> #include <fstream> -#include <string> #include <iomanip> -#include <bitset> -#include <vector> +#include <iostream> +#include <sstream> #include <stdio.h> #include <stdlib.h> -#include <cmath> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" -#include "gsl/gsl_blas.h" +#include <string> +#include <vector> -#include "io.h" -#include "lapack.h" #include "gzstream.h" #include "io.h" -#include "prdt.h" +#include "io.h" +#include "lapack.h" #include "mathfunc.h" +#include "prdt.h" using namespace std; -void PRDT::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; +void PRDT::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; - indicator_pheno=cPar.indicator_pheno; - indicator_cvt=cPar.indicator_cvt; - indicator_idv=cPar.indicator_idv; + indicator_pheno = cPar.indicator_pheno; + indicator_cvt = cPar.indicator_cvt; + indicator_idv = cPar.indicator_idv; - snpInfo=cPar.snpInfo; - mapRS2est=cPar.mapRS2est; + snpInfo = cPar.snpInfo; + mapRS2est = cPar.mapRS2est; - time_eigen=0; + time_eigen = 0; - n_ph=cPar.n_ph; - np_obs=cPar.np_obs; - np_miss=cPar.np_miss; - ns_total=cPar.ns_total; - ns_test=0; + n_ph = cPar.n_ph; + np_obs = cPar.np_obs; + np_miss = cPar.np_miss; + ns_total = cPar.ns_total; + ns_test = 0; - return; + return; } -void PRDT::CopyToParam (PARAM &cPar) { - cPar.ns_test=ns_test; - cPar.time_eigen=time_eigen; +void PRDT::CopyToParam(PARAM &cPar) { + cPar.ns_test = ns_test; + cPar.time_eigen = time_eigen; - return; + return; } -void PRDT::WriteFiles (gsl_vector 
*y_prdt) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+="prdt"; - file_str+=".txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - size_t ci_test=0; - for (size_t i=0; i<indicator_idv.size(); i++) { - if (indicator_idv[i]==1) { - outfile<<"NA"<<endl; - } else { - outfile<<gsl_vector_get (y_prdt, ci_test)<<endl; - ci_test++; - } - } - - outfile.close(); - outfile.clear(); - return; +void PRDT::WriteFiles(gsl_vector *y_prdt) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += "prdt"; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + size_t ci_test = 0; + for (size_t i = 0; i < indicator_idv.size(); i++) { + if (indicator_idv[i] == 1) { + outfile << "NA" << endl; + } else { + outfile << gsl_vector_get(y_prdt, ci_test) << endl; + ci_test++; + } + } + + outfile.close(); + outfile.clear(); + return; } -void PRDT::WriteFiles (gsl_matrix *Y_full) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".prdt.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - size_t ci_test=0; - for (size_t i=0; i<indicator_cvt.size(); i++) { - if (indicator_cvt[i]==0) { - outfile<<"NA"<<endl; - } else { - for (size_t j=0; j<Y_full->size2; j++) { - outfile << gsl_matrix_get(Y_full,ci_test,j) << - "\t"; - } - outfile<<endl; - ci_test++; - } - } - - outfile.close(); - outfile.clear(); - return; +void PRDT::WriteFiles(gsl_matrix *Y_full) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".prdt.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + size_t 
ci_test = 0; + for (size_t i = 0; i < indicator_cvt.size(); i++) { + if (indicator_cvt[i] == 0) { + outfile << "NA" << endl; + } else { + for (size_t j = 0; j < Y_full->size2; j++) { + outfile << gsl_matrix_get(Y_full, ci_test, j) << "\t"; + } + outfile << endl; + ci_test++; + } + } + + outfile.close(); + outfile.clear(); + return; } -void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { - size_t ni_test=u_hat->size, ni_total=G->size1; - - gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test); - gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test); - gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test); - gsl_vector *eval=gsl_vector_alloc (ni_test); - gsl_vector *Utu=gsl_vector_alloc (ni_test); - gsl_vector *w=gsl_vector_alloc (ni_total); - gsl_permutation *pmt=gsl_permutation_alloc (ni_test); - - //center matrix G based on indicator_idv - for (size_t i=0; i<ni_total; i++) { - gsl_vector_set(w, i, indicator_idv[i]); - } - CenterMatrix(G, w); - - //obtain Koo and Kfo - size_t o_i=0, o_j=0; - double d; - for (size_t i=0; i<indicator_idv.size(); i++) { - o_j=0; - for (size_t j=0; j<indicator_idv.size(); j++) { - d=gsl_matrix_get(G, i, j); - if (indicator_idv[i]==1 && indicator_idv[j]==1) { - gsl_matrix_set(Goo, o_i, o_j, d); - } - if (indicator_idv[i]==0 && indicator_idv[j]==1) { - gsl_matrix_set(Gfo, i-o_i, o_j, d); - } - if (indicator_idv[j]==1) {o_j++;} - } - if (indicator_idv[i]==1) {o_i++;} - } - - //matrix operations to get u_prdt - cout<<"Start Eigen-Decomposition..."<<endl; - clock_t time_start=clock(); - EigenDecomp (Goo, U, eval, 0); - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get(eval,i)<1e-10) { - gsl_vector_set(eval, i, 0); - } - } - - time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu); - for (size_t i=0; i<eval->size; i++) { - d=gsl_vector_get(eval, i); - if (d!=0) { - d=gsl_vector_get(Utu, i)/d; - gsl_vector_set(Utu, i, d); - } - } - 
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval); - gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt); - - // Free matrices. - gsl_matrix_free(Goo); - gsl_matrix_free(Gfo); - gsl_matrix_free(U); - gsl_vector_free(eval); - gsl_vector_free(Utu); - gsl_vector_free(w); - gsl_permutation_free(pmt); - - return; +void PRDT::AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { + size_t ni_test = u_hat->size, ni_total = G->size1; + + gsl_matrix *Goo = gsl_matrix_alloc(ni_test, ni_test); + gsl_matrix *Gfo = gsl_matrix_alloc(ni_total - ni_test, ni_test); + gsl_matrix *U = gsl_matrix_alloc(ni_test, ni_test); + gsl_vector *eval = gsl_vector_alloc(ni_test); + gsl_vector *Utu = gsl_vector_alloc(ni_test); + gsl_vector *w = gsl_vector_alloc(ni_total); + gsl_permutation *pmt = gsl_permutation_alloc(ni_test); + + // center matrix G based on indicator_idv + for (size_t i = 0; i < ni_total; i++) { + gsl_vector_set(w, i, indicator_idv[i]); + } + CenterMatrix(G, w); + + // obtain Koo and Kfo + size_t o_i = 0, o_j = 0; + double d; + for (size_t i = 0; i < indicator_idv.size(); i++) { + o_j = 0; + for (size_t j = 0; j < indicator_idv.size(); j++) { + d = gsl_matrix_get(G, i, j); + if (indicator_idv[i] == 1 && indicator_idv[j] == 1) { + gsl_matrix_set(Goo, o_i, o_j, d); + } + if (indicator_idv[i] == 0 && indicator_idv[j] == 1) { + gsl_matrix_set(Gfo, i - o_i, o_j, d); + } + if (indicator_idv[j] == 1) { + o_j++; + } + } + if (indicator_idv[i] == 1) { + o_i++; + } + } + + // matrix operations to get u_prdt + cout << "Start Eigen-Decomposition..." 
<< endl; + clock_t time_start = clock(); + EigenDecomp(Goo, U, eval, 0); + for (size_t i = 0; i < eval->size; i++) { + if (gsl_vector_get(eval, i) < 1e-10) { + gsl_vector_set(eval, i, 0); + } + } + + time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_blas_dgemv(CblasTrans, 1.0, U, u_hat, 0.0, Utu); + for (size_t i = 0; i < eval->size; i++) { + d = gsl_vector_get(eval, i); + if (d != 0) { + d = gsl_vector_get(Utu, i) / d; + gsl_vector_set(Utu, i, d); + } + } + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, eval); + gsl_blas_dgemv(CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt); + + // Free matrices. + gsl_matrix_free(Goo); + gsl_matrix_free(Gfo); + gsl_matrix_free(U); + gsl_vector_free(eval); + gsl_vector_free(Utu); + gsl_vector_free(w); + gsl_permutation_free(pmt); + + return; } -void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return; - } - - string line; - char *ch_ptr; - string rs; - - size_t n_miss, n_train_nomiss, c_phen; - double geno, x_mean, x_train_mean, effect_size; - - gsl_vector *x=gsl_vector_alloc (y_prdt->size); - gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size); - - ns_test=0; - - // Start reading genotypes and analyze. 
- for (size_t t=0; t<ns_total; ++t) { - !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - if (mapRS2est.count(rs)==0) { - continue; - } else { - effect_size=mapRS2est[rs]; - } - - x_mean=0.0; - c_phen=0; - n_miss=0; - x_train_mean=0; - n_train_nomiss=0; - - gsl_vector_set_zero(x_miss); - - for (size_t i=0; i<indicator_idv.size(); ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==1) { - if (strcmp(ch_ptr, "NA")!=0) { - geno=atof(ch_ptr); - x_train_mean+=geno; - n_train_nomiss++; - } - } else { - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - } - - if (x->size==n_miss) { - cout << "snp " << rs << " has missing genotype for all " << - "individuals and will be ignored." 
<< endl; - continue;} - - - x_mean/=(double)(x->size-n_miss); - x_train_mean/=(double)(n_train_nomiss); - - - for (size_t i=0; i<x->size; ++i) { - geno=gsl_vector_get(x, i); - if (gsl_vector_get (x_miss, i)==0) { - gsl_vector_set(x, i, x_mean-x_train_mean); - } else { - gsl_vector_set(x, i, geno-x_train_mean); - } - } - - gsl_vector_scale (x, effect_size); - gsl_vector_add (y_prdt, x); - - ns_test++; - } - cout<<endl; - - gsl_vector_free (x); - gsl_vector_free (x_miss); - - infile.close(); - infile.clear(); - - return; +void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return; + } + + string line; + char *ch_ptr; + string rs; + + size_t n_miss, n_train_nomiss, c_phen; + double geno, x_mean, x_train_mean, effect_size; + + gsl_vector *x = gsl_vector_alloc(y_prdt->size); + gsl_vector *x_miss = gsl_vector_alloc(y_prdt->size); + + ns_test = 0; + + // Start reading genotypes and analyze. 
+ for (size_t t = 0; t < ns_total; ++t) { + !safeGetline(infile, line).eof(); + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + if (mapRS2est.count(rs) == 0) { + continue; + } else { + effect_size = mapRS2est[rs]; + } + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + x_train_mean = 0; + n_train_nomiss = 0; + + gsl_vector_set_zero(x_miss); + + for (size_t i = 0; i < indicator_idv.size(); ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 1) { + if (strcmp(ch_ptr, "NA") != 0) { + geno = atof(ch_ptr); + x_train_mean += geno; + n_train_nomiss++; + } + } else { + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + geno = atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + } + + if (x->size == n_miss) { + cout << "snp " << rs << " has missing genotype for all " + << "individuals and will be ignored." 
<< endl; + continue; + } + + x_mean /= (double)(x->size - n_miss); + x_train_mean /= (double)(n_train_nomiss); + + for (size_t i = 0; i < x->size; ++i) { + geno = gsl_vector_get(x, i); + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean - x_train_mean); + } else { + gsl_vector_set(x, i, geno - x_train_mean); + } + } + + gsl_vector_scale(x, effect_size); + gsl_vector_add(y_prdt, x); + + ns_test++; + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(x_miss); + + infile.close(); + infile.clear(); + + return; } -void PRDT::AnalyzePlink (gsl_vector *y_prdt) { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return; - } - - char ch[1]; - bitset<8> b; - string rs; - - size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss; - double geno, x_mean, x_train_mean, effect_size; - - gsl_vector *x=gsl_vector_alloc (y_prdt->size); - - // Calculate n_bit and c, the number of bit for each SNP. - if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;} - else {n_bit=indicator_idv.size()/4+1; } - - // Print the first 3 magic numbers. - for (size_t i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - ns_test=0; - - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) { - ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); - } - - rs=snpInfo[t].rs_number; - - if (mapRS2est.count(rs)==0) { - continue; - } else { - effect_size=mapRS2est[rs]; - } - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. - x_mean=0.0; - n_miss=0; - ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0; - for (size_t i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. 
- for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && - ci_total==indicator_idv.size()) { - break; - } - if (indicator_idv[ci_total]==1) { - if (b[2*j]==0) { - if (b[2*j+1]==0) { - x_train_mean+=2.0; - n_train_nomiss++; - } - else { - x_train_mean+=1.0; - n_train_nomiss++; - } - } - else { - if (b[2*j+1]==1) { - n_train_nomiss++; - } - else {} - } - } else { - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(x,ci_test,2); - x_mean+=2.0; - } - else { - gsl_vector_set(x,ci_test,1); - x_mean+=1.0; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(x,ci_test,0); - } - else { - gsl_vector_set(x,ci_test,-9); - n_miss++; - } - } - ci_test++; - } - ci_total++; - - } - } - - if (x->size==n_miss) { - cout << "snp " << rs << " has missing genotype for all " << - "individuals and will be ignored."<<endl; - continue; - } - - x_mean/=(double)(x->size-n_miss); - x_train_mean/=(double)(n_train_nomiss); - - for (size_t i=0; i<x->size; ++i) { - geno=gsl_vector_get(x, i); - if (geno==-9) { - gsl_vector_set(x, i, x_mean-x_train_mean); - } else { - gsl_vector_set(x, i, geno-x_train_mean); - } - } - - gsl_vector_scale (x, effect_size); - gsl_vector_add (y_prdt, x); - - ns_test++; - } - cout<<endl; - - gsl_vector_free (x); - - infile.close(); - infile.clear(); - - return; +void PRDT::AnalyzePlink(gsl_vector *y_prdt) { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } + + char ch[1]; + bitset<8> b; + string rs; + + size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss; + double geno, x_mean, x_train_mean, effect_size; + + gsl_vector *x = gsl_vector_alloc(y_prdt->size); + + // Calculate n_bit and c, the number of bit for each SNP. + if (indicator_idv.size() % 4 == 0) { + n_bit = indicator_idv.size() / 4; + } else { + n_bit = indicator_idv.size() / 4 + 1; + } + + // Print the first 3 magic numbers. 
+ for (size_t i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + ns_test = 0; + + for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { + if (t % d_pace == 0 || t == snpInfo.size() - 1) { + ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + } + + rs = snpInfo[t].rs_number; + + if (mapRS2est.count(rs) == 0) { + continue; + } else { + effect_size = mapRS2est[rs]; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + x_mean = 0.0; + n_miss = 0; + ci_total = 0; + ci_test = 0; + x_train_mean = 0; + n_train_nomiss = 0; + for (size_t i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == indicator_idv.size()) { + break; + } + if (indicator_idv[ci_total] == 1) { + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + x_train_mean += 2.0; + n_train_nomiss++; + } else { + x_train_mean += 1.0; + n_train_nomiss++; + } + } else { + if (b[2 * j + 1] == 1) { + n_train_nomiss++; + } else { + } + } + } else { + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(x, ci_test, 0); + } else { + gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } + ci_test++; + } + ci_total++; + } + } + + if (x->size == n_miss) { + cout << "snp " << rs << " has missing genotype for all " + << "individuals and will be ignored." 
<< endl; + continue; + } + + x_mean /= (double)(x->size - n_miss); + x_train_mean /= (double)(n_train_nomiss); + + for (size_t i = 0; i < x->size; ++i) { + geno = gsl_vector_get(x, i); + if (geno == -9) { + gsl_vector_set(x, i, x_mean - x_train_mean); + } else { + gsl_vector_set(x, i, geno - x_train_mean); + } + } + + gsl_vector_scale(x, effect_size); + gsl_vector_add(y_prdt, x); + + ns_test++; + } + cout << endl; + + gsl_vector_free(x); + + infile.close(); + infile.clear(); + + return; } // Predict missing phenotypes using ridge regression. // Y_hat contains fixed effects -void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, - gsl_matrix *Y_full) { - gsl_vector *y_obs=gsl_vector_alloc (np_obs); - gsl_vector *y_miss=gsl_vector_alloc (np_miss); - gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs); - gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs); - gsl_vector *Hiy=gsl_vector_alloc (np_obs); - - size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0; - - // Obtain H_oo, H_mo. - c_obs1=0; c_miss1=0; - for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) { - if (indicator_cvt[i1]==0) {continue;} - for (vector<int>::size_type j1=0; j1<n_ph; ++j1) { - - c_obs2=0; c_miss2=0; - for (vector<int>::size_type i2=0; - i2<indicator_pheno.size(); ++i2) { - if (indicator_cvt[i2]==0) {continue;} - for (vector<int>::size_type j2=0; - j2<n_ph; j2++) { - - if (indicator_pheno[i2][j2]==1) { - if (indicator_pheno[i1][j1]==1) { - gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); - } else { - gsl_matrix_set (H_mo, c_miss1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); - } - c_obs2++; - } else { - c_miss2++; - } - } - } - - if (indicator_pheno[i1][j1]==1) { - c_obs1++; - } else { - c_miss1++; - } - } - - } - - // Do LU decomposition of H_oo. - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (np_obs); - LUDecomp (H_oo, pmt, &sig); - - // Obtain y_obs=y_full-y_hat. 
- // Add the fixed effects part to y_miss: y_miss=y_hat. - c_obs1=0; c_miss1=0; - for (vector<int>::size_type i=0; - i<indicator_pheno.size(); ++i) { - if (indicator_cvt[i]==0) {continue;} - - for (vector<int>::size_type j=0; j<n_ph; ++j) { - if (indicator_pheno[i][j]==1) { - gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) ); - c_obs1++; - } else { - gsl_vector_set (y_miss, c_miss1, gsl_matrix_get (Y_hat, i, j) ); - c_miss1++; - } - } - } - - LUSolve (H_oo, pmt, y_obs, Hiy); - - gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss); - - // Put back predicted y_miss to Y_full. - c_miss1=0; - for (vector<int>::size_type i=0; - i<indicator_pheno.size(); ++i) { - if (indicator_cvt[i]==0) {continue;} - - for (vector<int>::size_type j=0; j<n_ph; ++j) { - if (indicator_pheno[i][j]==0) { - gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) ); - c_miss1++; - } - } - } - - // Free matrices. - gsl_vector_free(y_obs); - gsl_vector_free(y_miss); - gsl_matrix_free(H_oo); - gsl_matrix_free(H_mo); - gsl_vector_free(Hiy); - - return; +void PRDT::MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H, + gsl_matrix *Y_full) { + gsl_vector *y_obs = gsl_vector_alloc(np_obs); + gsl_vector *y_miss = gsl_vector_alloc(np_miss); + gsl_matrix *H_oo = gsl_matrix_alloc(np_obs, np_obs); + gsl_matrix *H_mo = gsl_matrix_alloc(np_miss, np_obs); + gsl_vector *Hiy = gsl_vector_alloc(np_obs); + + size_t c_obs1 = 0, c_obs2 = 0, c_miss1 = 0, c_miss2 = 0; + + // Obtain H_oo, H_mo. 
+ c_obs1 = 0; + c_miss1 = 0; + for (vector<int>::size_type i1 = 0; i1 < indicator_pheno.size(); ++i1) { + if (indicator_cvt[i1] == 0) { + continue; + } + for (vector<int>::size_type j1 = 0; j1 < n_ph; ++j1) { + + c_obs2 = 0; + c_miss2 = 0; + for (vector<int>::size_type i2 = 0; i2 < indicator_pheno.size(); ++i2) { + if (indicator_cvt[i2] == 0) { + continue; + } + for (vector<int>::size_type j2 = 0; j2 < n_ph; j2++) { + + if (indicator_pheno[i2][j2] == 1) { + if (indicator_pheno[i1][j1] == 1) { + gsl_matrix_set( + H_oo, c_obs1, c_obs2, + gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2)); + } else { + gsl_matrix_set( + H_mo, c_miss1, c_obs2, + gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2)); + } + c_obs2++; + } else { + c_miss2++; + } + } + } + + if (indicator_pheno[i1][j1] == 1) { + c_obs1++; + } else { + c_miss1++; + } + } + } + + // Do LU decomposition of H_oo. + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(np_obs); + LUDecomp(H_oo, pmt, &sig); + + // Obtain y_obs=y_full-y_hat. + // Add the fixed effects part to y_miss: y_miss=y_hat. + c_obs1 = 0; + c_miss1 = 0; + for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) { + if (indicator_cvt[i] == 0) { + continue; + } + + for (vector<int>::size_type j = 0; j < n_ph; ++j) { + if (indicator_pheno[i][j] == 1) { + gsl_vector_set(y_obs, c_obs1, gsl_matrix_get(Y_full, i, j) - + gsl_matrix_get(Y_hat, i, j)); + c_obs1++; + } else { + gsl_vector_set(y_miss, c_miss1, gsl_matrix_get(Y_hat, i, j)); + c_miss1++; + } + } + } + + LUSolve(H_oo, pmt, y_obs, Hiy); + + gsl_blas_dgemv(CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss); + + // Put back predicted y_miss to Y_full. 
+ c_miss1 = 0; + for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) { + if (indicator_cvt[i] == 0) { + continue; + } + + for (vector<int>::size_type j = 0; j < n_ph; ++j) { + if (indicator_pheno[i][j] == 0) { + gsl_matrix_set(Y_full, i, j, gsl_vector_get(y_miss, c_miss1)); + c_miss1++; + } + } + } + + // Free matrices. + gsl_vector_free(y_obs); + gsl_vector_free(y_miss); + gsl_matrix_free(H_oo); + gsl_matrix_free(H_mo); + gsl_vector_free(Hiy); + + return; } - - @@ -19,58 +19,50 @@ #ifndef __PRDT_H__ #define __PRDT_H__ -#include <vector> -#include <map> -#include <string.h> -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" #include "param.h" +#include <map> +#include <string.h> +#include <vector> using namespace std; class PRDT { public: - // IO-related parameters. - size_t a_mode; - size_t d_pace; - - string file_bfile; - string file_geno; - string file_out; - string path_out; - - vector<vector<int> > indicator_pheno; - vector<int> indicator_cvt; - vector<int> indicator_idv; - vector<SNPINFO> snpInfo; - map<string, double> mapRS2est; - - size_t n_ph; - size_t np_obs, np_miss; - size_t ns_total; - size_t ns_test; - - double time_eigen; - - // Main functions. - void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - void WriteFiles (gsl_vector *y_prdt); - void WriteFiles (gsl_matrix *Y_full); - void AddBV (gsl_matrix *G, const gsl_vector *u_hat, - gsl_vector *y_prdt); - void AnalyzeBimbam (gsl_vector *y_prdt); - void AnalyzePlink (gsl_vector *y_prdt); - void MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, - gsl_matrix *Y_full); + // IO-related parameters. 
+ size_t a_mode; + size_t d_pace; + + string file_bfile; + string file_geno; + string file_out; + string path_out; + + vector<vector<int>> indicator_pheno; + vector<int> indicator_cvt; + vector<int> indicator_idv; + vector<SNPINFO> snpInfo; + map<string, double> mapRS2est; + + size_t n_ph; + size_t np_obs, np_miss; + size_t ns_total; + size_t ns_test; + + double time_eigen; + + // Main functions. + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + void WriteFiles(gsl_vector *y_prdt); + void WriteFiles(gsl_matrix *Y_full); + void AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt); + void AnalyzeBimbam(gsl_vector *y_prdt); + void AnalyzePlink(gsl_vector *y_prdt); + void MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H, + gsl_matrix *Y_full); }; #endif - - - - - - - diff --git a/src/varcov.cpp b/src/varcov.cpp index 46b5bf8..0f87ba8 100644 --- a/src/varcov.cpp +++ b/src/varcov.cpp @@ -16,103 +16,126 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> +#include <bitset> +#include <cmath> +#include <cstring> #include <fstream> -#include <sstream> -#include <string> #include <iomanip> -#include <bitset> -#include <vector> +#include <iostream> #include <map> #include <set> -#include <cstring> -#include <cmath> +#include <sstream> #include <stdio.h> #include <stdlib.h> +#include <string> +#include <vector> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" -#include "lapack.h" #include "gzstream.h" -#include "param.h" -#include "varcov.h" #include "io.h" +#include "lapack.h" #include "mathfunc.h" +#include "param.h" +#include "varcov.h" using namespace std; -void VARCOV::CopyFromParam (PARAM &cPar) { - d_pace=cPar.d_pace; +void VARCOV::CopyFromParam(PARAM &cPar) { + d_pace = cPar.d_pace; - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; - time_opt=0.0; + time_opt = 0.0; - window_cm=cPar.window_cm; - window_bp=cPar.window_bp; - window_ns=cPar.window_ns; + window_cm = cPar.window_cm; + window_bp = cPar.window_bp; + window_ns = cPar.window_ns; - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; - return; + return; } -void VARCOV::CopyToParam (PARAM &cPar) { - cPar.time_opt=time_opt; - return; +void VARCOV::CopyToParam(PARAM &cPar) { + cPar.time_opt = time_opt; + return; } -void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, - const vector<vector<double> > &Cov_mat) { +void VARCOV::WriteCov(const int flag, const vector<SNPINFO> &snpInfo_sub, + const vector<vector<double>> &Cov_mat) { 
string file_cov; - file_cov=path_out+"/"+file_out; - file_cov+=".cor.txt"; + file_cov = path_out + "/" + file_out; + file_cov += ".cor.txt"; ofstream outfile; - if (flag==0) { - outfile.open (file_cov.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;} + if (flag == 0) { + outfile.open(file_cov.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_cov << endl; + return; + } - outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis" - <<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0" - <<"\t"<<"af"<<"\t"<<"window_size" - <<"\t"<<"var"<<"\t"<<"cor"<<endl; + outfile << "chr" + << "\t" + << "rs" + << "\t" + << "ps" + << "\t" + << "n_mis" + << "\t" + << "n_obs" + << "\t" + << "allele1" + << "\t" + << "allele0" + << "\t" + << "af" + << "\t" + << "window_size" + << "\t" + << "var" + << "\t" + << "cor" << endl; } else { - outfile.open (file_cov.c_str(), ofstream::app); - if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;} - - for (size_t i=0; i<Cov_mat.size(); i++) { - outfile << snpInfo_sub[i].chr << "\t" << snpInfo_sub[i].rs_number << - "\t" << snpInfo_sub[i].base_position << "\t" << - snpInfo_sub[i].n_miss << "\t" << snpInfo_sub[i].n_idv << "\t" << - snpInfo_sub[i].a_minor << "\t" << snpInfo_sub[i].a_major << "\t" << - fixed << setprecision(3) << snpInfo_sub[i].maf << "\t" << - Cov_mat[i].size()-1 << "\t"; - outfile<<scientific<<setprecision(6)<<Cov_mat[i][0]<<"\t"; - - if (Cov_mat[i].size()==1) { - outfile<<"NA"; + outfile.open(file_cov.c_str(), ofstream::app); + if (!outfile) { + cout << "error writing file: " << file_cov << endl; + return; + } + + for (size_t i = 0; i < Cov_mat.size(); i++) { + outfile << snpInfo_sub[i].chr << "\t" << snpInfo_sub[i].rs_number << "\t" + << snpInfo_sub[i].base_position << "\t" << snpInfo_sub[i].n_miss + << "\t" << snpInfo_sub[i].n_idv << "\t" << snpInfo_sub[i].a_minor + << "\t" << snpInfo_sub[i].a_major << "\t" << fixed + << setprecision(3) << 
snpInfo_sub[i].maf << "\t" + << Cov_mat[i].size() - 1 << "\t"; + outfile << scientific << setprecision(6) << Cov_mat[i][0] << "\t"; + + if (Cov_mat[i].size() == 1) { + outfile << "NA"; } else { - for (size_t j=1; j<Cov_mat[i].size(); j++) { - if (j==(Cov_mat[i].size()-1)) { - outfile<<Cov_mat[i][j]; - } else { - outfile<<Cov_mat[i][j]<<","; - } - } + for (size_t j = 1; j < Cov_mat[i].size(); j++) { + if (j == (Cov_mat[i].size() - 1)) { + outfile << Cov_mat[i][j]; + } else { + outfile << Cov_mat[i][j] << ","; + } + } } - outfile<<endl; + outfile << endl; } } @@ -121,18 +144,18 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, return; } -bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) { - int c_chr=snpInfo1.chr.compare(snpInfo2.chr); - long int c_bp=snpInfo1.base_position-snpInfo2.base_position; +bool CompareSNPinfo(const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) { + int c_chr = snpInfo1.chr.compare(snpInfo2.chr); + long int c_bp = snpInfo1.base_position - snpInfo2.base_position; - if(c_chr<0) { + if (c_chr < 0) { return true; - } else if (c_chr>0) { + } else if (c_chr > 0) { return false; } else { - if (c_bp<0) { + if (c_bp < 0) { return true; - } else if (c_bp>0) { + } else if (c_bp > 0) { return false; } else { return true; @@ -140,64 +163,73 @@ bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2) { } } - // Do not sort SNPs (because gzip files do not support random access) // then calculate n_nb, the number of neighbours, for each SNP. 
-void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort) { - size_t t2=0, n_nb=0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (indicator_snp[t]==0) {continue;} - - if (snpInfo_sort[t].chr=="-9" || - (snpInfo_sort[t].cM==-9 && window_cm!=0) || - (snpInfo_sort[t].base_position==-9 && window_bp!=0) ) { - snpInfo_sort[t].n_nb=0; continue; +void VARCOV::CalcNB(vector<SNPINFO> &snpInfo_sort) { + size_t t2 = 0, n_nb = 0; + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (indicator_snp[t] == 0) { + continue; + } + + if (snpInfo_sort[t].chr == "-9" || + (snpInfo_sort[t].cM == -9 && window_cm != 0) || + (snpInfo_sort[t].base_position == -9 && window_bp != 0)) { + snpInfo_sort[t].n_nb = 0; + continue; } - if (t==indicator_snp.size()-1) {snpInfo_sort[t].n_nb=0; continue;} + if (t == indicator_snp.size() - 1) { + snpInfo_sort[t].n_nb = 0; + continue; + } - t2=t+1; n_nb=0; + t2 = t + 1; + n_nb = 0; - while (t2<indicator_snp.size() && - snpInfo_sort[t2].chr == snpInfo_sort[t].chr && - indicator_snp[t2]==0) { + while (t2 < indicator_snp.size() && + snpInfo_sort[t2].chr == snpInfo_sort[t].chr && + indicator_snp[t2] == 0) { t2++; } - while (t2<indicator_snp.size() && - snpInfo_sort[t2].chr==snpInfo_sort[t].chr && - (snpInfo_sort[t2].cM-snpInfo_sort[t].cM<window_cm || - window_cm==0) && - (snpInfo_sort[t2].base_position-snpInfo_sort[t].base_position < - window_bp || window_bp==0) && (n_nb<window_ns|| window_ns==0)) { - t2++; n_nb++; - while (t2<indicator_snp.size() && - snpInfo_sort[t2].chr==snpInfo_sort[t].chr && - indicator_snp[t2]==0) { - t2++; + while (t2 < indicator_snp.size() && + snpInfo_sort[t2].chr == snpInfo_sort[t].chr && + (snpInfo_sort[t2].cM - snpInfo_sort[t].cM < window_cm || + window_cm == 0) && + (snpInfo_sort[t2].base_position - snpInfo_sort[t].base_position < + window_bp || + window_bp == 0) && + (n_nb < window_ns || window_ns == 0)) { + t2++; + n_nb++; + while (t2 < indicator_snp.size() && + snpInfo_sort[t2].chr == snpInfo_sort[t].chr && + 
indicator_snp[t2] == 0) { + t2++; } } - snpInfo_sort[t].n_nb=n_nb; + snpInfo_sort[t].n_nb = n_nb; } return; } // Vector double is centered to have mean 0. -void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) { +void Calc_Cor(vector<vector<double>> &X_mat, vector<double> &cov_vec) { cov_vec.clear(); double v1, v2, r; - vector<double> x_vec=X_mat[0]; + vector<double> x_vec = X_mat[0]; lapack_ddot(x_vec, x_vec, v1); - cov_vec.push_back(v1/(double)x_vec.size() ); + cov_vec.push_back(v1 / (double)x_vec.size()); - for (size_t i=1; i<X_mat.size(); i++) { + for (size_t i = 1; i < X_mat.size(); i++) { lapack_ddot(X_mat[i], x_vec, r); lapack_ddot(X_mat[i], X_mat[i], v2); - r/=sqrt(v1*v2); + r /= sqrt(v1 * v2); cov_vec.push_back(r); } @@ -214,10 +246,10 @@ void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) { // window_size (which can vary if cM was used) read bimbam mean // genotype file and calculate the covariance matrix for neighboring // SNPs output values at 10000-SNP-interval. 
-void VARCOV::AnalyzeBimbam () { - igzstream infile (file_geno.c_str(), igzstream::in); +void VARCOV::AnalyzeBimbam() { + igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; + cout << "error reading genotype file:" << file_geno << endl; return; } @@ -225,58 +257,64 @@ void VARCOV::AnalyzeBimbam () { vector<SNPINFO> snpInfo_sub; CalcNB(snpInfo); - size_t ni_test=0; - for (size_t i=0; i<indicator_idv.size(); i++) { - ni_test+=indicator_idv[i]; + size_t ni_test = 0; + for (size_t i = 0; i < indicator_idv.size(); i++) { + ni_test += indicator_idv[i]; } - gsl_vector *geno=gsl_vector_alloc (ni_test); + gsl_vector *geno = gsl_vector_alloc(ni_test); double geno_mean; vector<double> x_vec, cov_vec; - vector<vector<double> > X_mat, Cov_mat; + vector<vector<double>> X_mat, Cov_mat; - for (size_t i=0; i<ni_test; i++) { + for (size_t i = 0; i < ni_test; i++) { x_vec.push_back(0); } - WriteCov (0, snpInfo_sub, Cov_mat); + WriteCov(0, snpInfo_sub, Cov_mat); - size_t t2=0, inc; - int n_nb=0; + size_t t2 = 0, inc; + int n_nb = 0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%d_pace==0 || t==(indicator_snp.size()-1)) - {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);} - if (indicator_snp[t]==0) {continue;} + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % d_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } - if (X_mat.size()==0) { - n_nb=snpInfo[t].n_nb+1; + if (X_mat.size() == 0) { + n_nb = snpInfo[t].n_nb + 1; } else { - n_nb=snpInfo[t].n_nb-n_nb+1; + n_nb = snpInfo[t].n_nb - n_nb + 1; } - for (int i=0; i<n_nb; i++) { - if (X_mat.size()==0) {t2=t;} + for (int i = 0; i < n_nb; i++) { + if (X_mat.size() == 0) { + t2 = t; + } // Read a line of the snp is filtered out. 
- inc=0; - while (t2<indicator_snp.size() && indicator_snp[t2]==0) { - t2++; inc++; + inc = 0; + while (t2 < indicator_snp.size() && indicator_snp[t2] == 0) { + t2++; + inc++; } - Bimbam_ReadOneSNP (inc, indicator_idv, infile, geno, geno_mean); - gsl_vector_add_constant (geno, -1.0*geno_mean); + Bimbam_ReadOneSNP(inc, indicator_idv, infile, geno, geno_mean); + gsl_vector_add_constant(geno, -1.0 * geno_mean); - for (size_t j=0; j<geno->size; j++) { - x_vec[j]=gsl_vector_get(geno, j); + for (size_t j = 0; j < geno->size; j++) { + x_vec[j] = gsl_vector_get(geno, j); } X_mat.push_back(x_vec); t2++; } - n_nb=snpInfo[t].n_nb; + n_nb = snpInfo[t].n_nb; Calc_Cor(X_mat, cov_vec); Cov_mat.push_back(cov_vec); @@ -285,15 +323,15 @@ void VARCOV::AnalyzeBimbam () { X_mat.erase(X_mat.begin()); // Write out var/cov values. - if (Cov_mat.size()==10000) { - WriteCov (1, snpInfo_sub, Cov_mat); + if (Cov_mat.size() == 10000) { + WriteCov(1, snpInfo_sub, Cov_mat); Cov_mat.clear(); snpInfo_sub.clear(); } } - if (Cov_mat.size()!=0) { - WriteCov (1, snpInfo_sub, Cov_mat); + if (Cov_mat.size() != 0) { + WriteCov(1, snpInfo_sub, Cov_mat); Cov_mat.clear(); snpInfo_sub.clear(); } @@ -306,68 +344,76 @@ void VARCOV::AnalyzeBimbam () { return; } -void VARCOV::AnalyzePlink () { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;} +void VARCOV::AnalyzePlink() { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } // Calculate the number of right-hand-side neighbours for each SNP. 
vector<SNPINFO> snpInfo_sub; CalcNB(snpInfo); - size_t ni_test=0; - for (size_t i=0; i<indicator_idv.size(); i++) { - ni_test+=indicator_idv[i]; + size_t ni_test = 0; + for (size_t i = 0; i < indicator_idv.size(); i++) { + ni_test += indicator_idv[i]; } - gsl_vector *geno=gsl_vector_alloc (ni_test); + gsl_vector *geno = gsl_vector_alloc(ni_test); double geno_mean; vector<double> x_vec, cov_vec; - vector<vector<double> > X_mat, Cov_mat; + vector<vector<double>> X_mat, Cov_mat; - for (size_t i=0; i<ni_test; i++) { + for (size_t i = 0; i < ni_test; i++) { x_vec.push_back(0); } - WriteCov (0, snpInfo_sub, Cov_mat); + WriteCov(0, snpInfo_sub, Cov_mat); - size_t t2=0, inc; - int n_nb=0; + size_t t2 = 0, inc; + int n_nb = 0; - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%d_pace==0 || t==(indicator_snp.size()-1)) - {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);} - if (indicator_snp[t]==0) {continue;} + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % d_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } - if (X_mat.size()==0) { - n_nb=snpInfo[t].n_nb+1; + if (X_mat.size() == 0) { + n_nb = snpInfo[t].n_nb + 1; } else { - n_nb=snpInfo[t].n_nb-n_nb+1; + n_nb = snpInfo[t].n_nb - n_nb + 1; } - for (int i=0; i<n_nb; i++) { - if (X_mat.size()==0) {t2=t;} + for (int i = 0; i < n_nb; i++) { + if (X_mat.size() == 0) { + t2 = t; + } // Read a line if the SNP is filtered out. 
- inc=0; - while (t2<indicator_snp.size() && indicator_snp[t2]==0) { - t2++; - inc++; + inc = 0; + while (t2 < indicator_snp.size() && indicator_snp[t2] == 0) { + t2++; + inc++; } - Plink_ReadOneSNP (t2, indicator_idv, infile, geno, geno_mean); - gsl_vector_add_constant (geno, -1.0*geno_mean); + Plink_ReadOneSNP(t2, indicator_idv, infile, geno, geno_mean); + gsl_vector_add_constant(geno, -1.0 * geno_mean); - for (size_t j=0; j<geno->size; j++) { - x_vec[j]=gsl_vector_get(geno, j); + for (size_t j = 0; j < geno->size; j++) { + x_vec[j] = gsl_vector_get(geno, j); } X_mat.push_back(x_vec); t2++; } - n_nb=snpInfo[t].n_nb; + n_nb = snpInfo[t].n_nb; Calc_Cor(X_mat, cov_vec); Cov_mat.push_back(cov_vec); @@ -376,15 +422,15 @@ void VARCOV::AnalyzePlink () { X_mat.erase(X_mat.begin()); // Write out var/cov values. - if (Cov_mat.size()==10000) { - WriteCov (1, snpInfo_sub, Cov_mat); + if (Cov_mat.size() == 10000) { + WriteCov(1, snpInfo_sub, Cov_mat); Cov_mat.clear(); snpInfo_sub.clear(); } } - if (Cov_mat.size()!=0) { - WriteCov (1, snpInfo_sub, Cov_mat); + if (Cov_mat.size() != 0) { + WriteCov(1, snpInfo_sub, Cov_mat); Cov_mat.clear(); snpInfo_sub.clear(); } diff --git a/src/varcov.h b/src/varcov.h index 4a1eb3a..47b4f9d 100644 --- a/src/varcov.h +++ b/src/varcov.h @@ -19,45 +19,43 @@ #ifndef __VARCOV_H__ #define __VARCOV_H__ -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" -#include "param.h" +#include "gsl/gsl_vector.h" #include "io.h" +#include "param.h" using namespace std; class VARCOV { public: - // IO-related parameters. - string file_out; - string path_out; - string file_geno; - string file_bfile; - int d_pace; - - vector<int> indicator_idv; - vector<int> indicator_snp; - - vector<SNPINFO> snpInfo; - - double time_opt; - - // Class-specific parameters. - double window_cm; - size_t window_bp; - size_t window_ns; - - // Main functions. 
- void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - void CalcNB (vector<SNPINFO> &snpInfo_sort); - void WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, - const vector<vector<double> > &Cov_mat); - void AnalyzeBimbam (); - void AnalyzePlink (); + // IO-related parameters. + string file_out; + string path_out; + string file_geno; + string file_bfile; + int d_pace; + + vector<int> indicator_idv; + vector<int> indicator_snp; + + vector<SNPINFO> snpInfo; + + double time_opt; + + // Class-specific parameters. + double window_cm; + size_t window_bp; + size_t window_ns; + + // Main functions. + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + void CalcNB(vector<SNPINFO> &snpInfo_sort); + void WriteCov(const int flag, const vector<SNPINFO> &snpInfo_sub, + const vector<vector<double>> &Cov_mat); + void AnalyzeBimbam(); + void AnalyzePlink(); }; #endif - - @@ -16,216 +16,216 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include <iostream> #include <fstream> +#include <iostream> #include <sstream> -#include <iomanip> +#include <bitset> #include <cmath> +#include <cstring> +#include <iomanip> #include <iostream> +#include <map> +#include <set> #include <stdio.h> #include <stdlib.h> -#include <bitset> -#include <vector> -#include <set> -#include <map> #include <string> -#include <cstring> +#include <vector> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" #include "gsl/gsl_cdf.h" -#include "gsl/gsl_multiroots.h" #include "gsl/gsl_min.h" +#include "gsl/gsl_multiroots.h" #include "Eigen/Dense" -#include "param.h" -#include "io.h" -#include "lapack.h" #include "eigenlib.h" #include "gzstream.h" -#include "mathfunc.h" +#include "io.h" +#include "lapack.h" #include "lmm.h" +#include "mathfunc.h" +#include "param.h" #include "vc.h" using namespace std; using namespace Eigen; // In this file, X, Y are already transformed (i.e. UtX and UtY). 
-void VC::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; +void VC::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; - file_cat=cPar.file_cat; - file_beta=cPar.file_beta; - file_cor=cPar.file_cor; + file_cat = cPar.file_cat; + file_beta = cPar.file_beta; + file_cor = cPar.file_cor; - setSnps=cPar.setSnps; + setSnps = cPar.setSnps; - file_out=cPar.file_out; - path_out=cPar.path_out; + file_out = cPar.file_out; + path_out = cPar.path_out; - time_UtX=0.0; - time_opt=0.0; + time_UtX = 0.0; + time_opt = 0.0; - v_traceG=cPar.v_traceG; + v_traceG = cPar.v_traceG; - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ns_test=cPar.ns_test; + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ns_test = cPar.ns_test; - crt=cPar.crt; - window_cm=cPar.window_cm; - window_bp=cPar.window_bp; - window_ns=cPar.window_ns; + crt = cPar.crt; + window_cm = cPar.window_cm; + window_bp = cPar.window_bp; + window_ns = cPar.window_ns; - n_vc=cPar.n_vc; + n_vc = cPar.n_vc; return; } -void VC::CopyToParam (PARAM &cPar) { - cPar.time_UtX=time_UtX; - cPar.time_opt=time_opt; +void VC::CopyToParam(PARAM &cPar) { + cPar.time_UtX = time_UtX; + cPar.time_opt = time_opt; - cPar.v_pve=v_pve; - cPar.v_se_pve=v_se_pve; - cPar.v_sigma2=v_sigma2; - cPar.v_se_sigma2=v_se_sigma2; - cPar.pve_total=pve_total; - cPar.se_pve_total=se_pve_total; - cPar.v_traceG=v_traceG; + cPar.v_pve = v_pve; + cPar.v_se_pve = v_se_pve; + cPar.v_sigma2 = v_sigma2; + cPar.v_se_sigma2 = v_se_sigma2; + cPar.pve_total = pve_total; + cPar.se_pve_total = se_pve_total; + cPar.v_traceG = v_traceG; - cPar.v_beta=v_beta; - cPar.v_se_beta=v_se_beta; + cPar.v_beta = v_beta; + cPar.v_se_beta = v_se_beta; - cPar.ni_total=ni_total; - cPar.ns_total=ns_total; - cPar.ns_test=ns_test; + cPar.ni_total = ni_total; + cPar.ns_total = ns_total; + cPar.ns_test = ns_test; - cPar.n_vc=n_vc; + cPar.n_vc = n_vc; - return; + return; } -void VC::WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec, - const gsl_vector *qvar_vec, const 
gsl_matrix *S_mat, - const gsl_matrix *Svar_mat) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".qvec.txt"; - - ofstream outfile_q (file_str.c_str(), ofstream::out); - if (!outfile_q) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - for (size_t i=0; i<s_vec->size; i++) { - outfile_q<<gsl_vector_get(s_vec, i)<<endl; - } - for (size_t i=0; i<q_vec->size; i++) { - outfile_q<<gsl_vector_get(q_vec, i)<<endl; - } - for (size_t i=0; i<qvar_vec->size; i++) { - outfile_q<<gsl_vector_get(qvar_vec, i)<<endl; - } - - outfile_q.clear(); - outfile_q.close(); - - file_str=path_out+"/"+file_out; - file_str+=".smat.txt"; - - ofstream outfile_s (file_str.c_str(), ofstream::out); - if (!outfile_s) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - for (size_t i=0; i<S_mat->size1; i++) { - for (size_t j=0; j<S_mat->size2; j++) { - outfile_s<<gsl_matrix_get(S_mat, i, j)<<"\t"; - } - outfile_s<<endl; - } - for (size_t i=0; i<Svar_mat->size1; i++) { - for (size_t j=0; j<Svar_mat->size2; j++) { - outfile_s<<gsl_matrix_get(Svar_mat, i, j)<<"\t"; - } - outfile_s<<endl; - } - - outfile_s.clear(); - outfile_s.close(); - - return; +void VC::WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec, + const gsl_vector *qvar_vec, const gsl_matrix *S_mat, + const gsl_matrix *Svar_mat) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".qvec.txt"; + + ofstream outfile_q(file_str.c_str(), ofstream::out); + if (!outfile_q) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + for (size_t i = 0; i < s_vec->size; i++) { + outfile_q << gsl_vector_get(s_vec, i) << endl; + } + for (size_t i = 0; i < q_vec->size; i++) { + outfile_q << gsl_vector_get(q_vec, i) << endl; + } + for (size_t i = 0; i < qvar_vec->size; i++) { + outfile_q << gsl_vector_get(qvar_vec, i) << endl; + } + + outfile_q.clear(); + outfile_q.close(); + + file_str = path_out + "/" + file_out; + file_str += 
".smat.txt"; + + ofstream outfile_s(file_str.c_str(), ofstream::out); + if (!outfile_s) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + for (size_t i = 0; i < S_mat->size1; i++) { + for (size_t j = 0; j < S_mat->size2; j++) { + outfile_s << gsl_matrix_get(S_mat, i, j) << "\t"; + } + outfile_s << endl; + } + for (size_t i = 0; i < Svar_mat->size1; i++) { + for (size_t j = 0; j < Svar_mat->size2; j++) { + outfile_s << gsl_matrix_get(Svar_mat, i, j) << "\t"; + } + outfile_s << endl; + } + + outfile_s.clear(); + outfile_s.close(); + + return; } -void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { - size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1, n_cvt=(p->W)->size2; +void UpdateParam(const gsl_vector *log_sigma2, VC_PARAM *p) { + size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1, n_cvt = (p->W)->size2; - gsl_matrix *K_temp=gsl_matrix_alloc(n1, n1); - gsl_matrix *HiW=gsl_matrix_alloc(n1, n_cvt); - gsl_matrix *WtHiW=gsl_matrix_alloc(n_cvt, n_cvt); - gsl_matrix *WtHiWi=gsl_matrix_alloc(n_cvt, n_cvt); - gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1); + gsl_matrix *K_temp = gsl_matrix_alloc(n1, n1); + gsl_matrix *HiW = gsl_matrix_alloc(n1, n_cvt); + gsl_matrix *WtHiW = gsl_matrix_alloc(n_cvt, n_cvt); + gsl_matrix *WtHiWi = gsl_matrix_alloc(n_cvt, n_cvt); + gsl_matrix *WtHiWiWtHi = gsl_matrix_alloc(n_cvt, n1); double sigma2; // Calculate H = \sum_i^{k+1} \sigma_i^2 K_i. 
- gsl_matrix_set_zero (p->P); - for (size_t i=0; i<n_vc+1; i++) { - if (i==n_vc) { - gsl_matrix_set_identity (K_temp); + gsl_matrix_set_zero(p->P); + for (size_t i = 0; i < n_vc + 1; i++) { + if (i == n_vc) { + gsl_matrix_set_identity(K_temp); } else { - gsl_matrix_const_view K_sub= - gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); - gsl_matrix_memcpy (K_temp, &K_sub.matrix); + gsl_matrix_const_view K_sub = + gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1); + gsl_matrix_memcpy(K_temp, &K_sub.matrix); } // When unconstrained, update on sigma2 instead of log_sigma2. if (p->noconstrain) { - sigma2=gsl_vector_get (log_sigma2, i); + sigma2 = gsl_vector_get(log_sigma2, i); } else { - sigma2=exp(gsl_vector_get (log_sigma2, i) ); + sigma2 = exp(gsl_vector_get(log_sigma2, i)); } gsl_matrix_scale(K_temp, sigma2); - gsl_matrix_add (p->P, K_temp); + gsl_matrix_add(p->P, K_temp); } // Calculate H^{-1}. eigenlib_invert(p->P); - eigenlib_dgemm ("N", "N", 1.0, p->P, p->W, 0.0, HiW); - eigenlib_dgemm ("T", "N", 1.0, p->W, HiW, 0.0, WtHiW); + eigenlib_dgemm("N", "N", 1.0, p->P, p->W, 0.0, HiW); + eigenlib_dgemm("T", "N", 1.0, p->W, HiW, 0.0, WtHiW); eigenlib_invert(WtHiW); gsl_matrix_memcpy(WtHiWi, WtHiW); - eigenlib_dgemm ("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); - eigenlib_dgemm ("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P); + eigenlib_dgemm("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); + eigenlib_dgemm("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P); // Calculate Py, KPy, PKPy. 
gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py); double d; - for (size_t i=0; i<n_vc+1; i++) { - gsl_vector_view KPy=gsl_matrix_column (p->KPy_mat, i); - gsl_vector_view PKPy=gsl_matrix_column (p->PKPy_mat, i); + for (size_t i = 0; i < n_vc + 1; i++) { + gsl_vector_view KPy = gsl_matrix_column(p->KPy_mat, i); + gsl_vector_view PKPy = gsl_matrix_column(p->PKPy_mat, i); - if (i==n_vc) { - gsl_vector_memcpy (&KPy.vector, p->Py); + if (i == n_vc) { + gsl_vector_memcpy(&KPy.vector, p->Py); } else { - gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); + gsl_matrix_const_view K_sub = + gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1); // Seems to be important to use gsl dgemv here instead of // eigenlib_dgemv; otherwise. - gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, - &KPy.vector); + gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector); } gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector); @@ -233,64 +233,64 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { // When phenotypes are not normalized well, then some values in // the following matrix maybe NaN; change that to 0; this seems to // only happen when eigenlib_dgemv was used above. 
- for (size_t j=0; j<p->KPy_mat->size1; j++) { - d=gsl_matrix_get (p->KPy_mat, j, i); + for (size_t j = 0; j < p->KPy_mat->size1; j++) { + d = gsl_matrix_get(p->KPy_mat, j, i); if (std::isnan(d)) { - gsl_matrix_set (p->KPy_mat, j, i, 0); - cout<<"nan appears in "<<i<<" "<<j<<endl; + gsl_matrix_set(p->KPy_mat, j, i, 0); + cout << "nan appears in " << i << " " << j << endl; } - d=gsl_matrix_get (p->PKPy_mat, j, i); + d = gsl_matrix_get(p->PKPy_mat, j, i); if (std::isnan(d)) { - gsl_matrix_set (p->PKPy_mat, j, i, 0); - cout<<"nan appears in "<<i<<" "<<j<<endl; + gsl_matrix_set(p->PKPy_mat, j, i, 0); + cout << "nan appears in " << i << " " << j << endl; } } } - gsl_matrix_free (K_temp); - gsl_matrix_free (HiW); - gsl_matrix_free (WtHiW); - gsl_matrix_free (WtHiWi); - gsl_matrix_free (WtHiWiWtHi); + gsl_matrix_free(K_temp); + gsl_matrix_free(HiW); + gsl_matrix_free(WtHiW); + gsl_matrix_free(WtHiWi); + gsl_matrix_free(WtHiWiWtHi); return; } // Below are functions for AI algorithm. -int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) { - VC_PARAM *p=(VC_PARAM *) params; +int LogRL_dev1(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) { + VC_PARAM *p = (VC_PARAM *)params; - size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1; + size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1; double tr, d; // Update parameters. - UpdateParam (log_sigma2, p); + UpdateParam(log_sigma2, p); // Calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy. 
- for (size_t i=0; i<n_vc+1; i++) { - if (i==n_vc) { - tr=0; - for (size_t l=0; l<n1; l++) { - tr+=gsl_matrix_get (p->P, l, l); + for (size_t i = 0; i < n_vc + 1; i++) { + if (i == n_vc) { + tr = 0; + for (size_t l = 0; l < n1; l++) { + tr += gsl_matrix_get(p->P, l, l); } } else { - tr=0; - for (size_t l=0; l<n1; l++) { - gsl_vector_view P_row=gsl_matrix_row (p->P, l); - gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l); - gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); - tr+=d; + tr = 0; + for (size_t l = 0; l < n1; l++) { + gsl_vector_view P_row = gsl_matrix_row(p->P, l); + gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l); + gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); + tr += d; } } - gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); + gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); if (p->noconstrain) { - d=(-0.5*tr+0.5*d); + d = (-0.5 * tr + 0.5 * d); } else { - d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i)); + d = (-0.5 * tr + 0.5 * d) * exp(gsl_vector_get(log_sigma2, i)); } gsl_vector_set(dev1, i, d); @@ -299,324 +299,354 @@ int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) { return GSL_SUCCESS; } -int LogRL_dev2 (const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) { - VC_PARAM *p=(VC_PARAM *) params; +int LogRL_dev2(const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) { + VC_PARAM *p = (VC_PARAM *)params; - size_t n_vc=log_sigma2->size-1; + size_t n_vc = log_sigma2->size - 1; double d, sigma2_i, sigma2_j; // Update parameters. - UpdateParam (log_sigma2, p); + UpdateParam(log_sigma2, p); // Calculate dev2 = 0.5(yPKPKPy). 
- for (size_t i=0; i<n_vc+1; i++) { - gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); + for (size_t i = 0; i < n_vc + 1; i++) { + gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i); if (p->noconstrain) { - sigma2_i=gsl_vector_get(log_sigma2, i); + sigma2_i = gsl_vector_get(log_sigma2, i); } else { - sigma2_i=exp(gsl_vector_get(log_sigma2, i)); + sigma2_i = exp(gsl_vector_get(log_sigma2, i)); } - for (size_t j=i; j<n_vc+1; j++) { - gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j); + for (size_t j = i; j < n_vc + 1; j++) { + gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); if (p->noconstrain) { - sigma2_j=gsl_vector_get(log_sigma2, j); - d*=-0.5; + sigma2_j = gsl_vector_get(log_sigma2, j); + d *= -0.5; } else { - sigma2_j=exp(gsl_vector_get(log_sigma2, j)); - d*=-0.5*sigma2_i*sigma2_j; + sigma2_j = exp(gsl_vector_get(log_sigma2, j)); + d *= -0.5 * sigma2_i * sigma2_j; } gsl_matrix_set(dev2, i, j, d); - if (j!=i) {gsl_matrix_set(dev2, j, i, d);} + if (j != i) { + gsl_matrix_set(dev2, j, i, d); + } } } - gsl_matrix_memcpy (p->Hessian, dev2); + gsl_matrix_memcpy(p->Hessian, dev2); return GSL_SUCCESS; } -int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, - gsl_vector *dev1, gsl_matrix *dev2) { - VC_PARAM *p=(VC_PARAM *) params; +int LogRL_dev12(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, + gsl_matrix *dev2) { + VC_PARAM *p = (VC_PARAM *)params; - size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1; + size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1; double tr, d, sigma2_i, sigma2_j; // Update parameters. 
- UpdateParam (log_sigma2, p); + UpdateParam(log_sigma2, p); - for (size_t i=0; i<n_vc+1; i++) { - if (i==n_vc) { - tr=0; - for (size_t l=0; l<n1; l++) { - tr+=gsl_matrix_get (p->P, l, l); + for (size_t i = 0; i < n_vc + 1; i++) { + if (i == n_vc) { + tr = 0; + for (size_t l = 0; l < n1; l++) { + tr += gsl_matrix_get(p->P, l, l); } } else { - tr=0; - for (size_t l=0; l<n1; l++) { - gsl_vector_view P_row=gsl_matrix_row (p->P, l); - gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l); - gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); - tr+=d; + tr = 0; + for (size_t l = 0; l < n1; l++) { + gsl_vector_view P_row = gsl_matrix_row(p->P, l); + gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l); + gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); + tr += d; } } - gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); + gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); if (p->noconstrain) { - sigma2_i=gsl_vector_get(log_sigma2, i); - d=(-0.5*tr+0.5*d); + sigma2_i = gsl_vector_get(log_sigma2, i); + d = (-0.5 * tr + 0.5 * d); } else { - sigma2_i=exp(gsl_vector_get(log_sigma2, i)); - d=(-0.5*tr+0.5*d)*sigma2_i; + sigma2_i = exp(gsl_vector_get(log_sigma2, i)); + d = (-0.5 * tr + 0.5 * d) * sigma2_i; } gsl_vector_set(dev1, i, d); - for (size_t j=i; j<n_vc+1; j++) { - gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j); + for (size_t j = i; j < n_vc + 1; j++) { + gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); if (p->noconstrain) { - sigma2_j=gsl_vector_get(log_sigma2, j); - d*=-0.5; + sigma2_j = gsl_vector_get(log_sigma2, j); + d *= -0.5; } else { - sigma2_j=exp(gsl_vector_get(log_sigma2, j)); - d*=-0.5*sigma2_i*sigma2_j; + sigma2_j = exp(gsl_vector_get(log_sigma2, j)); + d *= -0.5 * sigma2_i * sigma2_j; } gsl_matrix_set(dev2, i, j, d); - if (j!=i) {gsl_matrix_set(dev2, j, i, d);} + if (j != i) { + gsl_matrix_set(dev2, j, i, 
d); + } } - } - gsl_matrix_memcpy (p->Hessian, dev2); + gsl_matrix_memcpy(p->Hessian, dev2); return GSL_SUCCESS; } // Read header to determine which column contains which item. -bool ReadHeader_vc (const string &line, HEADER &header) { - string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID", - "rsid","RSID"}; - set<string> rs_set(rs_ptr, rs_ptr+10); - string chr_ptr[]={"chr","CHR"}; - set<string> chr_set(chr_ptr, chr_ptr+2); - string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", - "bp", "BP"}; - set<string> pos_set(pos_ptr, pos_ptr+8); - string cm_ptr[]={"cm","CM"}; - set<string> cm_set(cm_ptr, cm_ptr+2); - string a1_ptr[]={"a1","A1","allele1","ALLELE1"}; - set<string> a1_set(a1_ptr, a1_ptr+4); - string a0_ptr[]={"a0","A0","allele0","ALLELE0"}; - set<string> a0_set(a0_ptr, a0_ptr+4); - - string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"}; - set<string> z_set(z_ptr, z_ptr+6); - string beta_ptr[]={"beta","BETA","b","B"}; - set<string> beta_set(beta_ptr, beta_ptr+4); - string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"}; - set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4); - string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"}; - set<string> chisq_set(chisq_ptr, chisq_ptr+4); - string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"}; - set<string> p_set(p_ptr, p_ptr+6); - - string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"}; - set<string> n_set(n_ptr, n_ptr+6); - string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"}; - set<string> nmis_set(nmis_ptr, nmis_ptr+6); - string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"}; - set<string> nobs_set(nobs_ptr, nobs_ptr+4); - - string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq", - "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"}; - set<string> af_set(af_ptr, af_ptr+10); - string var_ptr[]={"var","VAR"}; - set<string> var_set(var_ptr, var_ptr+2); - - string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"}; - set<string> ws_set(ws_ptr, 
ws_ptr+4); - string cor_ptr[]={"cor","COR","r","R"}; - set<string> cor_set(cor_ptr, cor_ptr+4); - - header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0; - header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; - header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; - header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; - header.cor_col=0; header.coln=0; +bool ReadHeader_vc(const string &line, HEADER &header) { + string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", + "SNPS", "snpid", "SNPID", "rsid", "RSID"}; + set<string> rs_set(rs_ptr, rs_ptr + 10); + string chr_ptr[] = {"chr", "CHR"}; + set<string> chr_set(chr_ptr, chr_ptr + 2); + string pos_ptr[] = { + "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"}; + set<string> pos_set(pos_ptr, pos_ptr + 8); + string cm_ptr[] = {"cm", "CM"}; + set<string> cm_set(cm_ptr, cm_ptr + 2); + string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1"}; + set<string> a1_set(a1_ptr, a1_ptr + 4); + string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0"}; + set<string> a0_set(a0_ptr, a0_ptr + 4); + + string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"}; + set<string> z_set(z_ptr, z_ptr + 6); + string beta_ptr[] = {"beta", "BETA", "b", "B"}; + set<string> beta_set(beta_ptr, beta_ptr + 4); + string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"}; + set<string> sebeta_set(sebeta_ptr, sebeta_ptr + 4); + string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"}; + set<string> chisq_set(chisq_ptr, chisq_ptr + 4); + string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"}; + set<string> p_set(p_ptr, p_ptr + 6); + + string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"}; + set<string> n_set(n_ptr, n_ptr + 6); + string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"}; + set<string> nmis_set(nmis_ptr, nmis_ptr + 6); + string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"}; + set<string> 
nobs_set(nobs_ptr, nobs_ptr + 4); + + string af_ptr[] = {"af", + "AF", + "maf", + "MAF", + "f", + "F", + "allele_freq", + "ALLELE_FREQ", + "allele_frequency", + "ALLELE_FREQUENCY"}; + set<string> af_set(af_ptr, af_ptr + 10); + string var_ptr[] = {"var", "VAR"}; + set<string> var_set(var_ptr, var_ptr + 2); + + string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"}; + set<string> ws_set(ws_ptr, ws_ptr + 4); + string cor_ptr[] = {"cor", "COR", "r", "R"}; + set<string> cor_set(cor_ptr, cor_ptr + 4); + + header.rs_col = 0; + header.chr_col = 0; + header.pos_col = 0; + header.a1_col = 0; + header.a0_col = 0; + header.z_col = 0; + header.beta_col = 0; + header.sebeta_col = 0; + header.chisq_col = 0; + header.p_col = 0; + header.n_col = 0; + header.nmis_col = 0; + header.nobs_col = 0; + header.af_col = 0; + header.var_col = 0; + header.ws_col = 0; + header.cor_col = 0; + header.coln = 0; char *ch_ptr; string type; - size_t n_error=0; - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - while (ch_ptr!=NULL) { - type=ch_ptr; - if (rs_set.count(type)!=0) { - if (header.rs_col==0) { - header.rs_col=header.coln+1; + size_t n_error = 0; + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + while (ch_ptr != NULL) { + type = ch_ptr; + if (rs_set.count(type) != 0) { + if (header.rs_col == 0) { + header.rs_col = header.coln + 1; } else { - cout<<"error! more than two rs columns in the file."<<endl; - n_error++; + cout << "error! more than two rs columns in the file." << endl; + n_error++; } - } else if (chr_set.count(type)!=0) { - if (header.chr_col==0) { - header.chr_col=header.coln+1; + } else if (chr_set.count(type) != 0) { + if (header.chr_col == 0) { + header.chr_col = header.coln + 1; } else { - cout<<"error! more than two chr columns in the file."<<endl; - n_error++; + cout << "error! more than two chr columns in the file." 
<< endl; + n_error++; } - } else if (pos_set.count(type)!=0) { - if (header.pos_col==0) { - header.pos_col=header.coln+1; + } else if (pos_set.count(type) != 0) { + if (header.pos_col == 0) { + header.pos_col = header.coln + 1; } else { - cout<<"error! more than two pos columns in the file."<<endl; - n_error++; + cout << "error! more than two pos columns in the file." << endl; + n_error++; } - } else if (cm_set.count(type)!=0) { - if (header.cm_col==0) { - header.cm_col=header.coln+1; + } else if (cm_set.count(type) != 0) { + if (header.cm_col == 0) { + header.cm_col = header.coln + 1; } else { - cout<<"error! more than two cm columns in the file."<<endl; - n_error++; + cout << "error! more than two cm columns in the file." << endl; + n_error++; } - } else if (a1_set.count(type)!=0) { - if (header.a1_col==0) { - header.a1_col=header.coln+1; + } else if (a1_set.count(type) != 0) { + if (header.a1_col == 0) { + header.a1_col = header.coln + 1; } else { - cout<<"error! more than two allele1 columns in the file."<<endl; - n_error++; + cout << "error! more than two allele1 columns in the file." << endl; + n_error++; } - } else if (a0_set.count(type)!=0) { - if (header.a0_col==0) { - header.a0_col=header.coln+1; + } else if (a0_set.count(type) != 0) { + if (header.a0_col == 0) { + header.a0_col = header.coln + 1; } else { - cout<<"error! more than two allele0 columns in the file."<<endl; - n_error++; + cout << "error! more than two allele0 columns in the file." << endl; + n_error++; } - } else if (z_set.count(type)!=0) { - if (header.z_col==0) { - header.z_col=header.coln+1; + } else if (z_set.count(type) != 0) { + if (header.z_col == 0) { + header.z_col = header.coln + 1; } else { - cout<<"error! more than two z columns in the file."<<endl; - n_error++; + cout << "error! more than two z columns in the file." 
<< endl; + n_error++; } - } else if (beta_set.count(type)!=0) { - if (header.beta_col==0) { - header.beta_col=header.coln+1; + } else if (beta_set.count(type) != 0) { + if (header.beta_col == 0) { + header.beta_col = header.coln + 1; } else { - cout<<"error! more than two beta columns in the file."<<endl; - n_error++; + cout << "error! more than two beta columns in the file." << endl; + n_error++; } - } else if (sebeta_set.count(type)!=0) { - if (header.sebeta_col==0) { - header.sebeta_col=header.coln+1; + } else if (sebeta_set.count(type) != 0) { + if (header.sebeta_col == 0) { + header.sebeta_col = header.coln + 1; } else { - cout<<"error! more than two se_beta columns in the file."<<endl; - n_error++; + cout << "error! more than two se_beta columns in the file." << endl; + n_error++; } - } else if (chisq_set.count(type)!=0) { - if (header.chisq_col==0) { - header.chisq_col=header.coln+1; + } else if (chisq_set.count(type) != 0) { + if (header.chisq_col == 0) { + header.chisq_col = header.coln + 1; } else { - cout<<"error! more than two z columns in the file."<<endl; - n_error++; + cout << "error! more than two z columns in the file." << endl; + n_error++; } - } else if (p_set.count(type)!=0) { - if (header.p_col==0) { - header.p_col=header.coln+1; + } else if (p_set.count(type) != 0) { + if (header.p_col == 0) { + header.p_col = header.coln + 1; } else { - cout<<"error! more than two p columns in the file."<<endl; - n_error++; + cout << "error! more than two p columns in the file." << endl; + n_error++; } - } else if (n_set.count(type)!=0) { - if (header.n_col==0) { - header.n_col=header.coln+1; + } else if (n_set.count(type) != 0) { + if (header.n_col == 0) { + header.n_col = header.coln + 1; } else { - cout<<"error! more than two n_total columns in the file."<<endl; - n_error++; + cout << "error! more than two n_total columns in the file." 
<< endl; + n_error++; } - } else if (nmis_set.count(type)!=0) { - if (header.nmis_col==0) { - header.nmis_col=header.coln+1; + } else if (nmis_set.count(type) != 0) { + if (header.nmis_col == 0) { + header.nmis_col = header.coln + 1; } else { - cout<<"error! more than two n_mis columns in the file."<<endl; - n_error++; + cout << "error! more than two n_mis columns in the file." << endl; + n_error++; } - } else if (nobs_set.count(type)!=0) { - if (header.nobs_col==0) { - header.nobs_col=header.coln+1; + } else if (nobs_set.count(type) != 0) { + if (header.nobs_col == 0) { + header.nobs_col = header.coln + 1; } else { - cout<<"error! more than two n_obs columns in the file."<<endl; - n_error++; + cout << "error! more than two n_obs columns in the file." << endl; + n_error++; } - } else if (ws_set.count(type)!=0) { - if (header.ws_col==0) { - header.ws_col=header.coln+1; + } else if (ws_set.count(type) != 0) { + if (header.ws_col == 0) { + header.ws_col = header.coln + 1; } else { - cout<<"error! more than two window_size columns in the file."<<endl; - n_error++; + cout << "error! more than two window_size columns in the file." << endl; + n_error++; } - } else if (af_set.count(type)!=0) { - if (header.af_col==0) { - header.af_col=header.coln+1; + } else if (af_set.count(type) != 0) { + if (header.af_col == 0) { + header.af_col = header.coln + 1; } else { - cout<<"error! more than two af columns in the file."<<endl; - n_error++; + cout << "error! more than two af columns in the file." << endl; + n_error++; } - } else if (cor_set.count(type)!=0) { - if (header.cor_col==0) { - header.cor_col=header.coln+1; + } else if (cor_set.count(type) != 0) { + if (header.cor_col == 0) { + header.cor_col = header.coln + 1; } else { - cout<<"error! more than two cor columns in the file."<<endl; - n_error++; + cout << "error! more than two cor columns in the file." 
<< endl; + n_error++; } - } else {} + } else { + } - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); header.coln++; } - if (header.cor_col!=0 && header.cor_col!=header.coln) { - cout<<"error! the cor column should be the last column."<<endl; + if (header.cor_col != 0 && header.cor_col != header.coln) { + cout << "error! the cor column should be the last column." << endl; n_error++; } - if (header.rs_col==0) { - if (header.chr_col!=0 && header.pos_col!=0) { - cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl; + if (header.rs_col == 0) { + if (header.chr_col != 0 && header.pos_col != 0) { + cout << "missing an rs column. rs id will be replaced by chr:pos" << endl; } else { - cout<<"error! missing an rs column."<<endl; n_error++; + cout << "error! missing an rs column." << endl; + n_error++; } } - if (n_error==0) {return true;} else {return false;} + if (n_error == 0) { + return true; + } else { + return false; + } } // Read cov file the first time, record mapRS2in, mapRS2var (in case // var is not provided in the z file), store vec_n and vec_rs. -void ReadFile_cor (const string &file_cor, const set<string> &setSnps, - vector<string> &vec_rs, vector<size_t> &vec_n, - vector<double> &vec_cm, vector<double> &vec_bp, - map<string, size_t> &mapRS2in, map<string, - double> &mapRS2var) { +void ReadFile_cor(const string &file_cor, const set<string> &setSnps, + vector<string> &vec_rs, vector<size_t> &vec_n, + vector<double> &vec_cm, vector<double> &vec_bp, + map<string, size_t> &mapRS2in, + map<string, double> &mapRS2var) { vec_rs.clear(); vec_n.clear(); mapRS2in.clear(); mapRS2var.clear(); - igzstream infile (file_cor.c_str(), igzstream::in); + igzstream infile(file_cor.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open cov file: "<<file_cor<<endl; + cout << "error! 
fail to open cov file: " << file_cor << endl; return; } @@ -624,88 +654,124 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps, char *ch_ptr; string rs, chr, a1, a0, pos, cm; - double af=0, var_x=0, d_pos, d_cm; - size_t n_total=0, n_mis=0, n_obs=0, ni_total=0; - size_t ns_test=0, ns_total=0; + double af = 0, var_x = 0, d_pos, d_cm; + size_t n_total = 0, n_mis = 0, n_obs = 0, ni_total = 0; + size_t ns_test = 0, ns_total = 0; HEADER header; // Header. !safeGetline(infile, line).eof(); - ReadHeader_vc (line, header); + ReadHeader_vc(line, header); - if (header.n_col==0 ) { - if (header.nobs_col==0 && header.nmis_col==0) { - cout<<"error! missing sample size in the cor file."<<endl; + if (header.n_col == 0) { + if (header.nobs_col == 0 && header.nmis_col == 0) { + cout << "error! missing sample size in the cor file." << endl; } else { - cout<<"total sample size will be replaced by obs/mis sample size."<<endl; + cout << "total sample size will be replaced by obs/mis sample size." + << endl; } } while (!safeGetline(infile, line).eof()) { - //do not read cor values this time; upto col_n-1. - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - - n_total=0; n_mis=0; n_obs=0; af=0; var_x=0; d_cm=0; d_pos=0; - for (size_t i=0; i<header.coln-1; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;} - if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;} - if (header.pos_col!=0 && header.pos_col==i+1) { - pos=ch_ptr; d_pos=atof(ch_ptr); + // do not read cor values this time; upto col_n-1. 
+ ch_ptr = strtok((char *)line.c_str(), " , \t"); + + n_total = 0; + n_mis = 0; + n_obs = 0; + af = 0; + var_x = 0; + d_cm = 0; + d_pos = 0; + for (size_t i = 0; i < header.coln - 1; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } + if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } + if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + d_pos = atof(ch_ptr); } - if (header.cm_col!=0 && header.cm_col==i+1) { - cm=ch_ptr; d_cm=atof(ch_ptr); + if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + d_cm = atof(ch_ptr); + } + if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } + if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; } - if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;} - if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;} - if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);} - if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);} - if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);} + if (header.n_col != 0 && header.n_col == i + 1) { + n_total = atoi(ch_ptr); + } + if (header.nmis_col != 0 && header.nmis_col == i + 1) { + n_mis = atoi(ch_ptr); + } + if (header.nobs_col != 0 && header.nobs_col == i + 1) { + n_obs = atoi(ch_ptr); + } - if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);} - if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);} + if (header.af_col != 0 && header.af_col == i + 1) { + af = atof(ch_ptr); + } + if (header.var_col != 0 && header.var_col == i + 1) { + var_x = atof(ch_ptr); + } - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } - if (header.rs_col==0) { - rs=chr+":"+pos; + if (header.rs_col == 0) { + rs = chr + ":" + pos; } - if (header.n_col==0) { - n_total=n_mis+n_obs; + if (header.n_col == 0) { + n_total = n_mis + n_obs; } // Record rs, n. 
vec_rs.push_back(rs); vec_n.push_back(n_total); - if (d_cm>0) {vec_cm.push_back(d_cm);} else {vec_cm.push_back(d_cm);} - if (d_pos>0) {vec_bp.push_back(d_pos);} else {vec_bp.push_back(d_pos);} + if (d_cm > 0) { + vec_cm.push_back(d_cm); + } else { + vec_cm.push_back(d_cm); + } + if (d_pos > 0) { + vec_bp.push_back(d_pos); + } else { + vec_bp.push_back(d_pos); + } // Record mapRS2in and mapRS2var. - if (setSnps.size()==0 || setSnps.count(rs)!=0) { - if (mapRS2in.count(rs)==0) { - mapRS2in[rs]=1; + if (setSnps.size() == 0 || setSnps.count(rs) != 0) { + if (mapRS2in.count(rs) == 0) { + mapRS2in[rs] = 1; - if (header.var_col!=0) { - mapRS2var[rs]=var_x; - } else if (header.af_col!=0) { - var_x=2.0*af*(1.0-af); - mapRS2var[rs]=var_x; - } else {} + if (header.var_col != 0) { + mapRS2var[rs] = var_x; + } else if (header.af_col != 0) { + var_x = 2.0 * af * (1.0 - af); + mapRS2var[rs] = var_x; + } else { + } - ns_test++; + ns_test++; } else { - cout<<"error! more than one snp has the same id "<<rs<< - " in cor file?"<<endl; + cout << "error! more than one snp has the same id " << rs + << " in cor file?" << endl; } } // Record max pos. - ni_total=max(ni_total, n_total); + ni_total = max(ni_total, n_total); ns_total++; } @@ -717,19 +783,18 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps, // Read beta file, store mapRS2var if var is provided here, calculate // q and var_y. 
-void ReadFile_beta (const bool flag_priorscale, const string &file_beta, - const map<string, size_t> &mapRS2cat, - map<string, size_t> &mapRS2in, - map<string, double> &mapRS2var, - map<string, size_t> &mapRS2nsamp, - gsl_vector *q_vec, gsl_vector *qvar_vec, - gsl_vector *s_vec, size_t &ni_total, - size_t &ns_total) { +void ReadFile_beta(const bool flag_priorscale, const string &file_beta, + const map<string, size_t> &mapRS2cat, + map<string, size_t> &mapRS2in, + map<string, double> &mapRS2var, + map<string, size_t> &mapRS2nsamp, gsl_vector *q_vec, + gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total, + size_t &ns_total) { mapRS2nsamp.clear(); - igzstream infile (file_beta.c_str(), igzstream::in); + igzstream infile(file_beta.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open beta file: "<<file_beta<<endl; + cout << "error! fail to open beta file: " << file_beta << endl; return; } @@ -738,13 +803,15 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta, string type; string rs, chr, a1, a0, pos, cm; - double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0; - size_t n_total=0, n_mis=0, n_obs=0; - size_t ns_test=0; - ns_total=0; ni_total=0; + double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0, + af = 0, var_x = 0; + size_t n_total = 0, n_mis = 0, n_obs = 0; + size_t ns_test = 0; + ns_total = 0; + ni_total = 0; vector<double> vec_q, vec_qvar, vec_s; - for (size_t i=0; i<q_vec->size; i++) { + for (size_t i = 0; i < q_vec->size; i++) { vec_q.push_back(0.0); vec_qvar.push_back(0.0); vec_s.push_back(0.0); @@ -753,122 +820,166 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta, // Read header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_vc (line, header); + ReadHeader_vc(line, header); - if (header.n_col==0 ) { - if (header.nobs_col==0 && header.nmis_col==0) { - cout<<"error! 
missing sample size in the beta file."<<endl; + if (header.n_col == 0) { + if (header.nobs_col == 0 && header.nmis_col == 0) { + cout << "error! missing sample size in the beta file." << endl; } else { - cout<<"total sample size will be replaced by obs/mis sample size."<<endl; + cout << "total sample size will be replaced by obs/mis sample size." + << endl; } } - if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) && - header.chisq_col==0 && header.p_col==0) { - cout<<"error! missing z scores in the beta file."<<endl; + if (header.z_col == 0 && (header.beta_col == 0 || header.sebeta_col == 0) && + header.chisq_col == 0 && header.p_col == 0) { + cout << "error! missing z scores in the beta file." << endl; } - if (header.af_col==0 && header.var_col==0 && mapRS2var.size()==0) { - cout<<"error! missing allele frequency in the beta file."<<endl; + if (header.af_col == 0 && header.var_col == 0 && mapRS2var.size() == 0) { + cout << "error! missing allele frequency in the beta file." 
<< endl; } while (!safeGetline(infile, line).eof()) { - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - - z=0; beta=0; se_beta=0; chisq=0; pvalue=0; - n_total=0; n_mis=0; n_obs=0; af=0; var_x=0; - for (size_t i=0; i<header.coln; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;} - if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;} - if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;} - if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;} - if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;} - if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;} + ch_ptr = strtok((char *)line.c_str(), " , \t"); + + z = 0; + beta = 0; + se_beta = 0; + chisq = 0; + pvalue = 0; + n_total = 0; + n_mis = 0; + n_obs = 0; + af = 0; + var_x = 0; + for (size_t i = 0; i < header.coln; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } + if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } + if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + } + if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + } + if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } + if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; + } - if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);} - if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);} - if (header.sebeta_col!=0 && header.sebeta_col==i+1) { - se_beta=atof(ch_ptr); + if (header.z_col != 0 && header.z_col == i + 1) { + z = atof(ch_ptr); + } + if (header.beta_col != 0 && header.beta_col == i + 1) { + beta = atof(ch_ptr); + } + if (header.sebeta_col != 0 && header.sebeta_col == i + 1) { + se_beta = atof(ch_ptr); + } + if (header.chisq_col != 0 && header.chisq_col == i + 1) { + chisq = atof(ch_ptr); + } + if (header.p_col != 0 && header.p_col == i + 1) { + pvalue = atof(ch_ptr); } - if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);} - if (header.p_col!=0 && 
header.p_col==i+1) {pvalue=atof(ch_ptr);} - if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);} - if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);} - if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);} + if (header.n_col != 0 && header.n_col == i + 1) { + n_total = atoi(ch_ptr); + } + if (header.nmis_col != 0 && header.nmis_col == i + 1) { + n_mis = atoi(ch_ptr); + } + if (header.nobs_col != 0 && header.nobs_col == i + 1) { + n_obs = atoi(ch_ptr); + } - if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);} - if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);} + if (header.af_col != 0 && header.af_col == i + 1) { + af = atof(ch_ptr); + } + if (header.var_col != 0 && header.var_col == i + 1) { + var_x = atof(ch_ptr); + } - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } - if (header.rs_col==0) { - rs=chr+":"+pos; + if (header.rs_col == 0) { + rs = chr + ":" + pos; } - if (header.n_col==0) { - n_total=n_mis+n_obs; + if (header.n_col == 0) { + n_total = n_mis + n_obs; } // Both z values and beta/se_beta have directions, while // chisq/pvalue do not. - if (header.z_col!=0) { - zsquare=z*z; - } else if (header.beta_col!=0 && header.sebeta_col!=0) { - z=beta/se_beta; - zsquare=z*z; - } else if (header.chisq_col!=0) { - zsquare=chisq; - } else if (header.p_col!=0) { - zsquare=gsl_cdf_chisq_Qinv (pvalue, 1); - } else {zsquare=0;} + if (header.z_col != 0) { + zsquare = z * z; + } else if (header.beta_col != 0 && header.sebeta_col != 0) { + z = beta / se_beta; + zsquare = z * z; + } else if (header.chisq_col != 0) { + zsquare = chisq; + } else if (header.p_col != 0) { + zsquare = gsl_cdf_chisq_Qinv(pvalue, 1); + } else { + zsquare = 0; + } // If the snp is also present in cor file, then do calculations. 
- if ((header.var_col!=0 || header.af_col!=0 || mapRS2var.count(rs)!=0) && - mapRS2in.count(rs)!=0 && - (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) { - if (mapRS2in.at(rs)>1) { - cout<<"error! more than one snp has the same id "<<rs<< - " in beta file?"<<endl; - break; + if ((header.var_col != 0 || header.af_col != 0 || + mapRS2var.count(rs) != 0) && + mapRS2in.count(rs) != 0 && + (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) { + if (mapRS2in.at(rs) > 1) { + cout << "error! more than one snp has the same id " << rs + << " in beta file?" << endl; + break; } - if (header.var_col==0) { - if (header.af_col!=0) { - var_x=2.0*af*(1.0-af); - } else { - var_x=mapRS2var.at(rs); - } + if (header.var_col == 0) { + if (header.af_col != 0) { + var_x = 2.0 * af * (1.0 - af); + } else { + var_x = mapRS2var.at(rs); + } } - if (flag_priorscale) {var_x=1;} + if (flag_priorscale) { + var_x = 1; + } mapRS2in[rs]++; - mapRS2var[rs]=var_x; - mapRS2nsamp[rs]=n_total; - - if (mapRS2cat.size()!=0) { - vec_q[mapRS2cat.at(rs) ]+=(zsquare-1.0)*var_x/(double)n_total; - vec_s[mapRS2cat.at(rs) ]+=var_x; - vec_qvar[mapRS2cat.at(rs) ]+= - var_x*var_x/((double)n_total*(double)n_total); + mapRS2var[rs] = var_x; + mapRS2nsamp[rs] = n_total; + + if (mapRS2cat.size() != 0) { + vec_q[mapRS2cat.at(rs)] += (zsquare - 1.0) * var_x / (double)n_total; + vec_s[mapRS2cat.at(rs)] += var_x; + vec_qvar[mapRS2cat.at(rs)] += + var_x * var_x / ((double)n_total * (double)n_total); } else { - vec_q[0]+=(zsquare-1.0)*var_x/(double)n_total; - vec_s[0]+=var_x; - vec_qvar[0]+=var_x*var_x/((double)n_total*(double)n_total); + vec_q[0] += (zsquare - 1.0) * var_x / (double)n_total; + vec_s[0] += var_x; + vec_qvar[0] += var_x * var_x / ((double)n_total * (double)n_total); } - ni_total=max(ni_total, n_total); + ni_total = max(ni_total, n_total); ns_test++; } ns_total++; } - for (size_t i=0; i<q_vec->size; i++) { + for (size_t i = 0; i < q_vec->size; i++) { gsl_vector_set(q_vec, i, vec_q[i]); - 
gsl_vector_set(qvar_vec, i, 2.0*vec_qvar[i]); + gsl_vector_set(qvar_vec, i, 2.0 * vec_qvar[i]); gsl_vector_set(s_vec, i, vec_s[i]); } @@ -882,21 +993,20 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta, // Look for rs, n_mis+n_obs, var, window_size, cov. // If window_cm/bp/ns is provided, then use these max values to // calibrate estimates. -void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, - const vector<size_t> &vec_n, const vector<double> &vec_cm, - const vector<double> &vec_bp, - const map<string, size_t> &mapRS2cat, - const map<string, size_t> &mapRS2in, - const map<string, double> &mapRS2var, - const map<string, size_t> &mapRS2nsamp, - const size_t crt, const double &window_cm, - const double &window_bp, const double &window_ns, - gsl_matrix *S_mat, gsl_matrix *Svar_mat, - gsl_vector *qvar_vec, size_t &ni_total, - size_t &ns_total, size_t &ns_test, size_t &ns_pair) { - igzstream infile (file_cor.c_str(), igzstream::in); +void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs, + const vector<size_t> &vec_n, const vector<double> &vec_cm, + const vector<double> &vec_bp, + const map<string, size_t> &mapRS2cat, + const map<string, size_t> &mapRS2in, + const map<string, double> &mapRS2var, + const map<string, size_t> &mapRS2nsamp, const size_t crt, + const double &window_cm, const double &window_bp, + const double &window_ns, gsl_matrix *S_mat, + gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total, + size_t &ns_total, size_t &ns_test, size_t &ns_pair) { + igzstream infile(file_cor.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open cov file: "<<file_cor<<endl; + cout << "error! 
fail to open cov file: " << file_cor << endl; return; } @@ -905,172 +1015,192 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, string rs1, rs2; double d1, d2, d3, cor, var1, var2; - size_t n_nb, nsamp1, nsamp2, n12, bin_size=10, bin; + size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin; - vector<vector<double> > mat_S, mat_Svar, mat_tmp; + vector<vector<double>> mat_S, mat_Svar, mat_tmp; vector<double> vec_qvar, vec_tmp; - vector<vector<vector<double> > > mat3d_Sbin; + vector<vector<vector<double>>> mat3d_Sbin; - for (size_t i=0; i<S_mat->size1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { vec_qvar.push_back(0.0); } - for (size_t i=0; i<S_mat->size1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { mat_S.push_back(vec_qvar); mat_Svar.push_back(vec_qvar); } - for (size_t k=0; k<bin_size; k++) { + for (size_t k = 0; k < bin_size; k++) { vec_tmp.push_back(0.0); } - for (size_t i=0; i<S_mat->size1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { mat_tmp.push_back(vec_tmp); } - for (size_t i=0; i<S_mat->size1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { mat3d_Sbin.push_back(mat_tmp); } string rs, chr, a1, a0, type, pos, cm; - size_t n_total=0, n_mis=0, n_obs=0; + size_t n_total = 0, n_mis = 0, n_obs = 0; double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm; - ns_test=0; ns_total=0; ns_pair=0; ni_total=0; + ns_test = 0; + ns_total = 0; + ns_pair = 0; + ni_total = 0; // Header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_vc (line, header); + ReadHeader_vc(line, header); while (!safeGetline(infile, line).eof()) { // Do not read cor values this time; upto col_n-1. 
- d_pos1=0; d_cm1=0; - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - for (size_t i=0; i<header.coln-1; i++) { - if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;} - if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;} - if (header.pos_col!=0 && header.pos_col==i+1) { - pos=ch_ptr; - d_pos1=atof(ch_ptr); + d_pos1 = 0; + d_cm1 = 0; + ch_ptr = strtok((char *)line.c_str(), " , \t"); + for (size_t i = 0; i < header.coln - 1; i++) { + if (header.rs_col != 0 && header.rs_col == i + 1) { + rs = ch_ptr; + } + if (header.chr_col != 0 && header.chr_col == i + 1) { + chr = ch_ptr; + } + if (header.pos_col != 0 && header.pos_col == i + 1) { + pos = ch_ptr; + d_pos1 = atof(ch_ptr); + } + if (header.cm_col != 0 && header.cm_col == i + 1) { + cm = ch_ptr; + d_cm1 = atof(ch_ptr); } - if (header.cm_col!=0 && header.cm_col==i+1) { - cm=ch_ptr; - d_cm1=atof(ch_ptr); + if (header.a1_col != 0 && header.a1_col == i + 1) { + a1 = ch_ptr; + } + if (header.a0_col != 0 && header.a0_col == i + 1) { + a0 = ch_ptr; } - if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;} - if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;} - if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);} - if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);} - if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);} + if (header.n_col != 0 && header.n_col == i + 1) { + n_total = atoi(ch_ptr); + } + if (header.nmis_col != 0 && header.nmis_col == i + 1) { + n_mis = atoi(ch_ptr); + } + if (header.nobs_col != 0 && header.nobs_col == i + 1) { + n_obs = atoi(ch_ptr); + } - ch_ptr=strtok (NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); } - if (header.rs_col==0) { - rs=chr+":"+pos; + if (header.rs_col == 0) { + rs = chr + ":" + pos; } - if (header.n_col==0) { - n_total=n_mis+n_obs; + if (header.n_col == 0) { + n_total = n_mis + n_obs; } - rs1=rs; - - if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs1)!=0) && - mapRS2in.count(rs1)!=0 && mapRS2in.at(rs1)==2) { - 
var1=mapRS2var.at(rs1); - nsamp1=mapRS2nsamp.at(rs1); - d2=var1*var1; - - if (mapRS2cat.size()!=0) { - mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+= - (1-1.0/(double)vec_n[ns_total])*d2; - mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+= - d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]); - if (crt==1) { - mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ][0]+= - (1-1.0/(double)vec_n[ns_total])*d2; - } + rs1 = rs; + + if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs1) != 0) && + mapRS2in.count(rs1) != 0 && mapRS2in.at(rs1) == 2) { + var1 = mapRS2var.at(rs1); + nsamp1 = mapRS2nsamp.at(rs1); + d2 = var1 * var1; + + if (mapRS2cat.size() != 0) { + mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] += + (1 - 1.0 / (double)vec_n[ns_total]) * d2; + mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)] += + d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]); + if (crt == 1) { + mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs1)][0] += + (1 - 1.0 / (double)vec_n[ns_total]) * d2; + } } else { - mat_S[0][0]+=(1-1.0/(double)vec_n[ns_total])*d2; - mat_Svar[0][0]+= - d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]); - if (crt==1) { - mat3d_Sbin[0][0][0]+=(1-1.0/(double)vec_n[ns_total])*d2; - } - } - - n_nb=0; - while(ch_ptr!=NULL) { - type=ch_ptr; - if (type.compare("NA")!=0 && type.compare("na")!=0 && - type.compare("nan")!=0 && type.compare("-nan")!=0) { - cor=atof(ch_ptr); - rs2=vec_rs[ns_total+n_nb+1]; - d_pos2=vec_bp[ns_total+n_nb+1]; - d_cm2=vec_cm[ns_total+n_nb+1]; - d_pos=abs(d_pos2-d_pos1); - d_cm=abs(d_cm2-d_cm1); - - if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs2)!=0) && - mapRS2in.count(rs2)!=0 && mapRS2in.at(rs2)==2) { - var2=mapRS2var.at(rs2); - nsamp2=mapRS2nsamp.at(rs2); - d1=cor*cor-1.0/(double)min(vec_n[ns_total], - vec_n[ns_total+n_nb+1]); - d2=var1*var2; - d3=cor*cor/((double)nsamp1*(double)nsamp2); - n12=min(vec_n[ns_total], vec_n[ns_total+n_nb+1]); - - // Compute bin. 
- if (crt==1) { - if (window_cm!=0 && d_cm1!=0 && d_cm2!=0) { - bin=min( (int)floor(d_cm/window_cm*bin_size), (int)bin_size); - } else if (window_bp!=0 && d_pos1!=0 && d_pos2!=0) { - bin=min( (int)floor(d_pos/window_bp*bin_size), (int)bin_size); - } else if (window_ns!=0) { - bin=min( (int)floor(((double)n_nb+1)/window_ns*bin_size), - (int)bin_size); - } - } - - if (mapRS2cat.size()!=0) { - if (mapRS2cat.at(rs1)==mapRS2cat.at(rs2)) { - vec_qvar[mapRS2cat.at(rs1)]+=2*d3*d2; - mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=2*d1*d2; - mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+= - 2*d2*d2/((double)n12*(double)n12); - if (crt==1) { - mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+= - 2*d1*d2; - } - } else { - mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=d1*d2; - mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+= - d2*d2/((double)n12*(double)n12); - if (crt==1) { - mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+= - d1*d2; - } - } - } else { - vec_qvar[0]+=2*d3*d2; - mat_S[0][0]+=2*d1*d2; - mat_Svar[0][0]+=2*d2*d2/((double)n12*(double)n12); - - if (crt==1) { - mat3d_Sbin[0][0][bin]+=2*d1*d2; - } - } - ns_pair++; - } - } - - ch_ptr=strtok (NULL, " , \t"); - n_nb++; - } - ni_total=max(ni_total, n_total); + mat_S[0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2; + mat_Svar[0][0] += + d2 * d2 / ((double)vec_n[ns_total] * (double)vec_n[ns_total]); + if (crt == 1) { + mat3d_Sbin[0][0][0] += (1 - 1.0 / (double)vec_n[ns_total]) * d2; + } + } + + n_nb = 0; + while (ch_ptr != NULL) { + type = ch_ptr; + if (type.compare("NA") != 0 && type.compare("na") != 0 && + type.compare("nan") != 0 && type.compare("-nan") != 0) { + cor = atof(ch_ptr); + rs2 = vec_rs[ns_total + n_nb + 1]; + d_pos2 = vec_bp[ns_total + n_nb + 1]; + d_cm2 = vec_cm[ns_total + n_nb + 1]; + d_pos = abs(d_pos2 - d_pos1); + d_cm = abs(d_cm2 - d_cm1); + + if ((mapRS2cat.size() == 0 || mapRS2cat.count(rs2) != 0) && + mapRS2in.count(rs2) != 0 && mapRS2in.at(rs2) == 2) { + var2 = 
mapRS2var.at(rs2); + nsamp2 = mapRS2nsamp.at(rs2); + d1 = cor * cor - + 1.0 / (double)min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]); + d2 = var1 * var2; + d3 = cor * cor / ((double)nsamp1 * (double)nsamp2); + n12 = min(vec_n[ns_total], vec_n[ns_total + n_nb + 1]); + + // Compute bin. + if (crt == 1) { + if (window_cm != 0 && d_cm1 != 0 && d_cm2 != 0) { + bin = + min((int)floor(d_cm / window_cm * bin_size), (int)bin_size); + } else if (window_bp != 0 && d_pos1 != 0 && d_pos2 != 0) { + bin = min((int)floor(d_pos / window_bp * bin_size), + (int)bin_size); + } else if (window_ns != 0) { + bin = min((int)floor(((double)n_nb + 1) / window_ns * bin_size), + (int)bin_size); + } + } + + if (mapRS2cat.size() != 0) { + if (mapRS2cat.at(rs1) == mapRS2cat.at(rs2)) { + vec_qvar[mapRS2cat.at(rs1)] += 2 * d3 * d2; + mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += 2 * d1 * d2; + mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += + 2 * d2 * d2 / ((double)n12 * (double)n12); + if (crt == 1) { + mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] += + 2 * d1 * d2; + } + } else { + mat_S[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += d1 * d2; + mat_Svar[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)] += + d2 * d2 / ((double)n12 * (double)n12); + if (crt == 1) { + mat3d_Sbin[mapRS2cat.at(rs1)][mapRS2cat.at(rs2)][bin] += + d1 * d2; + } + } + } else { + vec_qvar[0] += 2 * d3 * d2; + mat_S[0][0] += 2 * d1 * d2; + mat_Svar[0][0] += 2 * d2 * d2 / ((double)n12 * (double)n12); + + if (crt == 1) { + mat3d_Sbin[0][0][bin] += 2 * d1 * d2; + } + } + ns_pair++; + } + } + + ch_ptr = strtok(NULL, " , \t"); + n_nb++; + } + ni_total = max(ni_total, n_total); ns_test++; } @@ -1081,70 +1211,83 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, // x=seq(0.5,bin_size-0.5,by=1) and then compute a correlation // factor as a percentage. 
double a, b, x, y, n, var_y, var_x, mean_y, mean_x, cov_xy, crt_factor; - if (crt==1) { - for (size_t i=0; i<S_mat->size1; i++) { - for (size_t j=i; j<S_mat->size2; j++) { - - // Correct mat_S. - n=0; var_y=0; var_x=0; mean_y=0; mean_x=0; cov_xy=0; - for (size_t k=0; k<bin_size; k++) { - if (j==i) { - y=mat3d_Sbin[i][j][k]; - } else { - y=mat3d_Sbin[i][j][k]+mat3d_Sbin[j][i][k]; - } - x=k+0.5; - cout<<y<<", "; - if (y>0) { - y=1/sqrt(y); - mean_x+=x; mean_y+=y; var_x+=x*x; var_y+=y*y; cov_xy+=x*y; - n++; - } - } - cout<<endl; - - if (n>=5) { - mean_x/=n; mean_y/=n; var_x/=n; var_y/=n; cov_xy/=n; - var_x-=mean_x*mean_x; var_y-=mean_y*mean_y; cov_xy-=mean_x*mean_y; - b=cov_xy/var_x; - a=mean_y-b*mean_x; - crt_factor=a/(b*(bin_size+0.5))+1; - if (i==j) { - mat_S[i][j]*=crt_factor; - } else { - mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor; - } - cout<<crt_factor<<endl; - - // Correct qvar. - if (i==j) { - vec_qvar[i]*=crt_factor; - } - } + if (crt == 1) { + for (size_t i = 0; i < S_mat->size1; i++) { + for (size_t j = i; j < S_mat->size2; j++) { + + // Correct mat_S. + n = 0; + var_y = 0; + var_x = 0; + mean_y = 0; + mean_x = 0; + cov_xy = 0; + for (size_t k = 0; k < bin_size; k++) { + if (j == i) { + y = mat3d_Sbin[i][j][k]; + } else { + y = mat3d_Sbin[i][j][k] + mat3d_Sbin[j][i][k]; + } + x = k + 0.5; + cout << y << ", "; + if (y > 0) { + y = 1 / sqrt(y); + mean_x += x; + mean_y += y; + var_x += x * x; + var_y += y * y; + cov_xy += x * y; + n++; + } + } + cout << endl; + + if (n >= 5) { + mean_x /= n; + mean_y /= n; + var_x /= n; + var_y /= n; + cov_xy /= n; + var_x -= mean_x * mean_x; + var_y -= mean_y * mean_y; + cov_xy -= mean_x * mean_y; + b = cov_xy / var_x; + a = mean_y - b * mean_x; + crt_factor = a / (b * (bin_size + 0.5)) + 1; + if (i == j) { + mat_S[i][j] *= crt_factor; + } else { + mat_S[i][j] *= crt_factor; + mat_S[j][i] *= crt_factor; + } + cout << crt_factor << endl; + + // Correct qvar. 
+ if (i == j) { + vec_qvar[i] *= crt_factor; + } + } } } } // Save to gsl_vector and gsl_matrix: qvar_vec, S_mat, Svar_mat. - for (size_t i=0; i<S_mat->size1; i++) { - d1=gsl_vector_get(qvar_vec, i)+2*vec_qvar[i]; + for (size_t i = 0; i < S_mat->size1; i++) { + d1 = gsl_vector_get(qvar_vec, i) + 2 * vec_qvar[i]; gsl_vector_set(qvar_vec, i, d1); - for (size_t j=0; j<S_mat->size2; j++) { - if (i==j) { - gsl_matrix_set(S_mat, i, j, mat_S[i][i]); - gsl_matrix_set(Svar_mat, i, j, - 2.0*mat_Svar[i][i]*ns_test*ns_test/(2.0*ns_pair) ); + for (size_t j = 0; j < S_mat->size2; j++) { + if (i == j) { + gsl_matrix_set(S_mat, i, j, mat_S[i][i]); + gsl_matrix_set(Svar_mat, i, j, 2.0 * mat_Svar[i][i] * ns_test * + ns_test / (2.0 * ns_pair)); } else { - gsl_matrix_set(S_mat, i, j, mat_S[i][j]+mat_S[j][i]); - gsl_matrix_set(Svar_mat, i, j, - 2.0*(mat_Svar[i][j]+mat_Svar[j][i])* - ns_test*ns_test/(2.0*ns_pair) ); + gsl_matrix_set(S_mat, i, j, mat_S[i][j] + mat_S[j][i]); + gsl_matrix_set(Svar_mat, i, j, 2.0 * (mat_Svar[i][j] + mat_Svar[j][i]) * + ns_test * ns_test / (2.0 * ns_pair)); } } } - - infile.clear(); infile.close(); @@ -1157,170 +1300,175 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, // compute the variance for S, use a set of genotypes, phenotypes, and // individual ids, and snp category label. 
void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, - const gsl_matrix *Svar_mat, const gsl_vector *q_vec, - const gsl_vector *s_vec, const double df, - vector<double> &v_pve, vector<double> &v_se_pve, - double &pve_total, double &se_pve_total, - vector<double> &v_sigma2, vector<double> &v_se_sigma2, - vector<double> &v_enrich, vector<double> &v_se_enrich) { - size_t n_vc=S_mat->size1; - - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); - - gsl_vector *pve=gsl_vector_alloc (n_vc); - gsl_vector *pve_plus=gsl_vector_alloc (n_vc+1); - gsl_vector *tmp=gsl_vector_alloc (n_vc+1); - gsl_vector *sigma2persnp=gsl_vector_alloc (n_vc); - gsl_vector *enrich=gsl_vector_alloc (n_vc); - gsl_vector *se_pve=gsl_vector_alloc (n_vc); - gsl_vector *se_sigma2persnp=gsl_vector_alloc (n_vc); - gsl_vector *se_enrich=gsl_vector_alloc (n_vc); + const gsl_matrix *Svar_mat, const gsl_vector *q_vec, + const gsl_vector *s_vec, const double df, vector<double> &v_pve, + vector<double> &v_se_pve, double &pve_total, double &se_pve_total, + vector<double> &v_sigma2, vector<double> &v_se_sigma2, + vector<double> &v_enrich, vector<double> &v_se_enrich) { + size_t n_vc = S_mat->size1; + + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc); + + gsl_vector *pve = gsl_vector_alloc(n_vc); + gsl_vector *pve_plus = gsl_vector_alloc(n_vc + 1); + gsl_vector *tmp = gsl_vector_alloc(n_vc + 1); + gsl_vector *sigma2persnp = gsl_vector_alloc(n_vc); + gsl_vector 
*enrich = gsl_vector_alloc(n_vc); + gsl_vector *se_pve = gsl_vector_alloc(n_vc); + gsl_vector *se_sigma2persnp = gsl_vector_alloc(n_vc); + gsl_vector *se_enrich = gsl_vector_alloc(n_vc); double d; // Calculate S^{-1}q. - gsl_matrix_memcpy (tmp_mat, S_mat); + gsl_matrix_memcpy(tmp_mat, S_mat); int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); - LUDecomp (tmp_mat, pmt, &sig); - LUInvert (tmp_mat, pmt, Si_mat); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); + LUDecomp(tmp_mat, pmt, &sig); + LUInvert(tmp_mat, pmt, Si_mat); // Calculate sigma2snp and pve. - gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); + gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); gsl_vector_memcpy(sigma2persnp, pve); gsl_vector_div(sigma2persnp, s_vec); // Get qvar_mat. - gsl_matrix_memcpy (qvar_mat, Vq); - gsl_matrix_scale (qvar_mat, 1.0/(df*df)); + gsl_matrix_memcpy(qvar_mat, Vq); + gsl_matrix_scale(qvar_mat, 1.0 / (df * df)); // Calculate variance for these estimates. - for (size_t i=0; i<n_vc; i++) { - for (size_t j=i; j<n_vc; j++) { - d=gsl_matrix_get(Svar_mat, i, j); - d*=gsl_vector_get(pve, i)*gsl_vector_get(pve, j); + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = i; j < n_vc; j++) { + d = gsl_matrix_get(Svar_mat, i, j); + d *= gsl_vector_get(pve, i) * gsl_vector_get(pve, j); - d+=gsl_matrix_get(qvar_mat, i, j); + d += gsl_matrix_get(qvar_mat, i, j); gsl_matrix_set(Var_mat, i, j, d); - if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);} + if (i != j) { + gsl_matrix_set(Var_mat, j, i, d); + } } } - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, - 0.0, tmp_mat); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, - 0.0, Var_mat); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0, + tmp_mat); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0, + Var_mat); - for (size_t i=0; i<n_vc; i++) { - d=sqrt(gsl_matrix_get(Var_mat, i, i)); + for (size_t i = 0; i < n_vc; i++) { + d = 
sqrt(gsl_matrix_get(Var_mat, i, i)); gsl_vector_set(se_pve, i, d); - d/=gsl_vector_get(s_vec, i); + d /= gsl_vector_get(s_vec, i); gsl_vector_set(se_sigma2persnp, i, d); } // Compute pve_total, se_pve_total. - pve_total=0; se_pve_total=0; - for (size_t i=0; i<n_vc; i++) { - pve_total+=gsl_vector_get(pve, i); + pve_total = 0; + se_pve_total = 0; + for (size_t i = 0; i < n_vc; i++) { + pve_total += gsl_vector_get(pve, i); - for (size_t j=0; j<n_vc; j++) { - se_pve_total+=gsl_matrix_get(Var_mat, i, j); + for (size_t j = 0; j < n_vc; j++) { + se_pve_total += gsl_matrix_get(Var_mat, i, j); } } - se_pve_total=sqrt(se_pve_total); + se_pve_total = sqrt(se_pve_total); // Compute enrichment and its variance. - double s_pve=0, s_snp=0; - for (size_t i=0; i<n_vc; i++) { - s_pve+=gsl_vector_get(pve, i); - s_snp+=gsl_vector_get(s_vec, i); + double s_pve = 0, s_snp = 0; + for (size_t i = 0; i < n_vc; i++) { + s_pve += gsl_vector_get(pve, i); + s_snp += gsl_vector_get(s_vec, i); } - gsl_vector_memcpy (enrich, sigma2persnp); - gsl_vector_scale (enrich, s_snp/s_pve); + gsl_vector_memcpy(enrich, sigma2persnp); + gsl_vector_scale(enrich, s_snp / s_pve); gsl_matrix_set_identity(tmp_mat); double d1; - for (size_t i=0; i<n_vc; i++) { - d=gsl_vector_get(pve, i)/s_pve; - d1=gsl_vector_get(s_vec, i); - for (size_t j=0; j<n_vc; j++) { - if (i==j) { - gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve); + for (size_t i = 0; i < n_vc; i++) { + d = gsl_vector_get(pve, i) / s_pve; + d1 = gsl_vector_get(s_vec, i); + for (size_t j = 0; j < n_vc; j++) { + if (i == j) { + gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve); } else { - gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve); + gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve); } } } gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0, - tmp_mat1); + tmp_mat1); gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0, - VarEnrich_mat); + VarEnrich_mat); - for (size_t i=0; i<n_vc; i++) { - 
d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i)); + for (size_t i = 0; i < n_vc; i++) { + d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i)); gsl_vector_set(se_enrich, i, d); } - cout<<"pve = "; - for (size_t i=0; i<n_vc; i++) { - cout<<gsl_vector_get(pve, i)<<" "; + cout << "pve = "; + for (size_t i = 0; i < n_vc; i++) { + cout << gsl_vector_get(pve, i) << " "; } - cout<<endl; + cout << endl; - cout<<"se(pve) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<gsl_vector_get(se_pve, i)<<" "; + cout << "se(pve) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << gsl_vector_get(se_pve, i) << " "; } - cout<<endl; + cout << endl; - cout<<"sigma2 per snp = "; - for (size_t i=0; i<n_vc; i++) { - cout<<gsl_vector_get(sigma2persnp, i)<<" "; + cout << "sigma2 per snp = "; + for (size_t i = 0; i < n_vc; i++) { + cout << gsl_vector_get(sigma2persnp, i) << " "; } - cout<<endl; + cout << endl; - cout<<"se(sigma2 per snp) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<gsl_vector_get(se_sigma2persnp, i)<<" "; + cout << "se(sigma2 per snp) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << gsl_vector_get(se_sigma2persnp, i) << " "; } - cout<<endl; + cout << endl; - cout<<"enrichment = "; - for (size_t i=0; i<n_vc; i++) { - cout<<gsl_vector_get(enrich, i)<<" "; + cout << "enrichment = "; + for (size_t i = 0; i < n_vc; i++) { + cout << gsl_vector_get(enrich, i) << " "; } - cout<<endl; + cout << endl; - cout<<"se(enrichment) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<gsl_vector_get(se_enrich, i)<<" "; + cout << "se(enrichment) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << gsl_vector_get(se_enrich, i) << " "; } - cout<<endl; + cout << endl; // Save data. 
- v_pve.clear(); v_se_pve.clear(); - v_sigma2.clear(); v_se_sigma2.clear(); - v_enrich.clear(); v_se_enrich.clear(); - for (size_t i=0; i<n_vc; i++) { - d=gsl_vector_get(pve, i); + v_pve.clear(); + v_se_pve.clear(); + v_sigma2.clear(); + v_se_sigma2.clear(); + v_enrich.clear(); + v_se_enrich.clear(); + for (size_t i = 0; i < n_vc; i++) { + d = gsl_vector_get(pve, i); v_pve.push_back(d); - d=gsl_vector_get(se_pve, i); + d = gsl_vector_get(se_pve, i); v_se_pve.push_back(d); - d=gsl_vector_get(sigma2persnp, i); + d = gsl_vector_get(sigma2persnp, i); v_sigma2.push_back(d); - d=gsl_vector_get(se_sigma2persnp, i); + d = gsl_vector_get(se_sigma2persnp, i); v_se_sigma2.push_back(d); - d=gsl_vector_get(enrich, i); + d = gsl_vector_get(enrich, i); v_enrich.push_back(d); - d=gsl_vector_get(se_enrich, i); + d = gsl_vector_get(se_enrich, i); v_se_enrich.push_back(d); } @@ -1345,196 +1493,206 @@ void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, } // Ks are not scaled. -void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W, - const gsl_vector *y) { - size_t n1=K->size1, n2=K->size2; - size_t n_vc=n2/n1; +void VC::CalcVChe(const gsl_matrix *K, const gsl_matrix *W, + const gsl_vector *y) { + size_t n1 = K->size1, n2 = K->size2; + size_t n_vc = n2 / n1; - double r=(double)n1/(double)(n1 - W->size2); + double r = (double)n1 / (double)(n1 - W->size2); double var_y, var_y_new; double d, tr, s, v; vector<double> traceG_new; // New matrices/vectors. - gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); - gsl_vector *y_scale=gsl_vector_alloc (n1); - gsl_matrix *Kry=gsl_matrix_alloc (n1, n_vc); - gsl_matrix *yKrKKry=gsl_matrix_alloc (n_vc, n_vc*(n_vc+1) ); - gsl_vector *KKry=gsl_vector_alloc (n1); + gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2); + gsl_vector *y_scale = gsl_vector_alloc(n1); + gsl_matrix *Kry = gsl_matrix_alloc(n1, n_vc); + gsl_matrix *yKrKKry = gsl_matrix_alloc(n_vc, n_vc * (n_vc + 1)); + gsl_vector *KKry = gsl_vector_alloc(n1); // Old matrices/vectors. 
- gsl_vector *pve=gsl_vector_alloc (n_vc); - gsl_vector *se_pve=gsl_vector_alloc (n_vc); - gsl_vector *q_vec=gsl_vector_alloc (n_vc); - gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_vector *pve = gsl_vector_alloc(n_vc); + gsl_vector *se_pve = gsl_vector_alloc(n_vc); + gsl_vector *q_vec = gsl_vector_alloc(n_vc); + gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); // Center and scale K by W. - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { gsl_matrix_view Kscale_sub = - gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1); + gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1); gsl_matrix_const_view K_sub = - gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1); - gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix); + gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1); + gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix); - CenterMatrix (&Kscale_sub.matrix, W); - d=ScaleMatrix (&Kscale_sub.matrix); + CenterMatrix(&Kscale_sub.matrix, W); + d = ScaleMatrix(&Kscale_sub.matrix); traceG_new.push_back(d); } // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1). - gsl_vector_memcpy (y_scale, y); - CenterVector (y_scale, W); + gsl_vector_memcpy(y_scale, y); + CenterVector(y_scale, W); - var_y=VectorVar (y); - var_y_new=VectorVar (y_scale); + var_y = VectorVar(y); + var_y_new = VectorVar(y_scale); - StandardizeVector (y_scale); + StandardizeVector(y_scale); // Compute Kry, which is used for confidence interval; also compute // q_vec (*n^2). 
- for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { gsl_matrix_const_view Kscale_sub = - gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1); - gsl_vector_view Kry_col=gsl_matrix_column (Kry, i); + gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1); + gsl_vector_view Kry_col = gsl_matrix_column(Kry, i); - gsl_vector_memcpy (&Kry_col.vector, y_scale); - gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0*r, - &Kry_col.vector); + gsl_vector_memcpy(&Kry_col.vector, y_scale); + gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0 * r, + &Kry_col.vector); - gsl_blas_ddot (&Kry_col.vector, y_scale, &d); + gsl_blas_ddot(&Kry_col.vector, y_scale, &d); gsl_vector_set(q_vec, i, d); } // Compute yKrKKry, which is used later for confidence interval. - for (size_t i=0; i<n_vc; i++) { - gsl_vector_const_view Kry_coli=gsl_matrix_const_column (Kry, i); - for (size_t j=i; j<n_vc; j++) { - gsl_vector_const_view Kry_colj=gsl_matrix_const_column (Kry, j); - for (size_t l=0; l<n_vc; l++) { - gsl_matrix_const_view Kscale_sub = - gsl_matrix_const_submatrix (K_scale, 0, n1*l, n1, n1); - gsl_blas_dgemv (CblasNoTrans, 1.0, &Kscale_sub.matrix, - &Kry_coli.vector, 0.0, KKry); - gsl_blas_ddot (&Kry_colj.vector, KKry, &d); - gsl_matrix_set(yKrKKry, i, l*n_vc+j, d); - if (i!=j) {gsl_matrix_set(yKrKKry, j, l*n_vc+i, d);} + for (size_t i = 0; i < n_vc; i++) { + gsl_vector_const_view Kry_coli = gsl_matrix_const_column(Kry, i); + for (size_t j = i; j < n_vc; j++) { + gsl_vector_const_view Kry_colj = gsl_matrix_const_column(Kry, j); + for (size_t l = 0; l < n_vc; l++) { + gsl_matrix_const_view Kscale_sub = + gsl_matrix_const_submatrix(K_scale, 0, n1 * l, n1, n1); + gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, &Kry_coli.vector, + 0.0, KKry); + gsl_blas_ddot(&Kry_colj.vector, KKry, &d); + gsl_matrix_set(yKrKKry, i, l * n_vc + j, d); + if (i != j) { + gsl_matrix_set(yKrKKry, j, l * n_vc + i, d); + } + } + gsl_blas_ddot(&Kry_coli.vector, 
&Kry_colj.vector, &d); + gsl_matrix_set(yKrKKry, i, n_vc * n_vc + j, d); + if (i != j) { + gsl_matrix_set(yKrKKry, j, n_vc * n_vc + i, d); } - gsl_blas_ddot (&Kry_coli.vector, &Kry_colj.vector, &d); - gsl_matrix_set(yKrKKry, i, n_vc*n_vc+j, d); - if (i!=j) {gsl_matrix_set(yKrKKry, j, n_vc*n_vc+i, d);} } } // Compute Sij (*n^2). - for (size_t i=0; i<n_vc; i++) { - for (size_t j=i; j<n_vc; j++) { - tr=0; - for (size_t l=0; l<n1; l++) { - gsl_vector_const_view Ki_col=gsl_matrix_const_column (K_scale, i*n1+l); - gsl_vector_const_view Kj_col=gsl_matrix_const_column (K_scale, j*n1+l); - gsl_blas_ddot (&Ki_col.vector, &Kj_col.vector, &d); - tr+=d; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = i; j < n_vc; j++) { + tr = 0; + for (size_t l = 0; l < n1; l++) { + gsl_vector_const_view Ki_col = + gsl_matrix_const_column(K_scale, i * n1 + l); + gsl_vector_const_view Kj_col = + gsl_matrix_const_column(K_scale, j * n1 + l); + gsl_blas_ddot(&Ki_col.vector, &Kj_col.vector, &d); + tr += d; } - tr=tr-r*(double)n1; - gsl_matrix_set (S_mat, i, j, tr); - if (i!=j) {gsl_matrix_set (S_mat, j, i, tr);} + tr = tr - r * (double)n1; + gsl_matrix_set(S_mat, i, j, tr); + if (i != j) { + gsl_matrix_set(S_mat, j, i, tr); + } } } // Compute S^{-1}q. int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); - LUDecomp (S_mat, pmt, &sig); - LUInvert (S_mat, pmt, Si_mat); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); + LUDecomp(S_mat, pmt, &sig); + LUInvert(S_mat, pmt, Si_mat); // Compute pve (on the transformed scale). - gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); + gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); // Compute q_var (*n^4). 
- gsl_matrix_set_zero (qvar_mat); - s=1; - for (size_t i=0; i<n_vc; i++) { - d=gsl_vector_get(pve, i); - gsl_matrix_view yKrKKry_sub= - gsl_matrix_submatrix(yKrKKry, 0, i*n_vc, n_vc, n_vc); - gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix); + gsl_matrix_set_zero(qvar_mat); + s = 1; + for (size_t i = 0; i < n_vc; i++) { + d = gsl_vector_get(pve, i); + gsl_matrix_view yKrKKry_sub = + gsl_matrix_submatrix(yKrKKry, 0, i * n_vc, n_vc, n_vc); + gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix); gsl_matrix_scale(tmp_mat, d); - gsl_matrix_add (qvar_mat, tmp_mat); - s-=d; + gsl_matrix_add(qvar_mat, tmp_mat); + s -= d; } - gsl_matrix_view yKrKKry_sub=gsl_matrix_submatrix(yKrKKry, 0, n_vc*n_vc, - n_vc, n_vc); - gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix); + gsl_matrix_view yKrKKry_sub = + gsl_matrix_submatrix(yKrKKry, 0, n_vc * n_vc, n_vc, n_vc); + gsl_matrix_memcpy(tmp_mat, &yKrKKry_sub.matrix); gsl_matrix_scale(tmp_mat, s); - gsl_matrix_add (qvar_mat, tmp_mat); + gsl_matrix_add(qvar_mat, tmp_mat); gsl_matrix_scale(qvar_mat, 2.0); // Compute S^{-1}var_qS^{-1}. - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat, - 0.0, tmp_mat); - gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, - 0.0, Var_mat); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat, 0.0, + tmp_mat); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0, + Var_mat); // Transform pve back to the original scale and save data. 
- v_pve.clear(); v_se_pve.clear(); - v_sigma2.clear(); v_se_sigma2.clear(); - - s=1.0, v=0, pve_total=0, se_pve_total=0; - for (size_t i=0; i<n_vc; i++) { - d=gsl_vector_get (pve, i); - v_sigma2.push_back(d*var_y_new/traceG_new[i]); - v_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y)); - s-=d; - pve_total+=d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y); - - d=sqrt(gsl_matrix_get (Var_mat, i, i)); - v_se_sigma2.push_back(d*var_y_new/traceG_new[i]); - v_se_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y)); - - for (size_t j=0; j<n_vc; j++) { - v+=gsl_matrix_get(Var_mat, i, j); - se_pve_total+=gsl_matrix_get(Var_mat, i, j)* - (var_y_new/traceG_new[i])*(v_traceG[i]/var_y)* - (var_y_new/traceG_new[j])*(v_traceG[j]/var_y); + v_pve.clear(); + v_se_pve.clear(); + v_sigma2.clear(); + v_se_sigma2.clear(); + + s = 1.0, v = 0, pve_total = 0, se_pve_total = 0; + for (size_t i = 0; i < n_vc; i++) { + d = gsl_vector_get(pve, i); + v_sigma2.push_back(d * var_y_new / traceG_new[i]); + v_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y)); + s -= d; + pve_total += d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y); + + d = sqrt(gsl_matrix_get(Var_mat, i, i)); + v_se_sigma2.push_back(d * var_y_new / traceG_new[i]); + v_se_pve.push_back(d * (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y)); + + for (size_t j = 0; j < n_vc; j++) { + v += gsl_matrix_get(Var_mat, i, j); + se_pve_total += gsl_matrix_get(Var_mat, i, j) * + (var_y_new / traceG_new[i]) * (v_traceG[i] / var_y) * + (var_y_new / traceG_new[j]) * (v_traceG[j] / var_y); } } - v_sigma2.push_back(s*r*var_y_new); - v_se_sigma2.push_back(sqrt(v)*r*var_y_new); - se_pve_total=sqrt(se_pve_total); + v_sigma2.push_back(s * r * var_y_new); + v_se_sigma2.push_back(sqrt(v) * r * var_y_new); + se_pve_total = sqrt(se_pve_total); - cout<<"sigma2 = "; - for (size_t i=0; i<n_vc+1; i++) { - cout<<v_sigma2[i]<<" "; + cout << "sigma2 = "; + for (size_t i = 0; i < n_vc + 1; i++) { + cout << 
v_sigma2[i] << " "; } - cout<<endl; + cout << endl; - cout<<"se(sigma2) = "; - for (size_t i=0; i<n_vc+1; i++) { - cout<<v_se_sigma2[i]<<" "; + cout << "se(sigma2) = "; + for (size_t i = 0; i < n_vc + 1; i++) { + cout << v_se_sigma2[i] << " "; } - cout<<endl; + cout << endl; - cout<<"pve = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_pve[i]<<" "; + cout << "pve = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_pve[i] << " "; } - cout<<endl; + cout << endl; - cout<<"se(pve) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_se_pve[i]<<" "; + cout << "se(pve) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_se_pve[i] << " "; } - cout<<endl; + cout << endl; - if (n_vc>1) { - cout<<"total pve = "<<pve_total<<endl; - cout<<"se(total pve) = "<<se_pve_total<<endl; + if (n_vc > 1) { + cout << "total pve = " << pve_total << endl; + cout << "se(total pve) = " << se_pve_total << endl; } gsl_permutation_free(pmt); @@ -1558,188 +1716,195 @@ void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W, } // REML for log(sigma2) based on the AI algorithm. -void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K, - const gsl_matrix *W, const gsl_vector *y) { - size_t n1=K->size1, n2=K->size2; - size_t n_vc=n2/n1; - gsl_vector *log_sigma2=gsl_vector_alloc (n_vc+1); +void VC::CalcVCreml(bool noconstrain, const gsl_matrix *K, const gsl_matrix *W, + const gsl_vector *y) { + size_t n1 = K->size1, n2 = K->size2; + size_t n_vc = n2 / n1; + gsl_vector *log_sigma2 = gsl_vector_alloc(n_vc + 1); double d, s; // Set up params. 
- gsl_matrix *P=gsl_matrix_alloc (n1, n1); - gsl_vector *Py=gsl_vector_alloc (n1); - gsl_matrix *KPy_mat=gsl_matrix_alloc (n1, n_vc+1); - gsl_matrix *PKPy_mat=gsl_matrix_alloc (n1, n_vc+1); - gsl_vector *dev1=gsl_vector_alloc (n_vc+1); - gsl_matrix *dev2=gsl_matrix_alloc (n_vc+1, n_vc+1); - gsl_matrix *Hessian=gsl_matrix_alloc (n_vc+1, n_vc+1); - VC_PARAM params={K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain}; + gsl_matrix *P = gsl_matrix_alloc(n1, n1); + gsl_vector *Py = gsl_vector_alloc(n1); + gsl_matrix *KPy_mat = gsl_matrix_alloc(n1, n_vc + 1); + gsl_matrix *PKPy_mat = gsl_matrix_alloc(n1, n_vc + 1); + gsl_vector *dev1 = gsl_vector_alloc(n_vc + 1); + gsl_matrix *dev2 = gsl_matrix_alloc(n_vc + 1, n_vc + 1); + gsl_matrix *Hessian = gsl_matrix_alloc(n_vc + 1, n_vc + 1); + VC_PARAM params = {K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain}; // Initialize sigma2/log_sigma2. - CalcVChe (K, W, y); + CalcVChe(K, W, y); - gsl_blas_ddot (y, y, &s); - s/=(double)n1; - for (size_t i=0; i<n_vc+1; i++) { + gsl_blas_ddot(y, y, &s); + s /= (double)n1; + for (size_t i = 0; i < n_vc + 1; i++) { if (noconstrain) { - d=v_sigma2[i]; + d = v_sigma2[i]; } else { - if (v_sigma2[i]<=0) {d=log(0.1);} else {d=log(v_sigma2[i]);} + if (v_sigma2[i] <= 0) { + d = log(0.1); + } else { + d = log(v_sigma2[i]); + } } - gsl_vector_set (log_sigma2, i, d); + gsl_vector_set(log_sigma2, i, d); } - cout<<"iteration "<<0<<endl; - cout<<"sigma2 = "; - for (size_t i=0; i<n_vc+1; i++) { + cout << "iteration " << 0 << endl; + cout << "sigma2 = "; + for (size_t i = 0; i < n_vc + 1; i++) { if (noconstrain) { - cout<<gsl_vector_get(log_sigma2, i)<<" "; + cout << gsl_vector_get(log_sigma2, i) << " "; } else { - cout<<exp(gsl_vector_get(log_sigma2, i))<<" "; + cout << exp(gsl_vector_get(log_sigma2, i)) << " "; } } - cout<<endl; + cout << endl; // Set up fdf. 
gsl_multiroot_function_fdf FDF; - FDF.n=n_vc+1; - FDF.params=&params; - FDF.f=&LogRL_dev1; - FDF.df=&LogRL_dev2; - FDF.fdf=&LogRL_dev12; + FDF.n = n_vc + 1; + FDF.params = &params; + FDF.f = &LogRL_dev1; + FDF.df = &LogRL_dev2; + FDF.fdf = &LogRL_dev12; // Set up solver. int status; - int iter=0, max_iter=100; + int iter = 0, max_iter = 100; const gsl_multiroot_fdfsolver_type *T_fdf; gsl_multiroot_fdfsolver *s_fdf; - T_fdf=gsl_multiroot_fdfsolver_hybridsj; - s_fdf=gsl_multiroot_fdfsolver_alloc (T_fdf, n_vc+1); + T_fdf = gsl_multiroot_fdfsolver_hybridsj; + s_fdf = gsl_multiroot_fdfsolver_alloc(T_fdf, n_vc + 1); - gsl_multiroot_fdfsolver_set (s_fdf, &FDF, log_sigma2); + gsl_multiroot_fdfsolver_set(s_fdf, &FDF, log_sigma2); do { iter++; - status=gsl_multiroot_fdfsolver_iterate (s_fdf); + status = gsl_multiroot_fdfsolver_iterate(s_fdf); - if (status) break; + if (status) + break; - cout<<"iteration "<<iter<<endl; - cout<<"sigma2 = "; - for (size_t i=0; i<n_vc+1; i++) { + cout << "iteration " << iter << endl; + cout << "sigma2 = "; + for (size_t i = 0; i < n_vc + 1; i++) { if (noconstrain) { - cout<<gsl_vector_get(s_fdf->x, i)<<" "; + cout << gsl_vector_get(s_fdf->x, i) << " "; } else { - cout<<exp(gsl_vector_get(s_fdf->x, i))<<" "; + cout << exp(gsl_vector_get(s_fdf->x, i)) << " "; } } - cout<<endl; - status=gsl_multiroot_test_residual (s_fdf->f, 1e-3); - } - while (status==GSL_CONTINUE && iter<max_iter); + cout << endl; + status = gsl_multiroot_test_residual(s_fdf->f, 1e-3); + } while (status == GSL_CONTINUE && iter < max_iter); // Obtain Hessian and Hessian inverse. - int sig=LogRL_dev12 (s_fdf->x, &params, dev1, dev2); + int sig = LogRL_dev12(s_fdf->x, &params, dev1, dev2); - gsl_permutation * pmt=gsl_permutation_alloc (n_vc+1); - LUDecomp (dev2, pmt, &sig); - LUInvert (dev2, pmt, Hessian); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc + 1); + LUDecomp(dev2, pmt, &sig); + LUInvert(dev2, pmt, Hessian); gsl_permutation_free(pmt); // Save sigma2 and se_sigma2. 
- v_sigma2.clear(); v_se_sigma2.clear(); - for (size_t i=0; i<n_vc+1; i++) { + v_sigma2.clear(); + v_se_sigma2.clear(); + for (size_t i = 0; i < n_vc + 1; i++) { if (noconstrain) { - d=gsl_vector_get(s_fdf->x, i); + d = gsl_vector_get(s_fdf->x, i); } else { - d=exp(gsl_vector_get(s_fdf->x, i)); + d = exp(gsl_vector_get(s_fdf->x, i)); } v_sigma2.push_back(d); if (noconstrain) { - d=-1.0*gsl_matrix_get(Hessian, i, i); + d = -1.0 * gsl_matrix_get(Hessian, i, i); } else { - d=-1.0*d*d*gsl_matrix_get(Hessian, i, i); + d = -1.0 * d * d * gsl_matrix_get(Hessian, i, i); } v_se_sigma2.push_back(sqrt(d)); } - s=0; - for (size_t i=0; i<n_vc; i++) { - s+=v_traceG[i]*v_sigma2[i]; + s = 0; + for (size_t i = 0; i < n_vc; i++) { + s += v_traceG[i] * v_sigma2[i]; } - s+=v_sigma2[n_vc]; + s += v_sigma2[n_vc]; // Compute pve. - v_pve.clear(); pve_total=0; - for (size_t i=0; i<n_vc; i++) { - d=v_traceG[i]*v_sigma2[i]/s; + v_pve.clear(); + pve_total = 0; + for (size_t i = 0; i < n_vc; i++) { + d = v_traceG[i] * v_sigma2[i] / s; v_pve.push_back(d); - pve_total+=d; + pve_total += d; } // Compute se_pve; k=n_vc+1: total. 
double d1, d2; - v_se_pve.clear(); se_pve_total=0; - for (size_t k=0; k<n_vc+1; k++) { - d=0; - for (size_t i=0; i<n_vc+1; i++) { + v_se_pve.clear(); + se_pve_total = 0; + for (size_t k = 0; k < n_vc + 1; k++) { + d = 0; + for (size_t i = 0; i < n_vc + 1; i++) { if (noconstrain) { - d1=gsl_vector_get(s_fdf->x, i); - d1=1; + d1 = gsl_vector_get(s_fdf->x, i); + d1 = 1; } else { - d1=exp(gsl_vector_get(s_fdf->x, i)); + d1 = exp(gsl_vector_get(s_fdf->x, i)); } - if (k<n_vc) { - if (i==k) { - d1*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s); - } else if (i==n_vc) { - d1*=-1*v_traceG[k]*v_sigma2[k]/(s*s); - } else { - d1*=-1*v_traceG[i]*v_traceG[k]*v_sigma2[k]/(s*s); - } + if (k < n_vc) { + if (i == k) { + d1 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s); + } else if (i == n_vc) { + d1 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s); + } else { + d1 *= -1 * v_traceG[i] * v_traceG[k] * v_sigma2[k] / (s * s); + } } else { - if (i==k) { - d1*=-1*(s-v_sigma2[n_vc])/(s*s); - } else { - d1*=v_traceG[i]*v_sigma2[n_vc]/(s*s); - } - } - - for (size_t j=0; j<n_vc+1; j++) { - if (noconstrain) { - d2=gsl_vector_get(s_fdf->x, j); - d2=1; - } else { - d2=exp(gsl_vector_get(s_fdf->x, j)); - } - - if (k<n_vc) { - if (j==k) { - d2*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s); - } else if (j==n_vc) { - d2*=-1*v_traceG[k]*v_sigma2[k]/(s*s); - } else { - d2*=-1*v_traceG[j]*v_traceG[k]*v_sigma2[k]/(s*s); - } - } else { - if (j==k) { - d2*=-1*(s-v_sigma2[n_vc])/(s*s); - } else { - d2*=v_traceG[j]*v_sigma2[n_vc]/(s*s); - } - } - - d+=-1.0*d1*d2*gsl_matrix_get(Hessian, i, j); - } - } - - if (k<n_vc) { - v_se_pve.push_back(sqrt(d) ); + if (i == k) { + d1 *= -1 * (s - v_sigma2[n_vc]) / (s * s); + } else { + d1 *= v_traceG[i] * v_sigma2[n_vc] / (s * s); + } + } + + for (size_t j = 0; j < n_vc + 1; j++) { + if (noconstrain) { + d2 = gsl_vector_get(s_fdf->x, j); + d2 = 1; + } else { + d2 = exp(gsl_vector_get(s_fdf->x, j)); + } + + if (k < n_vc) { + if (j == k) { + d2 *= v_traceG[k] * (s 
- v_sigma2[k] * v_traceG[k]) / (s * s); + } else if (j == n_vc) { + d2 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s); + } else { + d2 *= -1 * v_traceG[j] * v_traceG[k] * v_sigma2[k] / (s * s); + } + } else { + if (j == k) { + d2 *= -1 * (s - v_sigma2[n_vc]) / (s * s); + } else { + d2 *= v_traceG[j] * v_sigma2[n_vc] / (s * s); + } + } + + d += -1.0 * d1 * d2 * gsl_matrix_get(Hessian, i, j); + } + } + + if (k < n_vc) { + v_se_pve.push_back(sqrt(d)); } else { - se_pve_total=sqrt(d); + se_pve_total = sqrt(d); } } @@ -1758,252 +1923,265 @@ void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K, } // Ks are not scaled. -void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, - const gsl_vector *y) { - size_t n1=K->size1, n2=K->size2; - size_t n_vc=n2/n1; +void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W, + const gsl_vector *y) { + size_t n1 = K->size1, n2 = K->size2; + size_t n_vc = n2 / n1; double d, y2_sum, tau_inv, se_tau_inv; // New matrices/vectors. - gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); - gsl_vector *y_scale=gsl_vector_alloc (n1); - gsl_vector *y2=gsl_vector_alloc (n1); - gsl_vector *n1_vec=gsl_vector_alloc (n1); - gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc); - gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc); - gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1); - gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1); + gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2); + gsl_vector *y_scale = gsl_vector_alloc(n1); + gsl_vector *y2 = gsl_vector_alloc(n1); + gsl_vector *n1_vec = gsl_vector_alloc(n1); + gsl_matrix *Ay = gsl_matrix_alloc(n1, n_vc); + gsl_matrix *K2 = gsl_matrix_alloc(n1, n_vc * n_vc); + gsl_matrix *K_tmp = gsl_matrix_alloc(n1, n1); + gsl_matrix *V_mat = gsl_matrix_alloc(n1, n1); // Old matrices/vectors. 
- gsl_vector *pve=gsl_vector_alloc (n_vc); - gsl_vector *se_pve=gsl_vector_alloc (n_vc); - gsl_vector *q_vec=gsl_vector_alloc (n_vc); - gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_vector *pve = gsl_vector_alloc(n_vc); + gsl_vector *se_pve = gsl_vector_alloc(n_vc); + gsl_vector *q_vec = gsl_vector_alloc(n_vc); + gsl_matrix *S1 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *S2 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *J_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); // Center and scale K by W, and standardize K further so that all // diagonal elements are 1 - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { gsl_matrix_view Kscale_sub = - gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1); + gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1); gsl_matrix_const_view K_sub = - gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1); - gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix); + gsl_matrix_const_submatrix(K, 0, n1 * i, n1, n1); + gsl_matrix_memcpy(&Kscale_sub.matrix, &K_sub.matrix); - CenterMatrix (&Kscale_sub.matrix, W); - StandardizeMatrix (&Kscale_sub.matrix); + CenterMatrix(&Kscale_sub.matrix, W); + StandardizeMatrix(&Kscale_sub.matrix); } // Center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1) - gsl_vector_memcpy (y_scale, y); - CenterVector (y_scale, W); + gsl_vector_memcpy(y_scale, y); + CenterVector(y_scale, W); // Compute y^2 and sum(y^2), which is also the variance of y*n1. 
- gsl_vector_memcpy (y2, y_scale); - gsl_vector_mul (y2, y_scale); + gsl_vector_memcpy(y2, y_scale); + gsl_vector_mul(y2, y_scale); - y2_sum=0; - for (size_t i=0; i<y2->size; i++) { - y2_sum+=gsl_vector_get(y2, i); + y2_sum = 0; + for (size_t i = 0; i < y2->size; i++) { + y2_sum += gsl_vector_get(y2, i); } // Compute the n_vc size q vector. - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { gsl_matrix_const_view Kscale_sub = - gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1); + gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1); - gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, - 0.0, n1_vec); + gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, n1_vec); - gsl_blas_ddot (n1_vec, y_scale, &d); - gsl_vector_set(q_vec, i, d-y2_sum); + gsl_blas_ddot(n1_vec, y_scale, &d); + gsl_vector_set(q_vec, i, d - y2_sum); } // Compute the n_vc by n_vc S1 and S2 matrix (and eventually // S=S1-\tau^{-1}S2). - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { gsl_matrix_const_view Kscale_sub1 = - gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1); + gsl_matrix_const_submatrix(K_scale, 0, n1 * i, n1, n1); - for (size_t j=i; j<n_vc; j++) { + for (size_t j = i; j < n_vc; j++) { gsl_matrix_const_view Kscale_sub2 = - gsl_matrix_const_submatrix (K_scale, 0, n1*j, n1, n1); + gsl_matrix_const_submatrix(K_scale, 0, n1 * j, n1, n1); - gsl_matrix_memcpy (K_tmp, &Kscale_sub1.matrix); - gsl_matrix_mul_elements (K_tmp, &Kscale_sub2.matrix); + gsl_matrix_memcpy(K_tmp, &Kscale_sub1.matrix); + gsl_matrix_mul_elements(K_tmp, &Kscale_sub2.matrix); gsl_vector_set_zero(n1_vec); - for (size_t t=0; t<K_tmp->size1; t++) { - gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t); - gsl_vector_add (n1_vec, &Ktmp_col.vector); + for (size_t t = 0; t < K_tmp->size1; t++) { + gsl_vector_view Ktmp_col = gsl_matrix_column(K_tmp, t); + gsl_vector_add(n1_vec, &Ktmp_col.vector); } - gsl_vector_add_constant (n1_vec, -1.0); + 
gsl_vector_add_constant(n1_vec, -1.0); // Compute S1. - gsl_blas_ddot (n1_vec, y2, &d); - gsl_matrix_set (S1, i, j, 2*d); - if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);} + gsl_blas_ddot(n1_vec, y2, &d); + gsl_matrix_set(S1, i, j, 2 * d); + if (i != j) { + gsl_matrix_set(S1, j, i, 2 * d); + } // Compute S2. - d=0; - for (size_t t=0; t<n1_vec->size; t++) { - d+=gsl_vector_get (n1_vec, t); + d = 0; + for (size_t t = 0; t < n1_vec->size; t++) { + d += gsl_vector_get(n1_vec, t); + } + gsl_matrix_set(S2, i, j, d); + if (i != j) { + gsl_matrix_set(S2, j, i, d); } - gsl_matrix_set (S2, i, j, d); - if (i!=j) {gsl_matrix_set (S2, j, i, d);} // Save information to compute J. - gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j); - gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i); + gsl_vector_view K2col1 = gsl_matrix_column(K2, n_vc * i + j); + gsl_vector_view K2col2 = gsl_matrix_column(K2, n_vc * j + i); gsl_vector_memcpy(&K2col1.vector, n1_vec); - if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);} + if (i != j) { + gsl_vector_memcpy(&K2col2.vector, n1_vec); + } } } // Iterate to solve tau and h's. - size_t it=0; - double s=1; - while (abs(s)>1e-3 && it<100) { + size_t it = 0; + double s = 1; + while (abs(s) > 1e-3 && it < 100) { // Update tau_inv. - gsl_blas_ddot (q_vec, pve, &d); - if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;} - tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1)); - if (it>0) {s/=tau_inv;} + gsl_blas_ddot(q_vec, pve, &d); + if (it > 0) { + s = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)) - tau_inv; + } + tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)); + if (it > 0) { + s /= tau_inv; + } // Update S. - gsl_matrix_memcpy (S_mat, S2); - gsl_matrix_scale (S_mat, -1*tau_inv); - gsl_matrix_add (S_mat, S1); + gsl_matrix_memcpy(S_mat, S2); + gsl_matrix_scale(S_mat, -1 * tau_inv); + gsl_matrix_add(S_mat, S1); // Update h=S^{-1}q. 
int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); - LUDecomp (S_mat, pmt, &sig); - LUInvert (S_mat, pmt, Si_mat); - gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); + LUDecomp(S_mat, pmt, &sig); + LUInvert(S_mat, pmt, Si_mat); + gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); it++; } // Compute V matrix and A matrix (K_scale is destroyed, so need to // compute V first). - gsl_matrix_set_zero (V_mat); - for (size_t i=0; i<n_vc; i++) { + gsl_matrix_set_zero(V_mat); + for (size_t i = 0; i < n_vc; i++) { gsl_matrix_view Kscale_sub = - gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1); + gsl_matrix_submatrix(K_scale, 0, n1 * i, n1, n1); // Compute V. - gsl_matrix_memcpy (K_tmp, &Kscale_sub.matrix); - gsl_matrix_scale (K_tmp, gsl_vector_get(pve, i)); - gsl_matrix_add (V_mat, K_tmp); + gsl_matrix_memcpy(K_tmp, &Kscale_sub.matrix); + gsl_matrix_scale(K_tmp, gsl_vector_get(pve, i)); + gsl_matrix_add(V_mat, K_tmp); // Compute A; the corresponding Kscale is destroyed. gsl_matrix_const_view K2_sub = - gsl_matrix_const_submatrix (K2, 0, n_vc*i, n1, n_vc); - gsl_blas_dgemv (CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec); + gsl_matrix_const_submatrix(K2, 0, n_vc * i, n1, n_vc); + gsl_blas_dgemv(CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec); - for (size_t t=0; t<n1; t++) { - gsl_matrix_set (K_scale, t, n1*i+t, gsl_vector_get(n1_vec, t) ); + for (size_t t = 0; t < n1; t++) { + gsl_matrix_set(K_scale, t, n1 * i + t, gsl_vector_get(n1_vec, t)); } // Compute Ay. - gsl_vector_view Ay_col=gsl_matrix_column (Ay, i); - gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, - 0.0, &Ay_col.vector); + gsl_vector_view Ay_col = gsl_matrix_column(Ay, i); + gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, + &Ay_col.vector); } - gsl_matrix_scale (V_mat, tau_inv); + gsl_matrix_scale(V_mat, tau_inv); // Compute J matrix. 
- for (size_t i=0; i<n_vc; i++) { - gsl_vector_view Ay_col1=gsl_matrix_column (Ay, i); + for (size_t i = 0; i < n_vc; i++) { + gsl_vector_view Ay_col1 = gsl_matrix_column(Ay, i); gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, &Ay_col1.vector, 0.0, n1_vec); - for (size_t j=i; j<n_vc; j++) { - gsl_vector_view Ay_col2=gsl_matrix_column (Ay, j); + for (size_t j = i; j < n_vc; j++) { + gsl_vector_view Ay_col2 = gsl_matrix_column(Ay, j); - gsl_blas_ddot (&Ay_col2.vector, n1_vec, &d); - gsl_matrix_set (J_mat, i, j, 2.0*d); - if (i!=j) {gsl_matrix_set (J_mat, j, i, 2.0*d);} + gsl_blas_ddot(&Ay_col2.vector, n1_vec, &d); + gsl_matrix_set(J_mat, i, j, 2.0 * d); + if (i != j) { + gsl_matrix_set(J_mat, j, i, 2.0 * d); + } } } // Compute H^{-1}JH^{-1} as V(\hat h), where H=S2*tau_inv; this is // stored in Var_mat. - gsl_matrix_memcpy (S_mat, S2); - gsl_matrix_scale (S_mat, tau_inv); + gsl_matrix_memcpy(S_mat, S2); + gsl_matrix_scale(S_mat, tau_inv); - LUDecomp (S_mat, pmt, &sig); - LUInvert (S_mat, pmt, Si_mat); + LUDecomp(S_mat, pmt, &sig); + LUInvert(S_mat, pmt, Si_mat); gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, J_mat, 0.0, S_mat); gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, S_mat, Si_mat, 0.0, Var_mat); // Compute variance for tau_inv. gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec); - gsl_blas_ddot (y_scale, n1_vec, &d); - se_tau_inv=sqrt(2*d)/(double)n1; + gsl_blas_ddot(y_scale, n1_vec, &d); + se_tau_inv = sqrt(2 * d) / (double)n1; // Transform pve back to the original scale and save data. 
- v_pve.clear(); v_se_pve.clear(); - v_sigma2.clear(); v_se_sigma2.clear(); + v_pve.clear(); + v_se_pve.clear(); + v_sigma2.clear(); + v_se_sigma2.clear(); - pve_total=0, se_pve_total=0; - for (size_t i=0; i<n_vc; i++) { - d=gsl_vector_get (pve, i); - pve_total+=d; + pve_total = 0, se_pve_total = 0; + for (size_t i = 0; i < n_vc; i++) { + d = gsl_vector_get(pve, i); + pve_total += d; v_pve.push_back(d); - v_sigma2.push_back(d*tau_inv/v_traceG[i] ); + v_sigma2.push_back(d * tau_inv / v_traceG[i]); - d=sqrt(gsl_matrix_get (Var_mat, i, i)); + d = sqrt(gsl_matrix_get(Var_mat, i, i)); v_se_pve.push_back(d); - v_se_sigma2.push_back(d*tau_inv/v_traceG[i]); + v_se_sigma2.push_back(d * tau_inv / v_traceG[i]); - for (size_t j=0; j<n_vc; j++) { - se_pve_total+=gsl_matrix_get(Var_mat, i, j); + for (size_t j = 0; j < n_vc; j++) { + se_pve_total += gsl_matrix_get(Var_mat, i, j); } } - v_sigma2.push_back( (1-pve_total)*tau_inv ); - v_se_sigma2.push_back(sqrt(se_pve_total)*tau_inv ); - se_pve_total=sqrt(se_pve_total); + v_sigma2.push_back((1 - pve_total) * tau_inv); + v_se_sigma2.push_back(sqrt(se_pve_total) * tau_inv); + se_pve_total = sqrt(se_pve_total); - cout<<"sigma2 = "; - for (size_t i=0; i<n_vc+1; i++) { - cout<<v_sigma2[i]<<" "; + cout << "sigma2 = "; + for (size_t i = 0; i < n_vc + 1; i++) { + cout << v_sigma2[i] << " "; } - cout<<endl; + cout << endl; - cout<<"se(sigma2) = "; - for (size_t i=0; i<n_vc+1; i++) { - cout<<v_se_sigma2[i]<<" "; + cout << "se(sigma2) = "; + for (size_t i = 0; i < n_vc + 1; i++) { + cout << v_se_sigma2[i] << " "; } - cout<<endl; + cout << endl; - cout<<"pve = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_pve[i]<<" "; + cout << "pve = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_pve[i] << " "; } - cout<<endl; + cout << endl; - cout<<"se(pve) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_se_pve[i]<<" "; + cout << "se(pve) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_se_pve[i] << " "; } - cout<<endl; + cout << endl; - if 
(n_vc>1) { - cout<<"total pve = "<<pve_total<<endl; - cout<<"se(total pve) = "<<se_pve_total<<endl; + if (n_vc > 1) { + cout << "total pve = " << pve_total << endl; + cout << "se(total pve) = " << se_pve_total << endl; } gsl_permutation_free(pmt); @@ -2031,234 +2209,248 @@ void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, } // Read bimbam mean genotype file and compute XWz. -bool BimbamXwz (const string &file_geno, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const vector<size_t> &vec_cat, const gsl_vector *w, - const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return false; - } - - string line; - char *ch_ptr; - - size_t n_miss; - double d, geno_mean, geno_var; - - size_t ni_test=XWz->size1; - gsl_vector *geno=gsl_vector_alloc (ni_test); - gsl_vector *geno_miss=gsl_vector_alloc (ni_test); - gsl_vector *wz=gsl_vector_alloc (w->size); - gsl_vector_memcpy (wz, z); - gsl_vector_mul(wz, w); - - for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - geno_mean=0.0; n_miss=0; geno_var=0.0; - gsl_vector_set_all(geno_miss, 0); - - size_t j=0; - for (size_t i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0) {continue;} - ch_ptr=strtok (NULL, " , \t"); - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(geno_miss, i, 0); - n_miss++; - } else { - d=atof(ch_ptr); - gsl_vector_set (geno, j, d); - gsl_vector_set (geno_miss, j, 1); - geno_mean+=d; - geno_var+=d*d; - } - j++; - } - - geno_mean/=(double)(ni_test-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - 
geno_var/=(double)ni_test; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (geno_miss, i)==0) { - gsl_vector_set(geno, i, geno_mean); - } - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - gsl_vector_view XWz_col= - gsl_matrix_column(XWz, vec_cat[ns_test]); - d=gsl_vector_get (wz, ns_test); - gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector); - - ns_test++; - } - - cout<<endl; - - gsl_vector_free (geno); - gsl_vector_free (geno_miss); - gsl_vector_free (wz); - - infile.close(); - infile.clear(); - - return true; +bool BimbamXwz(const string &file_geno, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const vector<size_t> &vec_cat, const gsl_vector *w, + const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + string line; + char *ch_ptr; + + size_t n_miss; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + gsl_vector *geno = gsl_vector_alloc(ni_test); + gsl_vector *geno_miss = gsl_vector_alloc(ni_test); + gsl_vector *wz = gsl_vector_alloc(w->size); + gsl_vector_memcpy(wz, z); + gsl_vector_mul(wz, w); + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + geno_mean = 0.0; + n_miss = 0; + geno_var = 0.0; + gsl_vector_set_all(geno_miss, 0); + + size_t j = 0; + for (size_t i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ch_ptr = strtok(NULL, " , \t"); + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(geno_miss, i, 0); + n_miss++; 
+ } else { + d = atof(ch_ptr); + gsl_vector_set(geno, j, d); + gsl_vector_set(geno_miss, j, 1); + geno_mean += d; + geno_var += d * d; + } + j++; + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]); + d = gsl_vector_get(wz, ns_test); + gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector); + + ns_test++; + } + + cout << endl; + + gsl_vector_free(geno); + gsl_vector_free(geno_miss); + gsl_vector_free(wz); + + infile.close(); + infile.clear(); + + return true; } // Read PLINK bed file and compute XWz. -bool PlinkXwz (const string &file_bed, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const vector<size_t> &vec_cat, const gsl_vector *w, - const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return false; - } - - char ch[1]; - bitset<8> b; - - size_t n_miss, ci_total, ci_test; - double d, geno_mean, geno_var; - - size_t ni_test=XWz->size1; - size_t ni_total=indicator_idv.size(); - gsl_vector *geno=gsl_vector_alloc (ni_test); - gsl_vector *wz=gsl_vector_alloc (w->size); - gsl_vector_memcpy (wz, z); - gsl_vector_mul(wz, w); - - int n_bit; - - // Calculate n_bit and c, the number of bit for each snp. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. 
- for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. - geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==ni_total) { - break; - } - if (indicator_idv[ci_total]==0) { - ci_total++; - continue; - } - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(geno, ci_test, 2.0); - geno_mean+=2.0; geno_var+=4.0; - } - else { - gsl_vector_set(geno, ci_test, 1.0); - geno_mean+=1.0; geno_var+=1.0; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(geno, ci_test, 0.0); - } - else { - gsl_vector_set(geno, ci_test, -9.0); - n_miss++; - } - } - - ci_test++; - ci_total++; - } - } - - geno_mean/=(double)(ni_test-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_test; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_test; ++i) { - d=gsl_vector_get(geno,i); - if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);} - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - gsl_vector_view XWz_col= - gsl_matrix_column(XWz, vec_cat[ns_test]); - d=gsl_vector_get (wz, ns_test); - gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector); - - ns_test++; - } - cout<<endl; - - gsl_vector_free (geno); - gsl_vector_free (wz); - - infile.close(); - infile.clear(); - - return true; +bool PlinkXwz(const string &file_bed, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const vector<size_t> &vec_cat, const gsl_vector *w, + const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { + ifstream infile(file_bed.c_str(), 
ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + char ch[1]; + bitset<8> b; + + size_t n_miss, ci_total, ci_test; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + size_t ni_total = indicator_idv.size(); + gsl_vector *geno = gsl_vector_alloc(ni_test); + gsl_vector *wz = gsl_vector_alloc(w->size); + gsl_vector_memcpy(wz, z); + gsl_vector_mul(wz, w); + + int n_bit; + + // Calculate n_bit and c, the number of bit for each snp. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + geno_mean = 0.0; + n_miss = 0; + ci_total = 0; + geno_var = 0.0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. 
+ for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(geno, ci_test, 2.0); + geno_mean += 2.0; + geno_var += 4.0; + } else { + gsl_vector_set(geno, ci_test, 1.0); + geno_mean += 1.0; + geno_var += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(geno, ci_test, 0.0); + } else { + gsl_vector_set(geno, ci_test, -9.0); + n_miss++; + } + } + + ci_test++; + ci_total++; + } + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + d = gsl_vector_get(geno, i); + if (d == -9.0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]); + d = gsl_vector_get(wz, ns_test); + gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector); + + ns_test++; + } + cout << endl; + + gsl_vector_free(geno); + gsl_vector_free(wz); + + infile.close(); + infile.clear(); + + return true; } // Read multiple genotype files and compute XWz. -bool MFILEXwz (const size_t mfile_mode, const string &file_mfile, - const int display_pace, vector<int> &indicator_idv, - vector<vector<int> > &mindicator_snp, - const vector<size_t> &vec_cat, const gsl_vector *w, - const gsl_vector *z, gsl_matrix *XWz) { +bool MFILEXwz(const size_t mfile_mode, const string &file_mfile, + const int display_pace, vector<int> &indicator_idv, + vector<vector<int>> &mindicator_snp, + const vector<size_t> &vec_cat, const gsl_vector *w, + const gsl_vector *z, gsl_matrix *XWz) { gsl_matrix_set_zero(XWz); - igzstream infile (file_mfile.c_str(), igzstream::in); + igzstream infile(file_mfile.c_str(), igzstream::in); if (!infile) { - cout<<"error! 
fail to open mfile file: "<<file_mfile<<endl; + cout << "error! fail to open mfile file: " << file_mfile << endl; return false; } string file_name; - size_t l=0, ns_test=0; + size_t l = 0, ns_test = 0; while (!safeGetline(infile, file_name).eof()) { - if (mfile_mode==1) { - file_name+=".bed"; - PlinkXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], - vec_cat, w, z, ns_test, XWz); + if (mfile_mode == 1) { + file_name += ".bed"; + PlinkXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], + vec_cat, w, z, ns_test, XWz); } else { - BimbamXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], - vec_cat, w, z, ns_test, XWz); + BimbamXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], + vec_cat, w, z, ns_test, XWz); } l++; @@ -2271,228 +2463,241 @@ bool MFILEXwz (const size_t mfile_mode, const string &file_mfile, } // Read bimbam mean genotype file and compute X_i^TX_jWz. -bool BimbamXtXwz (const string &file_geno, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return false; - } - - string line; - char *ch_ptr; - - size_t n_miss; - double d, geno_mean, geno_var; - - size_t ni_test=XWz->size1; - gsl_vector *geno=gsl_vector_alloc (ni_test); - gsl_vector *geno_miss=gsl_vector_alloc (ni_test); - - for (size_t t=0; t<indicator_snp.size(); ++t) { - !safeGetline(infile, line).eof(); - if (t%display_pace==0 || t==(indicator_snp.size()-1)) { - ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1); - } - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - geno_mean=0.0; n_miss=0; geno_var=0.0; - gsl_vector_set_all(geno_miss, 0); - - size_t j=0; - for (size_t i=0; i<indicator_idv.size(); ++i) { - if 
(indicator_idv[i]==0) {continue;} - ch_ptr=strtok (NULL, " , \t"); - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(geno_miss, i, 0); - n_miss++; - } - else { - d=atof(ch_ptr); - gsl_vector_set (geno, j, d); - gsl_vector_set (geno_miss, j, 1); - geno_mean+=d; - geno_var+=d*d; - } - j++; - } - - geno_mean/=(double)(ni_test-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_test; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_test; ++i) { - if (gsl_vector_get (geno_miss, i)==0) { - gsl_vector_set(geno, i, geno_mean); - } - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - for (size_t i=0; i<XWz->size2; i++) { - gsl_vector_const_view XWz_col= - gsl_matrix_const_column(XWz, i); - gsl_blas_ddot (geno, &XWz_col.vector, &d); - gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); - } - - ns_test++; - } - - cout<<endl; - - gsl_vector_free (geno); - gsl_vector_free (geno_miss); - - infile.close(); - infile.clear(); - - return true; +bool BimbamXtXwz(const string &file_geno, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + string line; + char *ch_ptr; + + size_t n_miss; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + gsl_vector *geno = gsl_vector_alloc(ni_test); + gsl_vector *geno_miss = gsl_vector_alloc(ni_test); + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + geno_mean = 0.0; + n_miss = 0; + geno_var = 0.0; + 
gsl_vector_set_all(geno_miss, 0); + + size_t j = 0; + for (size_t i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ch_ptr = strtok(NULL, " , \t"); + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(geno_miss, i, 0); + n_miss++; + } else { + d = atof(ch_ptr); + gsl_vector_set(geno, j, d); + gsl_vector_set(geno_miss, j, 1); + geno_mean += d; + geno_var += d * d; + } + j++; + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + for (size_t i = 0; i < XWz->size2; i++) { + gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i); + gsl_blas_ddot(geno, &XWz_col.vector, &d); + gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var)); + } + + ns_test++; + } + + cout << endl; + + gsl_vector_free(geno); + gsl_vector_free(geno_miss); + + infile.close(); + infile.clear(); + + return true; } // Read PLINK bed file and compute XWz. -bool PlinkXtXwz (const string &file_bed, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return false; - } - - char ch[1]; - bitset<8> b; - - size_t n_miss, ci_total, ci_test; - double d, geno_mean, geno_var; - - size_t ni_test=XWz->size1; - size_t ni_total=indicator_idv.size(); - gsl_vector *geno=gsl_vector_alloc (ni_test); - - int n_bit; - - // Calculate n_bit and c, the number of bit for each snp. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. 
- for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (size_t t=0; t<indicator_snp.size(); ++t) { - if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);} - if (indicator_snp[t]==0) {continue;} - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. - geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0; - for (int i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0; - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && ci_total==ni_total) { - break; - } - if (indicator_idv[ci_total]==0) { - ci_total++; - continue; - } - - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(geno, ci_test, 2.0); - geno_mean+=2.0; - geno_var+=4.0; - } - else { - gsl_vector_set(geno, ci_test, 1.0); - geno_mean+=1.0; - geno_var+=1.0; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(geno, ci_test, 0.0); - } - else { - gsl_vector_set(geno, ci_test, -9.0); - n_miss++; - } - } - - ci_test++; - ci_total++; - } - } - - geno_mean/=(double)(ni_test-n_miss); - geno_var+=geno_mean*geno_mean*(double)n_miss; - geno_var/=(double)ni_test; - geno_var-=geno_mean*geno_mean; - - for (size_t i=0; i<ni_test; ++i) { - d=gsl_vector_get(geno,i); - if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);} - } - - gsl_vector_add_constant (geno, -1.0*geno_mean); - - for (size_t i=0; i<XWz->size2; i++) { - gsl_vector_const_view XWz_col= - gsl_matrix_const_column(XWz, i); - gsl_blas_ddot (geno, &XWz_col.vector, &d); - gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); - } - - ns_test++; - } - cout<<endl; - - gsl_vector_free (geno); - - infile.close(); - infile.clear(); - - return true; +bool PlinkXtXwz(const string &file_bed, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) 
{ + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + char ch[1]; + bitset<8> b; + + size_t n_miss, ci_total, ci_test; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + size_t ni_total = indicator_idv.size(); + gsl_vector *geno = gsl_vector_alloc(ni_test); + + int n_bit; + + // Calculate n_bit and c, the number of bit for each snp. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + geno_mean = 0.0; + n_miss = 0; + ci_total = 0; + geno_var = 0.0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0; + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(geno, ci_test, 2.0); + geno_mean += 2.0; + geno_var += 4.0; + } else { + gsl_vector_set(geno, ci_test, 1.0); + geno_mean += 1.0; + geno_var += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(geno, ci_test, 0.0); + } else { + gsl_vector_set(geno, ci_test, -9.0); + n_miss++; + } + } + + ci_test++; + ci_total++; + } + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + d = gsl_vector_get(geno, i); + if (d == -9.0) { + gsl_vector_set(geno, i, 
geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + for (size_t i = 0; i < XWz->size2; i++) { + gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i); + gsl_blas_ddot(geno, &XWz_col.vector, &d); + gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var)); + } + + ns_test++; + } + cout << endl; + + gsl_vector_free(geno); + + infile.close(); + infile.clear(); + + return true; } // Read multiple genotype files and compute XWz. -bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile, - const int display_pace, vector<int> &indicator_idv, - vector<vector<int> > &mindicator_snp, const gsl_matrix *XWz, - gsl_matrix *XtXWz) { +bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile, + const int display_pace, vector<int> &indicator_idv, + vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz, + gsl_matrix *XtXWz) { gsl_matrix_set_zero(XtXWz); - igzstream infile (file_mfile.c_str(), igzstream::in); + igzstream infile(file_mfile.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open mfile file: "<<file_mfile<<endl; + cout << "error! fail to open mfile file: " << file_mfile << endl; return false; } string file_name; - size_t l=0, ns_test=0; + size_t l = 0, ns_test = 0; while (!safeGetline(infile, file_name).eof()) { - if (mfile_mode==1) { - file_name+=".bed"; - PlinkXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], - XWz, ns_test, XtXWz); + if (mfile_mode == 1) { + file_name += ".bed"; + PlinkXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], XWz, + ns_test, XtXWz); } else { - BimbamXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], - XWz, ns_test, XtXWz); + BimbamXtXwz(file_name, display_pace, indicator_idv, mindicator_snp[l], + XWz, ns_test, XtXWz); } l++; @@ -2506,217 +2711,225 @@ bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile, // Compute confidence intervals from summary statistics. 
void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz, - const gsl_matrix *XtXWz, const gsl_matrix *S_mat, - const gsl_matrix *Svar_mat, const gsl_vector *w, - const gsl_vector *z, const gsl_vector *s_vec, - const vector<size_t> &vec_cat, const vector<double> &v_pve, - vector<double> &v_se_pve, double &pve_total, - double &se_pve_total, vector<double> &v_sigma2, - vector<double> &v_se_sigma2, vector<double> &v_enrich, - vector<double> &v_se_enrich) { - size_t n_vc=XWz->size2, ns_test=w->size, ni_test=XWz->size1; + const gsl_matrix *XtXWz, const gsl_matrix *S_mat, + const gsl_matrix *Svar_mat, const gsl_vector *w, + const gsl_vector *z, const gsl_vector *s_vec, + const vector<size_t> &vec_cat, const vector<double> &v_pve, + vector<double> &v_se_pve, double &pve_total, double &se_pve_total, + vector<double> &v_sigma2, vector<double> &v_se_sigma2, + vector<double> &v_enrich, vector<double> &v_se_enrich) { + size_t n_vc = XWz->size2, ns_test = w->size, ni_test = XWz->size1; // Set up matrices. 
- gsl_vector *w_pve=gsl_vector_alloc (ns_test); - gsl_vector *wz=gsl_vector_alloc (ns_test); - gsl_vector *zwz=gsl_vector_alloc (n_vc); - gsl_vector *zz=gsl_vector_alloc (n_vc); - gsl_vector *Xz_pve=gsl_vector_alloc (ni_test); - gsl_vector *WXtXWz=gsl_vector_alloc (ns_test); - - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_vector *w_pve = gsl_vector_alloc(ns_test); + gsl_vector *wz = gsl_vector_alloc(ns_test); + gsl_vector *zwz = gsl_vector_alloc(n_vc); + gsl_vector *zz = gsl_vector_alloc(n_vc); + gsl_vector *Xz_pve = gsl_vector_alloc(ni_test); + gsl_vector *WXtXWz = gsl_vector_alloc(ns_test); + + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc); double d, s0, s1, s, s_pve, s_snp; // Compute wz and zwz. 
- gsl_vector_memcpy (wz, z); - gsl_vector_mul (wz, w); + gsl_vector_memcpy(wz, z); + gsl_vector_mul(wz, w); - gsl_vector_set_zero (zwz); - gsl_vector_set_zero (zz); - for (size_t i=0; i<w->size; i++) { - d=gsl_vector_get (wz, i)*gsl_vector_get (z, i); - d+=gsl_vector_get (zwz, vec_cat[i]); - gsl_vector_set (zwz, vec_cat[i], d); + gsl_vector_set_zero(zwz); + gsl_vector_set_zero(zz); + for (size_t i = 0; i < w->size; i++) { + d = gsl_vector_get(wz, i) * gsl_vector_get(z, i); + d += gsl_vector_get(zwz, vec_cat[i]); + gsl_vector_set(zwz, vec_cat[i], d); - d=gsl_vector_get (z, i)*gsl_vector_get (z, i); - d+=gsl_vector_get (zz, vec_cat[i]); - gsl_vector_set (zz, vec_cat[i], d); + d = gsl_vector_get(z, i) * gsl_vector_get(z, i); + d += gsl_vector_get(zz, vec_cat[i]); + gsl_vector_set(zz, vec_cat[i], d); } // Compute wz, ve and Xz_pve. - gsl_vector_set_zero (Xz_pve); s_pve=0; s_snp=0; - for (size_t i=0; i<n_vc; i++) { - s_pve+=v_pve[i]; - s_snp+=gsl_vector_get(s_vec, i); + gsl_vector_set_zero(Xz_pve); + s_pve = 0; + s_snp = 0; + for (size_t i = 0; i < n_vc; i++) { + s_pve += v_pve[i]; + s_snp += gsl_vector_get(s_vec, i); - gsl_vector_const_view Xz_col=gsl_matrix_const_column (Xz, i); - gsl_blas_daxpy (v_pve[i]/gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve); + gsl_vector_const_view Xz_col = gsl_matrix_const_column(Xz, i); + gsl_blas_daxpy(v_pve[i] / gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve); } // Set up wpve vector. - for (size_t i=0; i<w->size; i++) { - d=v_pve[vec_cat[i]]/gsl_vector_get(s_vec, vec_cat[i]); - gsl_vector_set (w_pve, i, d); + for (size_t i = 0; i < w->size; i++) { + d = v_pve[vec_cat[i]] / gsl_vector_get(s_vec, vec_cat[i]); + gsl_vector_set(w_pve, i, d); } // Compute Vq (in qvar_mat). 
- s0=1-s_pve; - for (size_t i=0; i<n_vc; i++) { - s0+=gsl_vector_get (zz, i)*v_pve[i]/gsl_vector_get(s_vec, i); + s0 = 1 - s_pve; + for (size_t i = 0; i < n_vc; i++) { + s0 += gsl_vector_get(zz, i) * v_pve[i] / gsl_vector_get(s_vec, i); } - for (size_t i=0; i<n_vc; i++) { - s1=s0; - s1-=gsl_vector_get (zwz, i)*(1-s_pve)/gsl_vector_get(s_vec, i); + for (size_t i = 0; i < n_vc; i++) { + s1 = s0; + s1 -= gsl_vector_get(zwz, i) * (1 - s_pve) / gsl_vector_get(s_vec, i); - gsl_vector_const_view XWz_col1=gsl_matrix_const_column (XWz, i); - gsl_vector_const_view XtXWz_col1=gsl_matrix_const_column (XtXWz, i); + gsl_vector_const_view XWz_col1 = gsl_matrix_const_column(XWz, i); + gsl_vector_const_view XtXWz_col1 = gsl_matrix_const_column(XtXWz, i); - gsl_vector_memcpy (WXtXWz, &XtXWz_col1.vector); - gsl_vector_mul (WXtXWz, w_pve); + gsl_vector_memcpy(WXtXWz, &XtXWz_col1.vector); + gsl_vector_mul(WXtXWz, w_pve); - gsl_blas_ddot (Xz_pve, &XWz_col1.vector, &d); - s1-=d/gsl_vector_get(s_vec, i); + gsl_blas_ddot(Xz_pve, &XWz_col1.vector, &d); + s1 -= d / gsl_vector_get(s_vec, i); - for (size_t j=0; j<n_vc; j++) { - s=s1; + for (size_t j = 0; j < n_vc; j++) { + s = s1; - s-=gsl_vector_get (zwz, j)*(1-s_pve)/gsl_vector_get(s_vec, j); + s -= gsl_vector_get(zwz, j) * (1 - s_pve) / gsl_vector_get(s_vec, j); - gsl_vector_const_view XWz_col2=gsl_matrix_const_column (XWz, j); - gsl_vector_const_view XtXWz_col2=gsl_matrix_const_column (XtXWz, j); + gsl_vector_const_view XWz_col2 = gsl_matrix_const_column(XWz, j); + gsl_vector_const_view XtXWz_col2 = gsl_matrix_const_column(XtXWz, j); - gsl_blas_ddot (WXtXWz, &XtXWz_col2.vector, &d); - s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j)); + gsl_blas_ddot(WXtXWz, &XtXWz_col2.vector, &d); + s += d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j)); - gsl_blas_ddot (&XWz_col1.vector, &XWz_col2.vector, &d); - s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j))*(1-s_pve); + gsl_blas_ddot(&XWz_col1.vector, &XWz_col2.vector, &d); + s 
+= d / (gsl_vector_get(s_vec, i) * gsl_vector_get(s_vec, j)) * + (1 - s_pve); - gsl_blas_ddot (Xz_pve, &XWz_col2.vector, &d); - s-=d/gsl_vector_get(s_vec, j); + gsl_blas_ddot(Xz_pve, &XWz_col2.vector, &d); + s -= d / gsl_vector_get(s_vec, j); - gsl_matrix_set (qvar_mat, i, j, s); + gsl_matrix_set(qvar_mat, i, j, s); } } - d=(double)(ni_test-1); - gsl_matrix_scale (qvar_mat, 2.0/(d*d*d)); + d = (double)(ni_test - 1); + gsl_matrix_scale(qvar_mat, 2.0 / (d * d * d)); // Calculate S^{-1}. - gsl_matrix_memcpy (tmp_mat, S_mat); + gsl_matrix_memcpy(tmp_mat, S_mat); int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); - LUDecomp (tmp_mat, pmt, &sig); - LUInvert (tmp_mat, pmt, Si_mat); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); + LUDecomp(tmp_mat, pmt, &sig); + LUInvert(tmp_mat, pmt, Si_mat); // Calculate variance for the estimates. - for (size_t i=0; i<n_vc; i++) { - for (size_t j=i; j<n_vc; j++) { - d=gsl_matrix_get(Svar_mat, i, j); - d*=v_pve[i]*v_pve[j]; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = i; j < n_vc; j++) { + d = gsl_matrix_get(Svar_mat, i, j); + d *= v_pve[i] * v_pve[j]; - d+=gsl_matrix_get(qvar_mat, i, j); + d += gsl_matrix_get(qvar_mat, i, j); gsl_matrix_set(Var_mat, i, j, d); - if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);} + if (i != j) { + gsl_matrix_set(Var_mat, j, i, d); + } } } - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,Si_mat,Var_mat,0.0,tmp_mat); - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,tmp_mat,Si_mat,0.0,Var_mat); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0, + tmp_mat); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0, + Var_mat); // Compute sigma2 per snp, enrich. 
- v_sigma2.clear(); v_enrich.clear(); - for (size_t i=0; i<n_vc; i++) { - v_sigma2.push_back(v_pve[i]/gsl_vector_get(s_vec, i) ); - v_enrich.push_back(v_pve[i]/gsl_vector_get(s_vec, i)*s_snp/s_pve); + v_sigma2.clear(); + v_enrich.clear(); + for (size_t i = 0; i < n_vc; i++) { + v_sigma2.push_back(v_pve[i] / gsl_vector_get(s_vec, i)); + v_enrich.push_back(v_pve[i] / gsl_vector_get(s_vec, i) * s_snp / s_pve); } // Compute se_pve, se_sigma2. - for (size_t i=0; i<n_vc; i++) { - d=sqrt(gsl_matrix_get(Var_mat, i, i)); + for (size_t i = 0; i < n_vc; i++) { + d = sqrt(gsl_matrix_get(Var_mat, i, i)); v_se_pve.push_back(d); - v_se_sigma2.push_back(d/gsl_vector_get(s_vec, i)); + v_se_sigma2.push_back(d / gsl_vector_get(s_vec, i)); } // Compute pve_total, se_pve_total. - pve_total=0; - for (size_t i=0; i<n_vc; i++) { - pve_total+=v_pve[i]; + pve_total = 0; + for (size_t i = 0; i < n_vc; i++) { + pve_total += v_pve[i]; } - se_pve_total=0; - for (size_t i=0; i<n_vc; i++) { - for (size_t j=0; j<n_vc; j++) { - se_pve_total+=gsl_matrix_get(Var_mat, i, j); + se_pve_total = 0; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = 0; j < n_vc; j++) { + se_pve_total += gsl_matrix_get(Var_mat, i, j); } } - se_pve_total=sqrt(se_pve_total); + se_pve_total = sqrt(se_pve_total); // Compute se_enrich. 
gsl_matrix_set_identity(tmp_mat); double d1; - for (size_t i=0; i<n_vc; i++) { - d=v_pve[i]/s_pve; - d1=gsl_vector_get(s_vec, i); - for (size_t j=0; j<n_vc; j++) { - if (i==j) { - gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve); + for (size_t i = 0; i < n_vc; i++) { + d = v_pve[i] / s_pve; + d1 = gsl_vector_get(s_vec, i); + for (size_t j = 0; j < n_vc; j++) { + if (i == j) { + gsl_matrix_set(tmp_mat, i, j, (1 - d) / d1 * s_snp / s_pve); } else { - gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve); + gsl_matrix_set(tmp_mat, i, j, -1 * d / d1 * s_snp / s_pve); } } } - gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,tmp_mat,Var_mat,0.0,tmp_mat1); - gsl_blas_dgemm(CblasNoTrans,CblasTrans,1.0,tmp_mat1,tmp_mat,0.0, - VarEnrich_mat); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0, + tmp_mat1); + gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0, + VarEnrich_mat); - for (size_t i=0; i<n_vc; i++) { - d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i)); + for (size_t i = 0; i < n_vc; i++) { + d = sqrt(gsl_matrix_get(VarEnrich_mat, i, i)); v_se_enrich.push_back(d); } - cout<<"pve = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_pve[i]<<" "; + cout << "pve = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_pve[i] << " "; } - cout<<endl; + cout << endl; - cout<<"se(pve) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_se_pve[i]<<" "; + cout << "se(pve) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_se_pve[i] << " "; } - cout<<endl; + cout << endl; - cout<<"sigma2 per snp = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_sigma2[i]<<" "; + cout << "sigma2 per snp = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_sigma2[i] << " "; } - cout<<endl; + cout << endl; - cout<<"se(sigma2 per snp) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_se_sigma2[i]<<" "; + cout << "se(sigma2 per snp) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_se_sigma2[i] << " "; } - cout<<endl; + cout << endl; - cout<<"enrichment = "; - for 
(size_t i=0; i<n_vc; i++) { - cout<<v_enrich[i]<<" "; + cout << "enrichment = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_enrich[i] << " "; } - cout<<endl; + cout << endl; - cout<<"se(enrichment) = "; - for (size_t i=0; i<n_vc; i++) { - cout<<v_se_enrich[i]<<" "; + cout << "se(enrichment) = "; + for (size_t i = 0; i < n_vc; i++) { + cout << v_se_enrich[i] << " "; } - cout<<endl; + cout << endl; // Delete matrices. gsl_matrix_free(Si_mat); @@ -19,25 +19,25 @@ #ifndef __VC_H__ #define __VC_H__ -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" -#include "param.h" +#include "gsl/gsl_vector.h" #include "io.h" +#include "param.h" using namespace std; class VC_PARAM { public: - const gsl_matrix *K; - const gsl_matrix *W; - const gsl_vector *y; - gsl_matrix *P; - gsl_vector *Py; - gsl_matrix *KPy_mat; - gsl_matrix *PKPy_mat; - gsl_matrix *Hessian; - bool noconstrain; + const gsl_matrix *K; + const gsl_matrix *W; + const gsl_vector *y; + gsl_matrix *P; + gsl_vector *Py; + gsl_matrix *KPy_mat; + gsl_matrix *PKPy_mat; + gsl_matrix *Hessian; + bool noconstrain; }; class VC { @@ -45,91 +45,87 @@ class VC { public: // IO-related parameters size_t a_mode; - string file_cat; - string file_beta; - string file_cor; - string file_mq; - string file_ms; - - string file_out; - string path_out; - - set<string> setSnps; - - size_t ni_total_ref, ns_total_ref, ns_pair; - size_t ni_total, ns_total, ns_test; - size_t n_vc; - - double pve_total, se_pve_total; - vector<double> v_sigma2; - vector<double> v_se_sigma2; - vector<double> v_pve; - vector<double> v_se_pve; - vector<double> v_traceG; - vector<double> v_beta; - vector<double> v_se_beta; - - size_t crt; - double window_cm, window_bp, window_ns; - - double time_UtX; - double time_opt; - - // Main functions. 
- void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - void WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec, - const gsl_vector *qvar_vec, const gsl_matrix *S_mat, - const gsl_matrix *Svar_mat); - void CalcVChe (const gsl_matrix *K, const gsl_matrix *W, - const gsl_vector *y); - void CalcVCreml (const bool noconstrain, const gsl_matrix *K, - const gsl_matrix *W, const gsl_vector *y); - void CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, - const gsl_vector *y); + string file_cat; + string file_beta; + string file_cor; + string file_mq; + string file_ms; + + string file_out; + string path_out; + + set<string> setSnps; + + size_t ni_total_ref, ns_total_ref, ns_pair; + size_t ni_total, ns_total, ns_test; + size_t n_vc; + + double pve_total, se_pve_total; + vector<double> v_sigma2; + vector<double> v_se_sigma2; + vector<double> v_pve; + vector<double> v_se_pve; + vector<double> v_traceG; + vector<double> v_beta; + vector<double> v_se_beta; + + size_t crt; + double window_cm, window_bp, window_ns; + + double time_UtX; + double time_opt; + + // Main functions. 
+ void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + void WriteFile_qs(const gsl_vector *s_vec, const gsl_vector *q_vec, + const gsl_vector *qvar_vec, const gsl_matrix *S_mat, + const gsl_matrix *Svar_mat); + void CalcVChe(const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y); + void CalcVCreml(const bool noconstrain, const gsl_matrix *K, + const gsl_matrix *W, const gsl_vector *y); + void CalcVCacl(const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y); }; void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, - const gsl_matrix *Svar_mat, const gsl_vector *q_vec, - const gsl_vector *s_vec, const double df, vector<double> &v_pve, - vector<double> &v_se_pve, double &pve_total, - double &se_pve_total, vector<double> &v_sigma2, - vector<double> &v_se_sigma2, vector<double> &v_enrich, - vector<double> &v_se_enrich); - -bool BimbamXwz (const string &file_geno, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const vector<size_t> &vec_cat, const gsl_vector *w, - const gsl_vector *z, size_t ns_test, gsl_matrix *XWz); -bool PlinkXwz (const string &file_bed, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const vector<size_t> &vec_cat, const gsl_vector *w, - const gsl_vector *z, size_t ns_test, gsl_matrix *XWz); -bool MFILEXwz (const size_t mfile_mode, const string &file_mfile, - const int display_pace, vector<int> &indicator_idv, - vector<vector<int> > &mindicator_snp, - const vector<size_t> &vec_cat, const gsl_vector *w, - const gsl_vector *z, gsl_matrix *XWz); - -bool BimbamXtXwz (const string &file_geno, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz); -bool PlinkXtXwz (const string &file_bed, const int display_pace, - vector<int> &indicator_idv, vector<int> &indicator_snp, - const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz); -bool MFILEXtXwz (const size_t 
mfile_mode, const string &file_mfile, - const int display_pace, vector<int> &indicator_idv, - vector<vector<int> > &mindicator_snp, const gsl_matrix *XWz, - gsl_matrix *XtXWz); + const gsl_matrix *Svar_mat, const gsl_vector *q_vec, + const gsl_vector *s_vec, const double df, vector<double> &v_pve, + vector<double> &v_se_pve, double &pve_total, double &se_pve_total, + vector<double> &v_sigma2, vector<double> &v_se_sigma2, + vector<double> &v_enrich, vector<double> &v_se_enrich); + +bool BimbamXwz(const string &file_geno, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const vector<size_t> &vec_cat, const gsl_vector *w, + const gsl_vector *z, size_t ns_test, gsl_matrix *XWz); +bool PlinkXwz(const string &file_bed, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const vector<size_t> &vec_cat, const gsl_vector *w, + const gsl_vector *z, size_t ns_test, gsl_matrix *XWz); +bool MFILEXwz(const size_t mfile_mode, const string &file_mfile, + const int display_pace, vector<int> &indicator_idv, + vector<vector<int>> &mindicator_snp, + const vector<size_t> &vec_cat, const gsl_vector *w, + const gsl_vector *z, gsl_matrix *XWz); + +bool BimbamXtXwz(const string &file_geno, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz); +bool PlinkXtXwz(const string &file_bed, const int display_pace, + vector<int> &indicator_idv, vector<int> &indicator_snp, + const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz); +bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile, + const int display_pace, vector<int> &indicator_idv, + vector<vector<int>> &mindicator_snp, const gsl_matrix *XWz, + gsl_matrix *XtXWz); void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz, - const gsl_matrix *XtXWz, const gsl_matrix *S_mat, - const gsl_matrix *Svar_mat, const gsl_vector *w, - const gsl_vector *z, const gsl_vector *s_vec, - 
const vector<size_t> &vec_cat, const vector<double> &v_pve, - vector<double> &v_se_pve, double &pve_total, - double &se_pve_total, vector<double> &v_sigma2, - vector<double> &v_se_sigma2, vector<double> &v_enrich, - vector<double> &v_se_enrich); + const gsl_matrix *XtXWz, const gsl_matrix *S_mat, + const gsl_matrix *Svar_mat, const gsl_vector *w, + const gsl_vector *z, const gsl_vector *s_vec, + const vector<size_t> &vec_cat, const vector<double> &v_pve, + vector<double> &v_se_pve, double &pve_total, double &se_pve_total, + vector<double> &v_sigma2, vector<double> &v_se_sigma2, + vector<double> &v_enrich, vector<double> &v_se_enrich); #endif |