From 3935ba39d30666dd7d4a831155631847c77b70c4 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 2 Aug 2017 08:46:58 +0000 Subject: Massive patch using LLVM coding style. It was generated with: clang-format -style=LLVM -i *.cpp *.h Please set your editor to replace tabs with spaces and use indentation of 2 spaces. --- src/vc.cpp | 3655 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 1934 insertions(+), 1721 deletions(-) (limited to 'src/vc.cpp') diff --git a/src/vc.cpp b/src/vc.cpp index e8ccece..b5f36c0 100644 --- a/src/vc.cpp +++ b/src/vc.cpp @@ -16,216 +16,216 @@ along with this program. If not, see . */ -#include #include +#include #include -#include +#include #include +#include +#include #include +#include +#include #include #include -#include -#include -#include -#include #include -#include +#include -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" #include "gsl/gsl_cdf.h" -#include "gsl/gsl_multiroots.h" #include "gsl/gsl_min.h" +#include "gsl/gsl_multiroots.h" #include "Eigen/Dense" -#include "param.h" -#include "io.h" -#include "lapack.h" #include "eigenlib.h" #include "gzstream.h" -#include "mathfunc.h" +#include "io.h" +#include "lapack.h" #include "lmm.h" +#include "mathfunc.h" +#include "param.h" #include "vc.h" using namespace std; using namespace Eigen; // In this file, X, Y are already transformed (i.e. UtX and UtY). -void VC::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; +void VC::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; - file_cat=cPar.file_cat; - file_beta=cPar.file_beta; - file_cor=cPar.file_cor; + file_cat = cPar.file_cat; + file_beta = cPar.file_beta; + file_cor = cPar.file_cor; - setSnps=cPar.setSnps; + setSnps = cPar.setSnps; - file_out=cPar.file_out; - path_out=cPar.path_out; + file_out = cPar.file_out; + path_out = cPar.path_out; - time_UtX=0.0; - time_opt=0.0; + time_UtX = 0.0; + time_opt = 0.0; - v_traceG=cPar.v_traceG; + v_traceG = cPar.v_traceG; - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ns_test=cPar.ns_test; + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ns_test = cPar.ns_test; - crt=cPar.crt; - window_cm=cPar.window_cm; - window_bp=cPar.window_bp; - window_ns=cPar.window_ns; + crt = cPar.crt; + window_cm = cPar.window_cm; + window_bp = cPar.window_bp; + window_ns = cPar.window_ns; - n_vc=cPar.n_vc; + n_vc = cPar.n_vc; return; } -void VC::CopyToParam (PARAM &cPar) { - cPar.time_UtX=time_UtX; - cPar.time_opt=time_opt; +void VC::CopyToParam(PARAM &cPar) { + cPar.time_UtX = time_UtX; + cPar.time_opt = time_opt; - cPar.v_pve=v_pve; - cPar.v_se_pve=v_se_pve; - cPar.v_sigma2=v_sigma2; - cPar.v_se_sigma2=v_se_sigma2; - cPar.pve_total=pve_total; - cPar.se_pve_total=se_pve_total; - cPar.v_traceG=v_traceG; + cPar.v_pve = v_pve; + cPar.v_se_pve = v_se_pve; + cPar.v_sigma2 = v_sigma2; + cPar.v_se_sigma2 = v_se_sigma2; + cPar.pve_total = pve_total; + cPar.se_pve_total = se_pve_total; + cPar.v_traceG = v_traceG; - cPar.v_beta=v_beta; - cPar.v_se_beta=v_se_beta; + cPar.v_beta = v_beta; + cPar.v_se_beta = v_se_beta; - cPar.ni_total=ni_total; - cPar.ns_total=ns_total; - cPar.ns_test=ns_test; + cPar.ni_total = ni_total; + cPar.ns_total = ns_total; + cPar.ns_test = ns_test; - cPar.n_vc=n_vc; + cPar.n_vc = n_vc; - return; + return; } -void VC::WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec, - const gsl_vector *qvar_vec, const gsl_matrix *S_mat, - const gsl_matrix *Svar_mat) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".qvec.txt"; - - ofstream outfile_q (file_str.c_str(), ofstream::out); - if (!outfile_q) { - cout<<"error writing file: "<size; i++) { - outfile_q<size; i++) { - outfile_q<size; i++) { - outfile_q<size1; i++) { - for (size_t j=0; jsize2; j++) { - outfile_s<size1; i++) { - for (size_t j=0; jsize2; j++) { - outfile_s<size; i++) { + outfile_q << gsl_vector_get(s_vec, i) << endl; + } + for (size_t i = 0; i < q_vec->size; i++) { + outfile_q << gsl_vector_get(q_vec, i) << endl; + } + for (size_t i = 0; i < qvar_vec->size; i++) { + outfile_q << gsl_vector_get(qvar_vec, i) << endl; + } + + outfile_q.clear(); + outfile_q.close(); + + file_str = path_out + "/" + file_out; + file_str += ".smat.txt"; + + ofstream outfile_s(file_str.c_str(), ofstream::out); + if (!outfile_s) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + for (size_t i = 0; i < S_mat->size1; i++) { + for (size_t j = 0; j < S_mat->size2; j++) { + outfile_s << gsl_matrix_get(S_mat, i, j) << "\t"; + } + outfile_s << endl; + } + for (size_t i = 0; i < Svar_mat->size1; i++) { + for (size_t j = 0; j < Svar_mat->size2; j++) { + outfile_s << gsl_matrix_get(Svar_mat, i, j) << "\t"; + } + outfile_s << endl; + } + + outfile_s.clear(); + outfile_s.close(); + + return; } -void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { - size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1, n_cvt=(p->W)->size2; +void UpdateParam(const gsl_vector *log_sigma2, VC_PARAM *p) { + size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1, n_cvt = (p->W)->size2; - gsl_matrix *K_temp=gsl_matrix_alloc(n1, n1); - gsl_matrix *HiW=gsl_matrix_alloc(n1, n_cvt); - gsl_matrix *WtHiW=gsl_matrix_alloc(n_cvt, n_cvt); - gsl_matrix *WtHiWi=gsl_matrix_alloc(n_cvt, n_cvt); - gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1); + gsl_matrix *K_temp = gsl_matrix_alloc(n1, n1); + gsl_matrix *HiW = gsl_matrix_alloc(n1, n_cvt); + gsl_matrix *WtHiW = gsl_matrix_alloc(n_cvt, n_cvt); + gsl_matrix *WtHiWi = gsl_matrix_alloc(n_cvt, n_cvt); + gsl_matrix *WtHiWiWtHi = gsl_matrix_alloc(n_cvt, n1); double sigma2; // Calculate H = \sum_i^{k+1} \sigma_i^2 K_i. - gsl_matrix_set_zero (p->P); - for (size_t i=0; iP); + for (size_t i = 0; i < n_vc + 1; i++) { + if (i == n_vc) { + gsl_matrix_set_identity(K_temp); } else { - gsl_matrix_const_view K_sub= - gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); - gsl_matrix_memcpy (K_temp, &K_sub.matrix); + gsl_matrix_const_view K_sub = + gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1); + gsl_matrix_memcpy(K_temp, &K_sub.matrix); } // When unconstrained, update on sigma2 instead of log_sigma2. if (p->noconstrain) { - sigma2=gsl_vector_get (log_sigma2, i); + sigma2 = gsl_vector_get(log_sigma2, i); } else { - sigma2=exp(gsl_vector_get (log_sigma2, i) ); + sigma2 = exp(gsl_vector_get(log_sigma2, i)); } gsl_matrix_scale(K_temp, sigma2); - gsl_matrix_add (p->P, K_temp); + gsl_matrix_add(p->P, K_temp); } // Calculate H^{-1}. eigenlib_invert(p->P); - eigenlib_dgemm ("N", "N", 1.0, p->P, p->W, 0.0, HiW); - eigenlib_dgemm ("T", "N", 1.0, p->W, HiW, 0.0, WtHiW); + eigenlib_dgemm("N", "N", 1.0, p->P, p->W, 0.0, HiW); + eigenlib_dgemm("T", "N", 1.0, p->W, HiW, 0.0, WtHiW); eigenlib_invert(WtHiW); gsl_matrix_memcpy(WtHiWi, WtHiW); - eigenlib_dgemm ("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); - eigenlib_dgemm ("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P); + eigenlib_dgemm("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); + eigenlib_dgemm("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P); // Calculate Py, KPy, PKPy. gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py); double d; - for (size_t i=0; iKPy_mat, i); - gsl_vector_view PKPy=gsl_matrix_column (p->PKPy_mat, i); + for (size_t i = 0; i < n_vc + 1; i++) { + gsl_vector_view KPy = gsl_matrix_column(p->KPy_mat, i); + gsl_vector_view PKPy = gsl_matrix_column(p->PKPy_mat, i); - if (i==n_vc) { - gsl_vector_memcpy (&KPy.vector, p->Py); + if (i == n_vc) { + gsl_vector_memcpy(&KPy.vector, p->Py); } else { - gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); + gsl_matrix_const_view K_sub = + gsl_matrix_const_submatrix(p->K, 0, n1 * i, n1, n1); // Seems to be important to use gsl dgemv here instead of // eigenlib_dgemv; otherwise. - gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, - &KPy.vector); + gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector); } gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector); @@ -233,64 +233,64 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { // When phenotypes are not normalized well, then some values in // the following matrix maybe NaN; change that to 0; this seems to // only happen when eigenlib_dgemv was used above. - for (size_t j=0; jKPy_mat->size1; j++) { - d=gsl_matrix_get (p->KPy_mat, j, i); + for (size_t j = 0; j < p->KPy_mat->size1; j++) { + d = gsl_matrix_get(p->KPy_mat, j, i); if (std::isnan(d)) { - gsl_matrix_set (p->KPy_mat, j, i, 0); - cout<<"nan appears in "<KPy_mat, j, i, 0); + cout << "nan appears in " << i << " " << j << endl; } - d=gsl_matrix_get (p->PKPy_mat, j, i); + d = gsl_matrix_get(p->PKPy_mat, j, i); if (std::isnan(d)) { - gsl_matrix_set (p->PKPy_mat, j, i, 0); - cout<<"nan appears in "<PKPy_mat, j, i, 0); + cout << "nan appears in " << i << " " << j << endl; } } } - gsl_matrix_free (K_temp); - gsl_matrix_free (HiW); - gsl_matrix_free (WtHiW); - gsl_matrix_free (WtHiWi); - gsl_matrix_free (WtHiWiWtHi); + gsl_matrix_free(K_temp); + gsl_matrix_free(HiW); + gsl_matrix_free(WtHiW); + gsl_matrix_free(WtHiWi); + gsl_matrix_free(WtHiWiWtHi); return; } // Below are functions for AI algorithm. -int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) { - VC_PARAM *p=(VC_PARAM *) params; +int LogRL_dev1(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) { + VC_PARAM *p = (VC_PARAM *)params; - size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1; + size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1; double tr, d; // Update parameters. - UpdateParam (log_sigma2, p); + UpdateParam(log_sigma2, p); // Calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy. - for (size_t i=0; iP, l, l); + for (size_t i = 0; i < n_vc + 1; i++) { + if (i == n_vc) { + tr = 0; + for (size_t l = 0; l < n1; l++) { + tr += gsl_matrix_get(p->P, l, l); } } else { - tr=0; - for (size_t l=0; lP, l); - gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l); - gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); - tr+=d; + tr = 0; + for (size_t l = 0; l < n1; l++) { + gsl_vector_view P_row = gsl_matrix_row(p->P, l); + gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l); + gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); + tr += d; } } - gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); + gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); if (p->noconstrain) { - d=(-0.5*tr+0.5*d); + d = (-0.5 * tr + 0.5 * d); } else { - d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i)); + d = (-0.5 * tr + 0.5 * d) * exp(gsl_vector_get(log_sigma2, i)); } gsl_vector_set(dev1, i, d); @@ -299,324 +299,354 @@ int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) { return GSL_SUCCESS; } -int LogRL_dev2 (const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) { - VC_PARAM *p=(VC_PARAM *) params; +int LogRL_dev2(const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) { + VC_PARAM *p = (VC_PARAM *)params; - size_t n_vc=log_sigma2->size-1; + size_t n_vc = log_sigma2->size - 1; double d, sigma2_i, sigma2_j; // Update parameters. - UpdateParam (log_sigma2, p); + UpdateParam(log_sigma2, p); // Calculate dev2 = 0.5(yPKPKPy). - for (size_t i=0; iKPy_mat, i); + for (size_t i = 0; i < n_vc + 1; i++) { + gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i); if (p->noconstrain) { - sigma2_i=gsl_vector_get(log_sigma2, i); + sigma2_i = gsl_vector_get(log_sigma2, i); } else { - sigma2_i=exp(gsl_vector_get(log_sigma2, i)); + sigma2_i = exp(gsl_vector_get(log_sigma2, i)); } - for (size_t j=i; jPKPy_mat, j); + for (size_t j = i; j < n_vc + 1; j++) { + gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); if (p->noconstrain) { - sigma2_j=gsl_vector_get(log_sigma2, j); - d*=-0.5; + sigma2_j = gsl_vector_get(log_sigma2, j); + d *= -0.5; } else { - sigma2_j=exp(gsl_vector_get(log_sigma2, j)); - d*=-0.5*sigma2_i*sigma2_j; + sigma2_j = exp(gsl_vector_get(log_sigma2, j)); + d *= -0.5 * sigma2_i * sigma2_j; } gsl_matrix_set(dev2, i, j, d); - if (j!=i) {gsl_matrix_set(dev2, j, i, d);} + if (j != i) { + gsl_matrix_set(dev2, j, i, d); + } } } - gsl_matrix_memcpy (p->Hessian, dev2); + gsl_matrix_memcpy(p->Hessian, dev2); return GSL_SUCCESS; } -int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, - gsl_vector *dev1, gsl_matrix *dev2) { - VC_PARAM *p=(VC_PARAM *) params; +int LogRL_dev12(const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, + gsl_matrix *dev2) { + VC_PARAM *p = (VC_PARAM *)params; - size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1; + size_t n1 = (p->K)->size1, n_vc = log_sigma2->size - 1; double tr, d, sigma2_i, sigma2_j; // Update parameters. - UpdateParam (log_sigma2, p); + UpdateParam(log_sigma2, p); - for (size_t i=0; iP, l, l); + for (size_t i = 0; i < n_vc + 1; i++) { + if (i == n_vc) { + tr = 0; + for (size_t l = 0; l < n1; l++) { + tr += gsl_matrix_get(p->P, l, l); } } else { - tr=0; - for (size_t l=0; lP, l); - gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l); - gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); - tr+=d; + tr = 0; + for (size_t l = 0; l < n1; l++) { + gsl_vector_view P_row = gsl_matrix_row(p->P, l); + gsl_vector_const_view K_col = gsl_matrix_const_column(p->K, n1 * i + l); + gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); + tr += d; } } - gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); + gsl_vector_view KPy_i = gsl_matrix_column(p->KPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); if (p->noconstrain) { - sigma2_i=gsl_vector_get(log_sigma2, i); - d=(-0.5*tr+0.5*d); + sigma2_i = gsl_vector_get(log_sigma2, i); + d = (-0.5 * tr + 0.5 * d); } else { - sigma2_i=exp(gsl_vector_get(log_sigma2, i)); - d=(-0.5*tr+0.5*d)*sigma2_i; + sigma2_i = exp(gsl_vector_get(log_sigma2, i)); + d = (-0.5 * tr + 0.5 * d) * sigma2_i; } gsl_vector_set(dev1, i, d); - for (size_t j=i; jPKPy_mat, j); + for (size_t j = i; j < n_vc + 1; j++) { + gsl_vector_view PKPy_j = gsl_matrix_column(p->PKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); if (p->noconstrain) { - sigma2_j=gsl_vector_get(log_sigma2, j); - d*=-0.5; + sigma2_j = gsl_vector_get(log_sigma2, j); + d *= -0.5; } else { - sigma2_j=exp(gsl_vector_get(log_sigma2, j)); - d*=-0.5*sigma2_i*sigma2_j; + sigma2_j = exp(gsl_vector_get(log_sigma2, j)); + d *= -0.5 * sigma2_i * sigma2_j; } gsl_matrix_set(dev2, i, j, d); - if (j!=i) {gsl_matrix_set(dev2, j, i, d);} + if (j != i) { + gsl_matrix_set(dev2, j, i, d); + } } - } - gsl_matrix_memcpy (p->Hessian, dev2); + gsl_matrix_memcpy(p->Hessian, dev2); return GSL_SUCCESS; } // Read header to determine which column contains which item. -bool ReadHeader_vc (const string &line, HEADER &header) { - string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID", - "rsid","RSID"}; - set rs_set(rs_ptr, rs_ptr+10); - string chr_ptr[]={"chr","CHR"}; - set chr_set(chr_ptr, chr_ptr+2); - string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", - "bp", "BP"}; - set pos_set(pos_ptr, pos_ptr+8); - string cm_ptr[]={"cm","CM"}; - set cm_set(cm_ptr, cm_ptr+2); - string a1_ptr[]={"a1","A1","allele1","ALLELE1"}; - set a1_set(a1_ptr, a1_ptr+4); - string a0_ptr[]={"a0","A0","allele0","ALLELE0"}; - set a0_set(a0_ptr, a0_ptr+4); - - string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"}; - set z_set(z_ptr, z_ptr+6); - string beta_ptr[]={"beta","BETA","b","B"}; - set beta_set(beta_ptr, beta_ptr+4); - string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"}; - set sebeta_set(sebeta_ptr, sebeta_ptr+4); - string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"}; - set chisq_set(chisq_ptr, chisq_ptr+4); - string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"}; - set p_set(p_ptr, p_ptr+6); - - string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"}; - set n_set(n_ptr, n_ptr+6); - string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"}; - set nmis_set(nmis_ptr, nmis_ptr+6); - string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"}; - set nobs_set(nobs_ptr, nobs_ptr+4); - - string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq", - "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"}; - set af_set(af_ptr, af_ptr+10); - string var_ptr[]={"var","VAR"}; - set var_set(var_ptr, var_ptr+2); - - string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"}; - set ws_set(ws_ptr, ws_ptr+4); - string cor_ptr[]={"cor","COR","r","R"}; - set cor_set(cor_ptr, cor_ptr+4); - - header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0; - header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; - header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; - header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; - header.cor_col=0; header.coln=0; +bool ReadHeader_vc(const string &line, HEADER &header) { + string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", + "SNPS", "snpid", "SNPID", "rsid", "RSID"}; + set rs_set(rs_ptr, rs_ptr + 10); + string chr_ptr[] = {"chr", "CHR"}; + set chr_set(chr_ptr, chr_ptr + 2); + string pos_ptr[] = { + "ps", "PS", "pos", "POS", "base_position", "BASE_POSITION", "bp", "BP"}; + set pos_set(pos_ptr, pos_ptr + 8); + string cm_ptr[] = {"cm", "CM"}; + set cm_set(cm_ptr, cm_ptr + 2); + string a1_ptr[] = {"a1", "A1", "allele1", "ALLELE1"}; + set a1_set(a1_ptr, a1_ptr + 4); + string a0_ptr[] = {"a0", "A0", "allele0", "ALLELE0"}; + set a0_set(a0_ptr, a0_ptr + 4); + + string z_ptr[] = {"z", "Z", "z_score", "Z_SCORE", "zscore", "ZSCORE"}; + set z_set(z_ptr, z_ptr + 6); + string beta_ptr[] = {"beta", "BETA", "b", "B"}; + set beta_set(beta_ptr, beta_ptr + 4); + string sebeta_ptr[] = {"se_beta", "SE_BETA", "se", "SE"}; + set sebeta_set(sebeta_ptr, sebeta_ptr + 4); + string chisq_ptr[] = {"chisq", "CHISQ", "chisquare", "CHISQUARE"}; + set chisq_set(chisq_ptr, chisq_ptr + 4); + string p_ptr[] = {"p", "P", "pvalue", "PVALUE", "p-value", "P-VALUE"}; + set p_set(p_ptr, p_ptr + 6); + + string n_ptr[] = {"n", "N", "ntotal", "NTOTAL", "n_total", "N_TOTAL"}; + set n_set(n_ptr, n_ptr + 6); + string nmis_ptr[] = {"nmis", "NMIS", "n_mis", "N_MIS", "n_miss", "N_MISS"}; + set nmis_set(nmis_ptr, nmis_ptr + 6); + string nobs_ptr[] = {"nobs", "NOBS", "n_obs", "N_OBS"}; + set nobs_set(nobs_ptr, nobs_ptr + 4); + + string af_ptr[] = {"af", + "AF", + "maf", + "MAF", + "f", + "F", + "allele_freq", + "ALLELE_FREQ", + "allele_frequency", + "ALLELE_FREQUENCY"}; + set af_set(af_ptr, af_ptr + 10); + string var_ptr[] = {"var", "VAR"}; + set var_set(var_ptr, var_ptr + 2); + + string ws_ptr[] = {"window_size", "WINDOW_SIZE", "ws", "WS"}; + set ws_set(ws_ptr, ws_ptr + 4); + string cor_ptr[] = {"cor", "COR", "r", "R"}; + set cor_set(cor_ptr, cor_ptr + 4); + + header.rs_col = 0; + header.chr_col = 0; + header.pos_col = 0; + header.a1_col = 0; + header.a0_col = 0; + header.z_col = 0; + header.beta_col = 0; + header.sebeta_col = 0; + header.chisq_col = 0; + header.p_col = 0; + header.n_col = 0; + header.nmis_col = 0; + header.nobs_col = 0; + header.af_col = 0; + header.var_col = 0; + header.ws_col = 0; + header.cor_col = 0; + header.coln = 0; char *ch_ptr; string type; - size_t n_error=0; - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - while (ch_ptr!=NULL) { - type=ch_ptr; - if (rs_set.count(type)!=0) { - if (header.rs_col==0) { - header.rs_col=header.coln+1; + size_t n_error = 0; + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + while (ch_ptr != NULL) { + type = ch_ptr; + if (rs_set.count(type) != 0) { + if (header.rs_col == 0) { + header.rs_col = header.coln + 1; } else { - cout<<"error! more than two rs columns in the file."< &setSnps, - vector &vec_rs, vector &vec_n, - vector &vec_cm, vector &vec_bp, - map &mapRS2in, map &mapRS2var) { +void ReadFile_cor(const string &file_cor, const set &setSnps, + vector &vec_rs, vector &vec_n, + vector &vec_cm, vector &vec_bp, + map &mapRS2in, + map &mapRS2var) { vec_rs.clear(); vec_n.clear(); mapRS2in.clear(); mapRS2var.clear(); - igzstream infile (file_cor.c_str(), igzstream::in); + igzstream infile(file_cor.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open cov file: "< &setSnps, char *ch_ptr; string rs, chr, a1, a0, pos, cm; - double af=0, var_x=0, d_pos, d_cm; - size_t n_total=0, n_mis=0, n_obs=0, ni_total=0; - size_t ns_test=0, ns_total=0; + double af = 0, var_x = 0, d_pos, d_cm; + size_t n_total = 0, n_mis = 0, n_obs = 0, ni_total = 0; + size_t ns_test = 0, ns_total = 0; HEADER header; // Header. !safeGetline(infile, line).eof(); - ReadHeader_vc (line, header); + ReadHeader_vc(line, header); - if (header.n_col==0 ) { - if (header.nobs_col==0 && header.nmis_col==0) { - cout<<"error! missing sample size in the cor file."<0) {vec_cm.push_back(d_cm);} else {vec_cm.push_back(d_cm);} - if (d_pos>0) {vec_bp.push_back(d_pos);} else {vec_bp.push_back(d_pos);} + if (d_cm > 0) { + vec_cm.push_back(d_cm); + } else { + vec_cm.push_back(d_cm); + } + if (d_pos > 0) { + vec_bp.push_back(d_pos); + } else { + vec_bp.push_back(d_pos); + } // Record mapRS2in and mapRS2var. - if (setSnps.size()==0 || setSnps.count(rs)!=0) { - if (mapRS2in.count(rs)==0) { - mapRS2in[rs]=1; + if (setSnps.size() == 0 || setSnps.count(rs) != 0) { + if (mapRS2in.count(rs) == 0) { + mapRS2in[rs] = 1; - if (header.var_col!=0) { - mapRS2var[rs]=var_x; - } else if (header.af_col!=0) { - var_x=2.0*af*(1.0-af); - mapRS2var[rs]=var_x; - } else {} + if (header.var_col != 0) { + mapRS2var[rs] = var_x; + } else if (header.af_col != 0) { + var_x = 2.0 * af * (1.0 - af); + mapRS2var[rs] = var_x; + } else { + } - ns_test++; + ns_test++; } else { - cout<<"error! more than one snp has the same id "< &setSnps, // Read beta file, store mapRS2var if var is provided here, calculate // q and var_y. -void ReadFile_beta (const bool flag_priorscale, const string &file_beta, - const map &mapRS2cat, - map &mapRS2in, - map &mapRS2var, - map &mapRS2nsamp, - gsl_vector *q_vec, gsl_vector *qvar_vec, - gsl_vector *s_vec, size_t &ni_total, - size_t &ns_total) { +void ReadFile_beta(const bool flag_priorscale, const string &file_beta, + const map &mapRS2cat, + map &mapRS2in, + map &mapRS2var, + map &mapRS2nsamp, gsl_vector *q_vec, + gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total, + size_t &ns_total) { mapRS2nsamp.clear(); - igzstream infile (file_beta.c_str(), igzstream::in); + igzstream infile(file_beta.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open beta file: "< vec_q, vec_qvar, vec_s; - for (size_t i=0; isize; i++) { + for (size_t i = 0; i < q_vec->size; i++) { vec_q.push_back(0.0); vec_qvar.push_back(0.0); vec_s.push_back(0.0); @@ -753,122 +820,166 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta, // Read header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_vc (line, header); + ReadHeader_vc(line, header); - if (header.n_col==0 ) { - if (header.nobs_col==0 && header.nmis_col==0) { - cout<<"error! missing sample size in the beta file."<1) { - cout<<"error! more than one snp has the same id "< 1) { + cout << "error! more than one snp has the same id " << rs + << " in beta file?" << endl; + break; } - if (header.var_col==0) { - if (header.af_col!=0) { - var_x=2.0*af*(1.0-af); - } else { - var_x=mapRS2var.at(rs); - } + if (header.var_col == 0) { + if (header.af_col != 0) { + var_x = 2.0 * af * (1.0 - af); + } else { + var_x = mapRS2var.at(rs); + } } - if (flag_priorscale) {var_x=1;} + if (flag_priorscale) { + var_x = 1; + } mapRS2in[rs]++; - mapRS2var[rs]=var_x; - mapRS2nsamp[rs]=n_total; - - if (mapRS2cat.size()!=0) { - vec_q[mapRS2cat.at(rs) ]+=(zsquare-1.0)*var_x/(double)n_total; - vec_s[mapRS2cat.at(rs) ]+=var_x; - vec_qvar[mapRS2cat.at(rs) ]+= - var_x*var_x/((double)n_total*(double)n_total); + mapRS2var[rs] = var_x; + mapRS2nsamp[rs] = n_total; + + if (mapRS2cat.size() != 0) { + vec_q[mapRS2cat.at(rs)] += (zsquare - 1.0) * var_x / (double)n_total; + vec_s[mapRS2cat.at(rs)] += var_x; + vec_qvar[mapRS2cat.at(rs)] += + var_x * var_x / ((double)n_total * (double)n_total); } else { - vec_q[0]+=(zsquare-1.0)*var_x/(double)n_total; - vec_s[0]+=var_x; - vec_qvar[0]+=var_x*var_x/((double)n_total*(double)n_total); + vec_q[0] += (zsquare - 1.0) * var_x / (double)n_total; + vec_s[0] += var_x; + vec_qvar[0] += var_x * var_x / ((double)n_total * (double)n_total); } - ni_total=max(ni_total, n_total); + ni_total = max(ni_total, n_total); ns_test++; } ns_total++; } - for (size_t i=0; isize; i++) { + for (size_t i = 0; i < q_vec->size; i++) { gsl_vector_set(q_vec, i, vec_q[i]); - gsl_vector_set(qvar_vec, i, 2.0*vec_qvar[i]); + gsl_vector_set(qvar_vec, i, 2.0 * vec_qvar[i]); gsl_vector_set(s_vec, i, vec_s[i]); } @@ -882,21 +993,20 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta, // Look for rs, n_mis+n_obs, var, window_size, cov. // If window_cm/bp/ns is provided, then use these max values to // calibrate estimates. -void ReadFile_cor (const string &file_cor, const vector &vec_rs, - const vector &vec_n, const vector &vec_cm, - const vector &vec_bp, - const map &mapRS2cat, - const map &mapRS2in, - const map &mapRS2var, - const map &mapRS2nsamp, - const size_t crt, const double &window_cm, - const double &window_bp, const double &window_ns, - gsl_matrix *S_mat, gsl_matrix *Svar_mat, - gsl_vector *qvar_vec, size_t &ni_total, - size_t &ns_total, size_t &ns_test, size_t &ns_pair) { - igzstream infile (file_cor.c_str(), igzstream::in); +void ReadFile_cor(const string &file_cor, const vector &vec_rs, + const vector &vec_n, const vector &vec_cm, + const vector &vec_bp, + const map &mapRS2cat, + const map &mapRS2in, + const map &mapRS2var, + const map &mapRS2nsamp, const size_t crt, + const double &window_cm, const double &window_bp, + const double &window_ns, gsl_matrix *S_mat, + gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total, + size_t &ns_total, size_t &ns_test, size_t &ns_pair) { + igzstream infile(file_cor.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open cov file: "< &vec_rs, string rs1, rs2; double d1, d2, d3, cor, var1, var2; - size_t n_nb, nsamp1, nsamp2, n12, bin_size=10, bin; + size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin; - vector > mat_S, mat_Svar, mat_tmp; + vector> mat_S, mat_Svar, mat_tmp; vector vec_qvar, vec_tmp; - vector > > mat3d_Sbin; + vector>> mat3d_Sbin; - for (size_t i=0; isize1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { vec_qvar.push_back(0.0); } - for (size_t i=0; isize1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { mat_S.push_back(vec_qvar); mat_Svar.push_back(vec_qvar); } - for (size_t k=0; ksize1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { mat_tmp.push_back(vec_tmp); } - for (size_t i=0; isize1; i++) { + for (size_t i = 0; i < S_mat->size1; i++) { mat3d_Sbin.push_back(mat_tmp); } string rs, chr, a1, a0, type, pos, cm; - size_t n_total=0, n_mis=0, n_obs=0; + size_t n_total = 0, n_mis = 0, n_obs = 0; double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm; - ns_test=0; ns_total=0; ns_pair=0; ni_total=0; + ns_test = 0; + ns_total = 0; + ns_pair = 0; + ni_total = 0; // Header. HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_vc (line, header); + ReadHeader_vc(line, header); while (!safeGetline(infile, line).eof()) { // Do not read cor values this time; upto col_n-1. - d_pos1=0; d_cm1=0; - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - for (size_t i=0; i &vec_rs, // x=seq(0.5,bin_size-0.5,by=1) and then compute a correlation // factor as a percentage. double a, b, x, y, n, var_y, var_x, mean_y, mean_x, cov_xy, crt_factor; - if (crt==1) { - for (size_t i=0; isize1; i++) { - for (size_t j=i; jsize2; j++) { - - // Correct mat_S. - n=0; var_y=0; var_x=0; mean_y=0; mean_x=0; cov_xy=0; - for (size_t k=0; k0) { - y=1/sqrt(y); - mean_x+=x; mean_y+=y; var_x+=x*x; var_y+=y*y; cov_xy+=x*y; - n++; - } - } - cout<=5) { - mean_x/=n; mean_y/=n; var_x/=n; var_y/=n; cov_xy/=n; - var_x-=mean_x*mean_x; var_y-=mean_y*mean_y; cov_xy-=mean_x*mean_y; - b=cov_xy/var_x; - a=mean_y-b*mean_x; - crt_factor=a/(b*(bin_size+0.5))+1; - if (i==j) { - mat_S[i][j]*=crt_factor; - } else { - mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor; - } - cout<size1; i++) { + for (size_t j = i; j < S_mat->size2; j++) { + + // Correct mat_S. + n = 0; + var_y = 0; + var_x = 0; + mean_y = 0; + mean_x = 0; + cov_xy = 0; + for (size_t k = 0; k < bin_size; k++) { + if (j == i) { + y = mat3d_Sbin[i][j][k]; + } else { + y = mat3d_Sbin[i][j][k] + mat3d_Sbin[j][i][k]; + } + x = k + 0.5; + cout << y << ", "; + if (y > 0) { + y = 1 / sqrt(y); + mean_x += x; + mean_y += y; + var_x += x * x; + var_y += y * y; + cov_xy += x * y; + n++; + } + } + cout << endl; + + if (n >= 5) { + mean_x /= n; + mean_y /= n; + var_x /= n; + var_y /= n; + cov_xy /= n; + var_x -= mean_x * mean_x; + var_y -= mean_y * mean_y; + cov_xy -= mean_x * mean_y; + b = cov_xy / var_x; + a = mean_y - b * mean_x; + crt_factor = a / (b * (bin_size + 0.5)) + 1; + if (i == j) { + mat_S[i][j] *= crt_factor; + } else { + mat_S[i][j] *= crt_factor; + mat_S[j][i] *= crt_factor; + } + cout << crt_factor << endl; + + // Correct qvar. + if (i == j) { + vec_qvar[i] *= crt_factor; + } + } } } } // Save to gsl_vector and gsl_matrix: qvar_vec, S_mat, Svar_mat. - for (size_t i=0; isize1; i++) { - d1=gsl_vector_get(qvar_vec, i)+2*vec_qvar[i]; + for (size_t i = 0; i < S_mat->size1; i++) { + d1 = gsl_vector_get(qvar_vec, i) + 2 * vec_qvar[i]; gsl_vector_set(qvar_vec, i, d1); - for (size_t j=0; jsize2; j++) { - if (i==j) { - gsl_matrix_set(S_mat, i, j, mat_S[i][i]); - gsl_matrix_set(Svar_mat, i, j, - 2.0*mat_Svar[i][i]*ns_test*ns_test/(2.0*ns_pair) ); + for (size_t j = 0; j < S_mat->size2; j++) { + if (i == j) { + gsl_matrix_set(S_mat, i, j, mat_S[i][i]); + gsl_matrix_set(Svar_mat, i, j, 2.0 * mat_Svar[i][i] * ns_test * + ns_test / (2.0 * ns_pair)); } else { - gsl_matrix_set(S_mat, i, j, mat_S[i][j]+mat_S[j][i]); - gsl_matrix_set(Svar_mat, i, j, - 2.0*(mat_Svar[i][j]+mat_Svar[j][i])* - ns_test*ns_test/(2.0*ns_pair) ); + gsl_matrix_set(S_mat, i, j, mat_S[i][j] + mat_S[j][i]); + gsl_matrix_set(Svar_mat, i, j, 2.0 * (mat_Svar[i][j] + mat_Svar[j][i]) * + ns_test * ns_test / (2.0 * ns_pair)); } } } - - infile.clear(); infile.close(); @@ -1157,170 +1300,175 @@ void ReadFile_cor (const string &file_cor, const vector &vec_rs, // compute the variance for S, use a set of genotypes, phenotypes, and // individual ids, and snp category label. void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, - const gsl_matrix *Svar_mat, const gsl_vector *q_vec, - const gsl_vector *s_vec, const double df, - vector &v_pve, vector &v_se_pve, - double &pve_total, double &se_pve_total, - vector &v_sigma2, vector &v_se_sigma2, - vector &v_enrich, vector &v_se_enrich) { - size_t n_vc=S_mat->size1; - - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); - - gsl_vector *pve=gsl_vector_alloc (n_vc); - gsl_vector *pve_plus=gsl_vector_alloc (n_vc+1); - gsl_vector *tmp=gsl_vector_alloc (n_vc+1); - gsl_vector *sigma2persnp=gsl_vector_alloc (n_vc); - gsl_vector *enrich=gsl_vector_alloc (n_vc); - gsl_vector *se_pve=gsl_vector_alloc (n_vc); - gsl_vector *se_sigma2persnp=gsl_vector_alloc (n_vc); - gsl_vector *se_enrich=gsl_vector_alloc (n_vc); + const gsl_matrix *Svar_mat, const gsl_vector *q_vec, + const gsl_vector *s_vec, const double df, vector &v_pve, + vector &v_se_pve, double &pve_total, double &se_pve_total, + vector &v_sigma2, vector &v_se_sigma2, + vector &v_enrich, vector &v_se_enrich) { + size_t n_vc = S_mat->size1; + + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc); + + gsl_vector *pve = gsl_vector_alloc(n_vc); + gsl_vector *pve_plus = gsl_vector_alloc(n_vc + 1); + gsl_vector *tmp = gsl_vector_alloc(n_vc + 1); + gsl_vector *sigma2persnp = gsl_vector_alloc(n_vc); + gsl_vector *enrich = gsl_vector_alloc(n_vc); + gsl_vector *se_pve = gsl_vector_alloc(n_vc); + gsl_vector *se_sigma2persnp = gsl_vector_alloc(n_vc); + gsl_vector *se_enrich = gsl_vector_alloc(n_vc); double d; // Calculate S^{-1}q. - gsl_matrix_memcpy (tmp_mat, S_mat); + gsl_matrix_memcpy(tmp_mat, S_mat); int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); - LUDecomp (tmp_mat, pmt, &sig); - LUInvert (tmp_mat, pmt, Si_mat); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); + LUDecomp(tmp_mat, pmt, &sig); + LUInvert(tmp_mat, pmt, Si_mat); // Calculate sigma2snp and pve. - gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); + gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); gsl_vector_memcpy(sigma2persnp, pve); gsl_vector_div(sigma2persnp, s_vec); // Get qvar_mat. - gsl_matrix_memcpy (qvar_mat, Vq); - gsl_matrix_scale (qvar_mat, 1.0/(df*df)); + gsl_matrix_memcpy(qvar_mat, Vq); + gsl_matrix_scale(qvar_mat, 1.0 / (df * df)); // Calculate variance for these estimates. - for (size_t i=0; isize1, n2=K->size2; - size_t n_vc=n2/n1; +void VC::CalcVChe(const gsl_matrix *K, const gsl_matrix *W, + const gsl_vector *y) { + size_t n1 = K->size1, n2 = K->size2; + size_t n_vc = n2 / n1; - double r=(double)n1/(double)(n1 - W->size2); + double r = (double)n1 / (double)(n1 - W->size2); double var_y, var_y_new; double d, tr, s, v; vector traceG_new; // New matrices/vectors. - gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); - gsl_vector *y_scale=gsl_vector_alloc (n1); - gsl_matrix *Kry=gsl_matrix_alloc (n1, n_vc); - gsl_matrix *yKrKKry=gsl_matrix_alloc (n_vc, n_vc*(n_vc+1) ); - gsl_vector *KKry=gsl_vector_alloc (n1); + gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2); + gsl_vector *y_scale = gsl_vector_alloc(n1); + gsl_matrix *Kry = gsl_matrix_alloc(n1, n_vc); + gsl_matrix *yKrKKry = gsl_matrix_alloc(n_vc, n_vc * (n_vc + 1)); + gsl_vector *KKry = gsl_vector_alloc(n1); // Old matrices/vectors. - gsl_vector *pve=gsl_vector_alloc (n_vc); - gsl_vector *se_pve=gsl_vector_alloc (n_vc); - gsl_vector *q_vec=gsl_vector_alloc (n_vc); - gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_vector *pve = gsl_vector_alloc(n_vc); + gsl_vector *se_pve = gsl_vector_alloc(n_vc); + gsl_vector *q_vec = gsl_vector_alloc(n_vc); + gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); // Center and scale K by W. - for (size_t i=0; i1) { - cout<<"total pve = "<x, i) << " "; } else { - cout<x, i))<<" "; + cout << exp(gsl_vector_get(s_fdf->x, i)) << " "; } } - cout<f, 1e-3); - } - while (status==GSL_CONTINUE && iterf, 1e-3); + } while (status == GSL_CONTINUE && iter < max_iter); // Obtain Hessian and Hessian inverse. - int sig=LogRL_dev12 (s_fdf->x, ¶ms, dev1, dev2); + int sig = LogRL_dev12(s_fdf->x, ¶ms, dev1, dev2); - gsl_permutation * pmt=gsl_permutation_alloc (n_vc+1); - LUDecomp (dev2, pmt, &sig); - LUInvert (dev2, pmt, Hessian); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc + 1); + LUDecomp(dev2, pmt, &sig); + LUInvert(dev2, pmt, Hessian); gsl_permutation_free(pmt); // Save sigma2 and se_sigma2. - v_sigma2.clear(); v_se_sigma2.clear(); - for (size_t i=0; ix, i); + d = gsl_vector_get(s_fdf->x, i); } else { - d=exp(gsl_vector_get(s_fdf->x, i)); + d = exp(gsl_vector_get(s_fdf->x, i)); } v_sigma2.push_back(d); if (noconstrain) { - d=-1.0*gsl_matrix_get(Hessian, i, i); + d = -1.0 * gsl_matrix_get(Hessian, i, i); } else { - d=-1.0*d*d*gsl_matrix_get(Hessian, i, i); + d = -1.0 * d * d * gsl_matrix_get(Hessian, i, i); } v_se_sigma2.push_back(sqrt(d)); } - s=0; - for (size_t i=0; ix, i); - d1=1; + d1 = gsl_vector_get(s_fdf->x, i); + d1 = 1; } else { - d1=exp(gsl_vector_get(s_fdf->x, i)); + d1 = exp(gsl_vector_get(s_fdf->x, i)); } - if (kx, j); - d2=1; - } else { - d2=exp(gsl_vector_get(s_fdf->x, j)); - } - - if (kx, j); + d2 = 1; + } else { + d2 = exp(gsl_vector_get(s_fdf->x, j)); + } + + if (k < n_vc) { + if (j == k) { + d2 *= v_traceG[k] * (s - v_sigma2[k] * v_traceG[k]) / (s * s); + } else if (j == n_vc) { + d2 *= -1 * v_traceG[k] * v_sigma2[k] / (s * s); + } else { + d2 *= -1 * v_traceG[j] * v_traceG[k] * v_sigma2[k] / (s * s); + } + } else { + if (j == k) { + d2 *= -1 * (s - v_sigma2[n_vc]) / (s * s); + } else { + d2 *= v_traceG[j] * v_sigma2[n_vc] / (s * s); + } + } + + d += -1.0 * d1 * d2 * gsl_matrix_get(Hessian, i, j); + } + } + + if (k < n_vc) { + v_se_pve.push_back(sqrt(d)); } else { - se_pve_total=sqrt(d); + se_pve_total = sqrt(d); } } @@ -1758,252 +1923,265 @@ void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K, } // Ks are not scaled. -void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, - const gsl_vector *y) { - size_t n1=K->size1, n2=K->size2; - size_t n_vc=n2/n1; +void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W, + const gsl_vector *y) { + size_t n1 = K->size1, n2 = K->size2; + size_t n_vc = n2 / n1; double d, y2_sum, tau_inv, se_tau_inv; // New matrices/vectors. - gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); - gsl_vector *y_scale=gsl_vector_alloc (n1); - gsl_vector *y2=gsl_vector_alloc (n1); - gsl_vector *n1_vec=gsl_vector_alloc (n1); - gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc); - gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc); - gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1); - gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1); + gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2); + gsl_vector *y_scale = gsl_vector_alloc(n1); + gsl_vector *y2 = gsl_vector_alloc(n1); + gsl_vector *n1_vec = gsl_vector_alloc(n1); + gsl_matrix *Ay = gsl_matrix_alloc(n1, n_vc); + gsl_matrix *K2 = gsl_matrix_alloc(n1, n_vc * n_vc); + gsl_matrix *K_tmp = gsl_matrix_alloc(n1, n1); + gsl_matrix *V_mat = gsl_matrix_alloc(n1, n1); // Old matrices/vectors. - gsl_vector *pve=gsl_vector_alloc (n_vc); - gsl_vector *se_pve=gsl_vector_alloc (n_vc); - gsl_vector *q_vec=gsl_vector_alloc (n_vc); - gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_vector *pve = gsl_vector_alloc(n_vc); + gsl_vector *se_pve = gsl_vector_alloc(n_vc); + gsl_vector *q_vec = gsl_vector_alloc(n_vc); + gsl_matrix *S1 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *S2 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *S_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *J_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); // Center and scale K by W, and standardize K further so that all // diagonal elements are 1 - for (size_t i=0; isize; i++) { - y2_sum+=gsl_vector_get(y2, i); + y2_sum = 0; + for (size_t i = 0; i < y2->size; i++) { + y2_sum += gsl_vector_get(y2, i); } // Compute the n_vc size q vector. - for (size_t i=0; isize1; t++) { - gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t); - gsl_vector_add (n1_vec, &Ktmp_col.vector); + for (size_t t = 0; t < K_tmp->size1; t++) { + gsl_vector_view Ktmp_col = gsl_matrix_column(K_tmp, t); + gsl_vector_add(n1_vec, &Ktmp_col.vector); } - gsl_vector_add_constant (n1_vec, -1.0); + gsl_vector_add_constant(n1_vec, -1.0); // Compute S1. - gsl_blas_ddot (n1_vec, y2, &d); - gsl_matrix_set (S1, i, j, 2*d); - if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);} + gsl_blas_ddot(n1_vec, y2, &d); + gsl_matrix_set(S1, i, j, 2 * d); + if (i != j) { + gsl_matrix_set(S1, j, i, 2 * d); + } // Compute S2. - d=0; - for (size_t t=0; tsize; t++) { - d+=gsl_vector_get (n1_vec, t); + d = 0; + for (size_t t = 0; t < n1_vec->size; t++) { + d += gsl_vector_get(n1_vec, t); + } + gsl_matrix_set(S2, i, j, d); + if (i != j) { + gsl_matrix_set(S2, j, i, d); } - gsl_matrix_set (S2, i, j, d); - if (i!=j) {gsl_matrix_set (S2, j, i, d);} // Save information to compute J. - gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j); - gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i); + gsl_vector_view K2col1 = gsl_matrix_column(K2, n_vc * i + j); + gsl_vector_view K2col2 = gsl_matrix_column(K2, n_vc * j + i); gsl_vector_memcpy(&K2col1.vector, n1_vec); - if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);} + if (i != j) { + gsl_vector_memcpy(&K2col2.vector, n1_vec); + } } } // Iterate to solve tau and h's. - size_t it=0; - double s=1; - while (abs(s)>1e-3 && it<100) { + size_t it = 0; + double s = 1; + while (abs(s) > 1e-3 && it < 100) { // Update tau_inv. - gsl_blas_ddot (q_vec, pve, &d); - if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;} - tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1)); - if (it>0) {s/=tau_inv;} + gsl_blas_ddot(q_vec, pve, &d); + if (it > 0) { + s = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)) - tau_inv; + } + tau_inv = y2_sum / (double)n1 - d / ((double)n1 * ((double)n1 - 1)); + if (it > 0) { + s /= tau_inv; + } // Update S. - gsl_matrix_memcpy (S_mat, S2); - gsl_matrix_scale (S_mat, -1*tau_inv); - gsl_matrix_add (S_mat, S1); + gsl_matrix_memcpy(S_mat, S2); + gsl_matrix_scale(S_mat, -1 * tau_inv); + gsl_matrix_add(S_mat, S1); // Update h=S^{-1}q. int sig; - gsl_permutation * pmt=gsl_permutation_alloc (n_vc); - LUDecomp (S_mat, pmt, &sig); - LUInvert (S_mat, pmt, Si_mat); - gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); + gsl_permutation *pmt = gsl_permutation_alloc(n_vc); + LUDecomp(S_mat, pmt, &sig); + LUInvert(S_mat, pmt, Si_mat); + gsl_blas_dgemv(CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); it++; } // Compute V matrix and A matrix (K_scale is destroyed, so need to // compute V first). - gsl_matrix_set_zero (V_mat); - for (size_t i=0; i1) { - cout<<"total pve = "<size1; - gsl_vector *geno=gsl_vector_alloc (ni_test); - gsl_vector *geno_miss=gsl_vector_alloc (ni_test); - gsl_vector *wz=gsl_vector_alloc (w->size); - gsl_vector_memcpy (wz, z); - gsl_vector_mul(wz, w); - - for (size_t t=0; t &indicator_idv, vector &indicator_snp, + const vector &vec_cat, const gsl_vector *w, + const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + string line; + char *ch_ptr; + + size_t n_miss; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + gsl_vector *geno = gsl_vector_alloc(ni_test); + gsl_vector *geno_miss = gsl_vector_alloc(ni_test); + gsl_vector *wz = gsl_vector_alloc(w->size); + gsl_vector_memcpy(wz, z); + gsl_vector_mul(wz, w); + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + geno_mean = 0.0; + n_miss = 0; + geno_var = 0.0; + gsl_vector_set_all(geno_miss, 0); + + size_t j = 0; + for (size_t i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ch_ptr = strtok(NULL, " , \t"); + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(geno_miss, i, 0); + n_miss++; + } else { + d = atof(ch_ptr); + gsl_vector_set(geno, j, d); + gsl_vector_set(geno_miss, j, 1); + geno_mean += d; + geno_var += d * d; + } + j++; + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]); + d = gsl_vector_get(wz, ns_test); + gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector); + + ns_test++; + } + + cout << endl; + + gsl_vector_free(geno); + gsl_vector_free(geno_miss); + gsl_vector_free(wz); + + infile.close(); + infile.clear(); + + return true; } // Read PLINK bed file and compute XWz. -bool PlinkXwz (const string &file_bed, const int display_pace, - vector &indicator_idv, vector &indicator_snp, - const vector &vec_cat, const gsl_vector *w, - const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"< b; - - size_t n_miss, ci_total, ci_test; - double d, geno_mean, geno_var; - - size_t ni_test=XWz->size1; - size_t ni_total=indicator_idv.size(); - gsl_vector *geno=gsl_vector_alloc (ni_test); - gsl_vector *wz=gsl_vector_alloc (w->size); - gsl_vector_memcpy (wz, z); - gsl_vector_mul(wz, w); - - int n_bit; - - // Calculate n_bit and c, the number of bit for each snp. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (size_t t=0; t &indicator_idv, vector &indicator_snp, + const vector &vec_cat, const gsl_vector *w, + const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + char ch[1]; + bitset<8> b; + + size_t n_miss, ci_total, ci_test; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + size_t ni_total = indicator_idv.size(); + gsl_vector *geno = gsl_vector_alloc(ni_test); + gsl_vector *wz = gsl_vector_alloc(w->size); + gsl_vector_memcpy(wz, z); + gsl_vector_mul(wz, w); + + int n_bit; + + // Calculate n_bit and c, the number of bit for each snp. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + geno_mean = 0.0; + n_miss = 0; + ci_total = 0; + geno_var = 0.0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(geno, ci_test, 2.0); + geno_mean += 2.0; + geno_var += 4.0; + } else { + gsl_vector_set(geno, ci_test, 1.0); + geno_mean += 1.0; + geno_var += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(geno, ci_test, 0.0); + } else { + gsl_vector_set(geno, ci_test, -9.0); + n_miss++; + } + } + + ci_test++; + ci_total++; + } + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + d = gsl_vector_get(geno, i); + if (d == -9.0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + gsl_vector_view XWz_col = gsl_matrix_column(XWz, vec_cat[ns_test]); + d = gsl_vector_get(wz, ns_test); + gsl_blas_daxpy(d / sqrt(geno_var), geno, &XWz_col.vector); + + ns_test++; + } + cout << endl; + + gsl_vector_free(geno); + gsl_vector_free(wz); + + infile.close(); + infile.clear(); + + return true; } // Read multiple genotype files and compute XWz. -bool MFILEXwz (const size_t mfile_mode, const string &file_mfile, - const int display_pace, vector &indicator_idv, - vector > &mindicator_snp, - const vector &vec_cat, const gsl_vector *w, - const gsl_vector *z, gsl_matrix *XWz) { +bool MFILEXwz(const size_t mfile_mode, const string &file_mfile, + const int display_pace, vector &indicator_idv, + vector> &mindicator_snp, + const vector &vec_cat, const gsl_vector *w, + const gsl_vector *z, gsl_matrix *XWz) { gsl_matrix_set_zero(XWz); - igzstream infile (file_mfile.c_str(), igzstream::in); + igzstream infile(file_mfile.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open mfile file: "< &indicator_idv, vector &indicator_snp, - const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<size1; - gsl_vector *geno=gsl_vector_alloc (ni_test); - gsl_vector *geno_miss=gsl_vector_alloc (ni_test); - - for (size_t t=0; tsize2; i++) { - gsl_vector_const_view XWz_col= - gsl_matrix_const_column(XWz, i); - gsl_blas_ddot (geno, &XWz_col.vector, &d); - gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); - } - - ns_test++; - } - - cout< &indicator_idv, vector &indicator_snp, + const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return false; + } + + string line; + char *ch_ptr; + + size_t n_miss; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + gsl_vector *geno = gsl_vector_alloc(ni_test); + gsl_vector *geno_miss = gsl_vector_alloc(ni_test); + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + !safeGetline(infile, line).eof(); + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + geno_mean = 0.0; + n_miss = 0; + geno_var = 0.0; + gsl_vector_set_all(geno_miss, 0); + + size_t j = 0; + for (size_t i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ch_ptr = strtok(NULL, " , \t"); + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(geno_miss, i, 0); + n_miss++; + } else { + d = atof(ch_ptr); + gsl_vector_set(geno, j, d); + gsl_vector_set(geno_miss, j, 1); + geno_mean += d; + geno_var += d * d; + } + j++; + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + if (gsl_vector_get(geno_miss, i) == 0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + for (size_t i = 0; i < XWz->size2; i++) { + gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i); + gsl_blas_ddot(geno, &XWz_col.vector, &d); + gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var)); + } + + ns_test++; + } + + cout << endl; + + gsl_vector_free(geno); + gsl_vector_free(geno_miss); + + infile.close(); + infile.clear(); + + return true; } // Read PLINK bed file and compute XWz. -bool PlinkXtXwz (const string &file_bed, const int display_pace, - vector &indicator_idv, vector &indicator_snp, - const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"< b; - - size_t n_miss, ci_total, ci_test; - double d, geno_mean, geno_var; - - size_t ni_test=XWz->size1; - size_t ni_total=indicator_idv.size(); - gsl_vector *geno=gsl_vector_alloc (ni_test); - - int n_bit; - - // Calculate n_bit and c, the number of bit for each snp. - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - // Print the first three magic numbers. - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - for (size_t t=0; tsize2; i++) { - gsl_vector_const_view XWz_col= - gsl_matrix_const_column(XWz, i); - gsl_blas_ddot (geno, &XWz_col.vector, &d); - gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); - } - - ns_test++; - } - cout< &indicator_idv, vector &indicator_snp, + const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return false; + } + + char ch[1]; + bitset<8> b; + + size_t n_miss, ci_total, ci_test; + double d, geno_mean, geno_var; + + size_t ni_test = XWz->size1; + size_t ni_total = indicator_idv.size(); + gsl_vector *geno = gsl_vector_alloc(ni_test); + + int n_bit; + + // Calculate n_bit and c, the number of bit for each snp. + if (ni_total % 4 == 0) { + n_bit = ni_total / 4; + } else { + n_bit = ni_total / 4 + 1; + } + + // Print the first three magic numbers. + for (int i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + for (size_t t = 0; t < indicator_snp.size(); ++t) { + if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { + ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1); + } + if (indicator_snp[t] == 0) { + continue; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + geno_mean = 0.0; + n_miss = 0; + ci_total = 0; + geno_var = 0.0; + ci_test = 0; + for (int i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0; + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == ni_total) { + break; + } + if (indicator_idv[ci_total] == 0) { + ci_total++; + continue; + } + + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(geno, ci_test, 2.0); + geno_mean += 2.0; + geno_var += 4.0; + } else { + gsl_vector_set(geno, ci_test, 1.0); + geno_mean += 1.0; + geno_var += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(geno, ci_test, 0.0); + } else { + gsl_vector_set(geno, ci_test, -9.0); + n_miss++; + } + } + + ci_test++; + ci_total++; + } + } + + geno_mean /= (double)(ni_test - n_miss); + geno_var += geno_mean * geno_mean * (double)n_miss; + geno_var /= (double)ni_test; + geno_var -= geno_mean * geno_mean; + + for (size_t i = 0; i < ni_test; ++i) { + d = gsl_vector_get(geno, i); + if (d == -9.0) { + gsl_vector_set(geno, i, geno_mean); + } + } + + gsl_vector_add_constant(geno, -1.0 * geno_mean); + + for (size_t i = 0; i < XWz->size2; i++) { + gsl_vector_const_view XWz_col = gsl_matrix_const_column(XWz, i); + gsl_blas_ddot(geno, &XWz_col.vector, &d); + gsl_matrix_set(XtXWz, ns_test, i, d / sqrt(geno_var)); + } + + ns_test++; + } + cout << endl; + + gsl_vector_free(geno); + + infile.close(); + infile.clear(); + + return true; } // Read multiple genotype files and compute XWz. -bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile, - const int display_pace, vector &indicator_idv, - vector > &mindicator_snp, const gsl_matrix *XWz, - gsl_matrix *XtXWz) { +bool MFILEXtXwz(const size_t mfile_mode, const string &file_mfile, + const int display_pace, vector &indicator_idv, + vector> &mindicator_snp, const gsl_matrix *XWz, + gsl_matrix *XtXWz) { gsl_matrix_set_zero(XtXWz); - igzstream infile (file_mfile.c_str(), igzstream::in); + igzstream infile(file_mfile.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open mfile file: "< &vec_cat, const vector &v_pve, - vector &v_se_pve, double &pve_total, - double &se_pve_total, vector &v_sigma2, - vector &v_se_sigma2, vector &v_enrich, - vector &v_se_enrich) { - size_t n_vc=XWz->size2, ns_test=w->size, ni_test=XWz->size1; + const gsl_matrix *XtXWz, const gsl_matrix *S_mat, + const gsl_matrix *Svar_mat, const gsl_vector *w, + const gsl_vector *z, const gsl_vector *s_vec, + const vector &vec_cat, const vector &v_pve, + vector &v_se_pve, double &pve_total, double &se_pve_total, + vector &v_sigma2, vector &v_se_sigma2, + vector &v_enrich, vector &v_se_enrich) { + size_t n_vc = XWz->size2, ns_test = w->size, ni_test = XWz->size1; // Set up matrices. - gsl_vector *w_pve=gsl_vector_alloc (ns_test); - gsl_vector *wz=gsl_vector_alloc (ns_test); - gsl_vector *zwz=gsl_vector_alloc (n_vc); - gsl_vector *zz=gsl_vector_alloc (n_vc); - gsl_vector *Xz_pve=gsl_vector_alloc (ni_test); - gsl_vector *WXtXWz=gsl_vector_alloc (ns_test); - - gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); - gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_vector *w_pve = gsl_vector_alloc(ns_test); + gsl_vector *wz = gsl_vector_alloc(ns_test); + gsl_vector *zwz = gsl_vector_alloc(n_vc); + gsl_vector *zz = gsl_vector_alloc(n_vc); + gsl_vector *Xz_pve = gsl_vector_alloc(ni_test); + gsl_vector *WXtXWz = gsl_vector_alloc(ns_test); + + gsl_matrix *Si_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *Var_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *tmp_mat1 = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *VarEnrich_mat = gsl_matrix_alloc(n_vc, n_vc); + gsl_matrix *qvar_mat = gsl_matrix_alloc(n_vc, n_vc); double d, s0, s1, s, s_pve, s_snp; // Compute wz and zwz. - gsl_vector_memcpy (wz, z); - gsl_vector_mul (wz, w); + gsl_vector_memcpy(wz, z); + gsl_vector_mul(wz, w); - gsl_vector_set_zero (zwz); - gsl_vector_set_zero (zz); - for (size_t i=0; isize; i++) { - d=gsl_vector_get (wz, i)*gsl_vector_get (z, i); - d+=gsl_vector_get (zwz, vec_cat[i]); - gsl_vector_set (zwz, vec_cat[i], d); + gsl_vector_set_zero(zwz); + gsl_vector_set_zero(zz); + for (size_t i = 0; i < w->size; i++) { + d = gsl_vector_get(wz, i) * gsl_vector_get(z, i); + d += gsl_vector_get(zwz, vec_cat[i]); + gsl_vector_set(zwz, vec_cat[i], d); - d=gsl_vector_get (z, i)*gsl_vector_get (z, i); - d+=gsl_vector_get (zz, vec_cat[i]); - gsl_vector_set (zz, vec_cat[i], d); + d = gsl_vector_get(z, i) * gsl_vector_get(z, i); + d += gsl_vector_get(zz, vec_cat[i]); + gsl_vector_set(zz, vec_cat[i], d); } // Compute wz, ve and Xz_pve. - gsl_vector_set_zero (Xz_pve); s_pve=0; s_snp=0; - for (size_t i=0; isize; i++) { - d=v_pve[vec_cat[i]]/gsl_vector_get(s_vec, vec_cat[i]); - gsl_vector_set (w_pve, i, d); + for (size_t i = 0; i < w->size; i++) { + d = v_pve[vec_cat[i]] / gsl_vector_get(s_vec, vec_cat[i]); + gsl_vector_set(w_pve, i, d); } // Compute Vq (in qvar_mat). - s0=1-s_pve; - for (size_t i=0; i