/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_multiroots.h" #include "gsl/gsl_min.h" #include "Eigen/Dense" #include "param.h" #include "io.h" #include "lapack.h" #include "eigenlib.h" #include "gzstream.h" #include "mathfunc.h" #include "lmm.h" #include "vc.h" using namespace std; using namespace Eigen; // In this file, X, Y are already transformed (i.e. UtX and UtY). void VC::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; file_cat=cPar.file_cat; file_beta=cPar.file_beta; file_cor=cPar.file_cor; setSnps=cPar.setSnps; file_out=cPar.file_out; path_out=cPar.path_out; time_UtX=0.0; time_opt=0.0; v_traceG=cPar.v_traceG; ni_total=cPar.ni_total; ns_total=cPar.ns_total; ns_test=cPar.ns_test; crt=cPar.crt; window_cm=cPar.window_cm; window_bp=cPar.window_bp; window_ns=cPar.window_ns; n_vc=cPar.n_vc; return; } void VC::CopyToParam (PARAM &cPar) { cPar.time_UtX=time_UtX; cPar.time_opt=time_opt; cPar.v_pve=v_pve; cPar.v_se_pve=v_se_pve; cPar.v_sigma2=v_sigma2; cPar.v_se_sigma2=v_se_sigma2; cPar.pve_total=pve_total; cPar.se_pve_total=se_pve_total; cPar.v_traceG=v_traceG; cPar.v_beta=v_beta; cPar.v_se_beta=v_se_beta; cPar.ni_total=ni_total; cPar.ns_total=ns_total; cPar.ns_test=ns_test; cPar.n_vc=n_vc; return; } void VC::WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec, const gsl_vector *qvar_vec, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat) { string file_str; file_str=path_out+"/"+file_out; file_str+=".qvec.txt"; ofstream outfile_q (file_str.c_str(), ofstream::out); if (!outfile_q) { cout<<"error writing file: "<size; i++) { outfile_q<size; i++) { outfile_q<size; i++) { outfile_q<size1; i++) { for (size_t j=0; jsize2; j++) { outfile_s<size1; i++) { for (size_t j=0; jsize2; j++) { outfile_s<K)->size1, n_vc=log_sigma2->size-1, n_cvt=(p->W)->size2; gsl_matrix *K_temp=gsl_matrix_alloc(n1, n1); gsl_matrix *HiW=gsl_matrix_alloc(n1, n_cvt); gsl_matrix *WtHiW=gsl_matrix_alloc(n_cvt, n_cvt); gsl_matrix *WtHiWi=gsl_matrix_alloc(n_cvt, n_cvt); gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1); double sigma2; // Calculate H = \sum_i^{k+1} \sigma_i^2 K_i. gsl_matrix_set_zero (p->P); for (size_t i=0; iK, 0, n1*i, n1, n1); gsl_matrix_memcpy (K_temp, &K_sub.matrix); } // When unconstrained, update on sigma2 instead of log_sigma2. if (p->noconstrain) { sigma2=gsl_vector_get (log_sigma2, i); } else { sigma2=exp(gsl_vector_get (log_sigma2, i) ); } gsl_matrix_scale(K_temp, sigma2); gsl_matrix_add (p->P, K_temp); } // Calculate H^{-1}. eigenlib_invert(p->P); eigenlib_dgemm ("N", "N", 1.0, p->P, p->W, 0.0, HiW); eigenlib_dgemm ("T", "N", 1.0, p->W, HiW, 0.0, WtHiW); eigenlib_invert(WtHiW); gsl_matrix_memcpy(WtHiWi, WtHiW); eigenlib_dgemm ("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); eigenlib_dgemm ("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P); // Calculate Py, KPy, PKPy. gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py); double d; for (size_t i=0; iKPy_mat, i); gsl_vector_view PKPy=gsl_matrix_column (p->PKPy_mat, i); if (i==n_vc) { gsl_vector_memcpy (&KPy.vector, p->Py); } else { gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); // Seems to be important to use gsl dgemv here instead of // eigenlib_dgemv; otherwise. gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector); } gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector); // When phenotypes are not normalized well, then some values in // the following matrix maybe NaN; change that to 0; this seems to // only happen when eigenlib_dgemv was used above. for (size_t j=0; jKPy_mat->size1; j++) { d=gsl_matrix_get (p->KPy_mat, j, i); if (std::isnan(d)) { gsl_matrix_set (p->KPy_mat, j, i, 0); cout<<"nan appears in "<PKPy_mat, j, i); if (std::isnan(d)) { gsl_matrix_set (p->PKPy_mat, j, i, 0); cout<<"nan appears in "<K)->size1, n_vc=log_sigma2->size-1; double tr, d; // Update parameters. UpdateParam (log_sigma2, p); // Calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy. for (size_t i=0; iP, l, l); } } else { tr=0; for (size_t l=0; lP, l); gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l); gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); tr+=d; } } gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); if (p->noconstrain) { d=(-0.5*tr+0.5*d); } else { d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i)); } gsl_vector_set(dev1, i, d); } return GSL_SUCCESS; } int LogRL_dev2 (const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) { VC_PARAM *p=(VC_PARAM *) params; size_t n_vc=log_sigma2->size-1; double d, sigma2_i, sigma2_j; // Update parameters. UpdateParam (log_sigma2, p); // Calculate dev2 = 0.5(yPKPKPy). for (size_t i=0; iKPy_mat, i); if (p->noconstrain) { sigma2_i=gsl_vector_get(log_sigma2, i); } else { sigma2_i=exp(gsl_vector_get(log_sigma2, i)); } for (size_t j=i; jPKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); if (p->noconstrain) { sigma2_j=gsl_vector_get(log_sigma2, j); d*=-0.5; } else { sigma2_j=exp(gsl_vector_get(log_sigma2, j)); d*=-0.5*sigma2_i*sigma2_j; } gsl_matrix_set(dev2, i, j, d); if (j!=i) {gsl_matrix_set(dev2, j, i, d);} } } gsl_matrix_memcpy (p->Hessian, dev2); return GSL_SUCCESS; } int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, gsl_matrix *dev2) { VC_PARAM *p=(VC_PARAM *) params; size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1; double tr, d, sigma2_i, sigma2_j; // Update parameters. UpdateParam (log_sigma2, p); for (size_t i=0; iP, l, l); } } else { tr=0; for (size_t l=0; lP, l); gsl_vector_const_view K_col=gsl_matrix_const_column (p->K, n1*i+l); gsl_blas_ddot(&P_row.vector, &K_col.vector, &d); tr+=d; } } gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); if (p->noconstrain) { sigma2_i=gsl_vector_get(log_sigma2, i); d=(-0.5*tr+0.5*d); } else { sigma2_i=exp(gsl_vector_get(log_sigma2, i)); d=(-0.5*tr+0.5*d)*sigma2_i; } gsl_vector_set(dev1, i, d); for (size_t j=i; jPKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); if (p->noconstrain) { sigma2_j=gsl_vector_get(log_sigma2, j); d*=-0.5; } else { sigma2_j=exp(gsl_vector_get(log_sigma2, j)); d*=-0.5*sigma2_i*sigma2_j; } gsl_matrix_set(dev2, i, j, d); if (j!=i) {gsl_matrix_set(dev2, j, i, d);} } } gsl_matrix_memcpy (p->Hessian, dev2); return GSL_SUCCESS; } // Read header to determine which column contains which item. bool ReadHeader_vc (const string &line, HEADER &header) { string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID", "rsid","RSID"}; set rs_set(rs_ptr, rs_ptr+10); string chr_ptr[]={"chr","CHR"}; set chr_set(chr_ptr, chr_ptr+2); string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", "bp", "BP"}; set pos_set(pos_ptr, pos_ptr+8); string cm_ptr[]={"cm","CM"}; set cm_set(cm_ptr, cm_ptr+2); string a1_ptr[]={"a1","A1","allele1","ALLELE1"}; set a1_set(a1_ptr, a1_ptr+4); string a0_ptr[]={"a0","A0","allele0","ALLELE0"}; set a0_set(a0_ptr, a0_ptr+4); string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"}; set z_set(z_ptr, z_ptr+6); string beta_ptr[]={"beta","BETA","b","B"}; set beta_set(beta_ptr, beta_ptr+4); string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"}; set sebeta_set(sebeta_ptr, sebeta_ptr+4); string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"}; set chisq_set(chisq_ptr, chisq_ptr+4); string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"}; set p_set(p_ptr, p_ptr+6); string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"}; set n_set(n_ptr, n_ptr+6); string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"}; set nmis_set(nmis_ptr, nmis_ptr+6); string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"}; set nobs_set(nobs_ptr, nobs_ptr+4); string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq", "ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"}; set af_set(af_ptr, af_ptr+10); string var_ptr[]={"var","VAR"}; set var_set(var_ptr, var_ptr+2); string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"}; set ws_set(ws_ptr, ws_ptr+4); string cor_ptr[]={"cor","COR","r","R"}; set cor_set(cor_ptr, cor_ptr+4); header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; header.cor_col=0; header.coln=0; char *ch_ptr; string type; size_t n_error=0; ch_ptr=strtok ((char *)line.c_str(), " , \t"); while (ch_ptr!=NULL) { type=ch_ptr; if (rs_set.count(type)!=0) { if (header.rs_col==0) { header.rs_col=header.coln+1; } else { cout<<"error! more than two rs columns in the file."< &setSnps, vector &vec_rs, vector &vec_n, vector &vec_cm, vector &vec_bp, map &mapRS2in, map &mapRS2var) { vec_rs.clear(); vec_n.clear(); mapRS2in.clear(); mapRS2var.clear(); igzstream infile (file_cor.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open cov file: "<0) {vec_cm.push_back(d_cm);} else {vec_cm.push_back(d_cm);} if (d_pos>0) {vec_bp.push_back(d_pos);} else {vec_bp.push_back(d_pos);} // Record mapRS2in and mapRS2var. if (setSnps.size()==0 || setSnps.count(rs)!=0) { if (mapRS2in.count(rs)==0) { mapRS2in[rs]=1; if (header.var_col!=0) { mapRS2var[rs]=var_x; } else if (header.af_col!=0) { var_x=2.0*af*(1.0-af); mapRS2var[rs]=var_x; } else {} ns_test++; } else { cout<<"error! more than one snp has the same id "< &mapRS2cat, map &mapRS2in, map &mapRS2var, map &mapRS2nsamp, gsl_vector *q_vec, gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total, size_t &ns_total) { mapRS2nsamp.clear(); igzstream infile (file_beta.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open beta file: "< vec_q, vec_qvar, vec_s; for (size_t i=0; isize; i++) { vec_q.push_back(0.0); vec_qvar.push_back(0.0); vec_s.push_back(0.0); } // Read header. HEADER header; !safeGetline(infile, line).eof(); ReadHeader_vc (line, header); if (header.n_col==0 ) { if (header.nobs_col==0 && header.nmis_col==0) { cout<<"error! missing sample size in the beta file."<1) { cout<<"error! more than one snp has the same id "<size; i++) { gsl_vector_set(q_vec, i, vec_q[i]); gsl_vector_set(qvar_vec, i, 2.0*vec_qvar[i]); gsl_vector_set(s_vec, i, vec_s[i]); } infile.clear(); infile.close(); return; } // Read covariance file the second time. // Look for rs, n_mis+n_obs, var, window_size, cov. // If window_cm/bp/ns is provided, then use these max values to // calibrate estimates. void ReadFile_cor (const string &file_cor, const vector &vec_rs, const vector &vec_n, const vector &vec_cm, const vector &vec_bp, const map &mapRS2cat, const map &mapRS2in, const map &mapRS2var, const map &mapRS2nsamp, const size_t crt, const double &window_cm, const double &window_bp, const double &window_ns, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total, size_t &ns_total, size_t &ns_test, size_t &ns_pair) { igzstream infile (file_cor.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open cov file: "< > mat_S, mat_Svar, mat_tmp; vector vec_qvar, vec_tmp; vector > > mat3d_Sbin; for (size_t i=0; isize1; i++) { vec_qvar.push_back(0.0); } for (size_t i=0; isize1; i++) { mat_S.push_back(vec_qvar); mat_Svar.push_back(vec_qvar); } for (size_t k=0; ksize1; i++) { mat_tmp.push_back(vec_tmp); } for (size_t i=0; isize1; i++) { mat3d_Sbin.push_back(mat_tmp); } string rs, chr, a1, a0, type, pos, cm; size_t n_total=0, n_mis=0, n_obs=0; double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm; ns_test=0; ns_total=0; ns_pair=0; ni_total=0; // Header. HEADER header; !safeGetline(infile, line).eof(); ReadHeader_vc (line, header); while (!safeGetline(infile, line).eof()) { // Do not read cor values this time; upto col_n-1. d_pos1=0; d_cm1=0; ch_ptr=strtok ((char *)line.c_str(), " , \t"); for (size_t i=0; isize1; i++) { for (size_t j=i; jsize2; j++) { // Correct mat_S. n=0; var_y=0; var_x=0; mean_y=0; mean_x=0; cov_xy=0; for (size_t k=0; k0) { y=1/sqrt(y); mean_x+=x; mean_y+=y; var_x+=x*x; var_y+=y*y; cov_xy+=x*y; n++; } } cout<=5) { mean_x/=n; mean_y/=n; var_x/=n; var_y/=n; cov_xy/=n; var_x-=mean_x*mean_x; var_y-=mean_y*mean_y; cov_xy-=mean_x*mean_y; b=cov_xy/var_x; a=mean_y-b*mean_x; crt_factor=a/(b*(bin_size+0.5))+1; if (i==j) { mat_S[i][j]*=crt_factor; } else { mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor; } cout<size1; i++) { d1=gsl_vector_get(qvar_vec, i)+2*vec_qvar[i]; gsl_vector_set(qvar_vec, i, d1); for (size_t j=0; jsize2; j++) { if (i==j) { gsl_matrix_set(S_mat, i, j, mat_S[i][i]); gsl_matrix_set(Svar_mat, i, j, 2.0*mat_Svar[i][i]*ns_test*ns_test/(2.0*ns_pair) ); } else { gsl_matrix_set(S_mat, i, j, mat_S[i][j]+mat_S[j][i]); gsl_matrix_set(Svar_mat, i, j, 2.0*(mat_Svar[i][j]+mat_Svar[j][i])* ns_test*ns_test/(2.0*ns_pair) ); } } } infile.clear(); infile.close(); return; } // Use the new method to calculate variance components with summary // statistics first, use a function CalcS to compute S matrix (where // the diagonal elements are part of V(q) ), and then use bootstrap to // compute the variance for S, use a set of genotypes, phenotypes, and // individual ids, and snp category label. void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *q_vec, const gsl_vector *s_vec, const double df, vector &v_pve, vector &v_se_pve, double &pve_total, double &se_pve_total, vector &v_sigma2, vector &v_se_sigma2, vector &v_enrich, vector &v_se_enrich) { size_t n_vc=S_mat->size1; gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_vector *pve=gsl_vector_alloc (n_vc); gsl_vector *pve_plus=gsl_vector_alloc (n_vc+1); gsl_vector *tmp=gsl_vector_alloc (n_vc+1); gsl_vector *sigma2persnp=gsl_vector_alloc (n_vc); gsl_vector *enrich=gsl_vector_alloc (n_vc); gsl_vector *se_pve=gsl_vector_alloc (n_vc); gsl_vector *se_sigma2persnp=gsl_vector_alloc (n_vc); gsl_vector *se_enrich=gsl_vector_alloc (n_vc); double d; // Calculate S^{-1}q. gsl_matrix_memcpy (tmp_mat, S_mat); int sig; gsl_permutation * pmt=gsl_permutation_alloc (n_vc); LUDecomp (tmp_mat, pmt, &sig); LUInvert (tmp_mat, pmt, Si_mat); // Calculate sigma2snp and pve. gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); gsl_vector_memcpy(sigma2persnp, pve); gsl_vector_div(sigma2persnp, s_vec); // Get qvar_mat. gsl_matrix_memcpy (qvar_mat, Vq); gsl_matrix_scale (qvar_mat, 1.0/(df*df)); // Calculate variance for these estimates. for (size_t i=0; isize1, n2=K->size2; size_t n_vc=n2/n1; double r=(double)n1/(double)(n1 - W->size2); double var_y, var_y_new; double d, tr, s, v; vector traceG_new; // New matrices/vectors. gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); gsl_vector *y_scale=gsl_vector_alloc (n1); gsl_matrix *Kry=gsl_matrix_alloc (n1, n_vc); gsl_matrix *yKrKKry=gsl_matrix_alloc (n_vc, n_vc*(n_vc+1) ); gsl_vector *KKry=gsl_vector_alloc (n1); // Old matrices/vectors. gsl_vector *pve=gsl_vector_alloc (n_vc); gsl_vector *se_pve=gsl_vector_alloc (n_vc); gsl_vector *q_vec=gsl_vector_alloc (n_vc); gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); // Center and scale K by W. for (size_t i=0; i1) { cout<<"total pve = "<x, i))<<" "; } } cout<f, 1e-3); } while (status==GSL_CONTINUE && iterx, ¶ms, dev1, dev2); gsl_permutation * pmt=gsl_permutation_alloc (n_vc+1); LUDecomp (dev2, pmt, &sig); LUInvert (dev2, pmt, Hessian); gsl_permutation_free(pmt); // Save sigma2 and se_sigma2. v_sigma2.clear(); v_se_sigma2.clear(); for (size_t i=0; ix, i); } else { d=exp(gsl_vector_get(s_fdf->x, i)); } v_sigma2.push_back(d); if (noconstrain) { d=-1.0*gsl_matrix_get(Hessian, i, i); } else { d=-1.0*d*d*gsl_matrix_get(Hessian, i, i); } v_se_sigma2.push_back(sqrt(d)); } s=0; for (size_t i=0; ix, i); d1=1; } else { d1=exp(gsl_vector_get(s_fdf->x, i)); } if (kx, j); d2=1; } else { d2=exp(gsl_vector_get(s_fdf->x, j)); } if (ksize1, n2=K->size2; size_t n_vc=n2/n1; double d, y2_sum, tau_inv, se_tau_inv; // New matrices/vectors. gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); gsl_vector *y_scale=gsl_vector_alloc (n1); gsl_vector *y2=gsl_vector_alloc (n1); gsl_vector *n1_vec=gsl_vector_alloc (n1); gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc); gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc); gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1); gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1); // Old matrices/vectors. gsl_vector *pve=gsl_vector_alloc (n_vc); gsl_vector *se_pve=gsl_vector_alloc (n_vc); gsl_vector *q_vec=gsl_vector_alloc (n_vc); gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); int sig; gsl_permutation * pmt=gsl_permutation_alloc (n_vc); // Center and scale K by W, and standardize K further so that all // diagonal elements are 1 for (size_t i=0; isize; i++) { y2_sum+=gsl_vector_get(y2, i); } // Compute the n_vc size q vector. for (size_t i=0; isize1; t++) { gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t); gsl_vector_add (n1_vec, &Ktmp_col.vector); } gsl_vector_add_constant (n1_vec, -1.0); // Compute S1. gsl_blas_ddot (n1_vec, y2, &d); gsl_matrix_set (S1, i, j, 2*d); if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);} // Compute S2. d=0; for (size_t t=0; tsize; t++) { d+=gsl_vector_get (n1_vec, t); } gsl_matrix_set (S2, i, j, d); if (i!=j) {gsl_matrix_set (S2, j, i, d);} // Save information to compute J. gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j); gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i); gsl_vector_memcpy(&K2col1.vector, n1_vec); if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);} } } // Iterate to solve tau and h's. size_t it=0; double s=1; while (abs(s)>1e-3 && it<100) { // Update tau_inv. gsl_blas_ddot (q_vec, pve, &d); if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;} tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1)); if (it>0) {s/=tau_inv;} // Update S. gsl_matrix_memcpy (S_mat, S2); gsl_matrix_scale (S_mat, -1*tau_inv); gsl_matrix_add (S_mat, S1); // Update h=S^{-1}q. int sig; gsl_permutation * pmt=gsl_permutation_alloc (n_vc); LUDecomp (S_mat, pmt, &sig); LUInvert (S_mat, pmt, Si_mat); gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); it++; } // Compute V matrix and A matrix (K_scale is destroyed, so need to // compute V first). gsl_matrix_set_zero (V_mat); for (size_t i=0; i1) { cout<<"total pve = "<size1; gsl_vector *geno=gsl_vector_alloc (ni_test); gsl_vector *geno_miss=gsl_vector_alloc (ni_test); gsl_vector *wz=gsl_vector_alloc (w->size); gsl_vector_memcpy (wz, z); gsl_vector_mul(wz, w); for (size_t t=0; t &indicator_idv, vector &indicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) { ifstream infile (file_bed.c_str(), ios::binary); if (!infile) { cout<<"error reading bed file:"< b; size_t n_miss, ci_total, ci_test; double d, geno_mean, geno_var; size_t ni_test=XWz->size1; size_t ni_total=indicator_idv.size(); gsl_vector *geno=gsl_vector_alloc (ni_test); gsl_vector *wz=gsl_vector_alloc (w->size); gsl_vector_memcpy (wz, z); gsl_vector_mul(wz, w); int n_bit; // Calculate n_bit and c, the number of bit for each snp. if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } // Print the first three magic numbers. for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } for (size_t t=0; t &indicator_idv, vector > &mindicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, gsl_matrix *XWz) { gsl_matrix_set_zero(XWz); igzstream infile (file_mfile.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open mfile file: "< &indicator_idv, vector &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { igzstream infile (file_geno.c_str(), igzstream::in); if (!infile) { cout<<"error reading genotype file:"<size1; gsl_vector *geno=gsl_vector_alloc (ni_test); gsl_vector *geno_miss=gsl_vector_alloc (ni_test); for (size_t t=0; tsize2; i++) { gsl_vector_const_view XWz_col= gsl_matrix_const_column(XWz, i); gsl_blas_ddot (geno, &XWz_col.vector, &d); gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); } ns_test++; } cout< &indicator_idv, vector &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) { ifstream infile (file_bed.c_str(), ios::binary); if (!infile) { cout<<"error reading bed file:"< b; size_t n_miss, ci_total, ci_test; double d, geno_mean, geno_var; size_t ni_test=XWz->size1; size_t ni_total=indicator_idv.size(); gsl_vector *geno=gsl_vector_alloc (ni_test); int n_bit; // Calculate n_bit and c, the number of bit for each snp. if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } // Print the first three magic numbers. for (int i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; } for (size_t t=0; tsize2; i++) { gsl_vector_const_view XWz_col= gsl_matrix_const_column(XWz, i); gsl_blas_ddot (geno, &XWz_col.vector, &d); gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); } ns_test++; } cout< &indicator_idv, vector > &mindicator_snp, const gsl_matrix *XWz, gsl_matrix *XtXWz) { gsl_matrix_set_zero(XtXWz); igzstream infile (file_mfile.c_str(), igzstream::in); if (!infile) { cout<<"error! fail to open mfile file: "< &vec_cat, const vector &v_pve, vector &v_se_pve, double &pve_total, double &se_pve_total, vector &v_sigma2, vector &v_se_sigma2, vector &v_enrich, vector &v_se_enrich) { size_t n_vc=XWz->size2, ns_test=w->size, ni_test=XWz->size1; // Set up matrices. gsl_vector *w_pve=gsl_vector_alloc (ns_test); gsl_vector *wz=gsl_vector_alloc (ns_test); gsl_vector *zwz=gsl_vector_alloc (n_vc); gsl_vector *zz=gsl_vector_alloc (n_vc); gsl_vector *Xz_pve=gsl_vector_alloc (ni_test); gsl_vector *WXtXWz=gsl_vector_alloc (ns_test); gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); double d, s0, s1, s, s_pve, s_snp; // Compute wz and zwz. gsl_vector_memcpy (wz, z); gsl_vector_mul (wz, w); gsl_vector_set_zero (zwz); gsl_vector_set_zero (zz); for (size_t i=0; isize; i++) { d=gsl_vector_get (wz, i)*gsl_vector_get (z, i); d+=gsl_vector_get (zwz, vec_cat[i]); gsl_vector_set (zwz, vec_cat[i], d); d=gsl_vector_get (z, i)*gsl_vector_get (z, i); d+=gsl_vector_get (zz, vec_cat[i]); gsl_vector_set (zz, vec_cat[i], d); } // Compute wz, ve and Xz_pve. gsl_vector_set_zero (Xz_pve); s_pve=0; s_snp=0; for (size_t i=0; isize; i++) { d=v_pve[vec_cat[i]]/gsl_vector_get(s_vec, vec_cat[i]); gsl_vector_set (w_pve, i, d); } // Compute Vq (in qvar_mat). s0=1-s_pve; for (size_t i=0; i