From 943e970c9cbc184dcca679fbe455f48c32242cdc Mon Sep 17 00:00:00 2001 From: xiangzhou Date: Mon, 23 May 2016 17:05:35 -0400 Subject: version 0.95alpha --- src/vc.cpp | 2376 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 2216 insertions(+), 160 deletions(-) (limited to 'src/vc.cpp') diff --git a/src/vc.cpp b/src/vc.cpp index 77cf746..94bf931 100644 --- a/src/vc.cpp +++ b/src/vc.cpp @@ -1,17 +1,17 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011 Xiang Zhou - + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see . */ @@ -26,8 +26,12 @@ #include #include #include -#include +#include #include +#include +#include +#include +#include #include #include "gsl/gsl_vector.h" @@ -39,9 +43,14 @@ #include "gsl/gsl_multiroots.h" #include "gsl/gsl_min.h" +#include "Eigen/Dense" + +#include "param.h" #include "io.h" #include "lapack.h" +#include "eigenlib.h" #include "gzstream.h" +#include "mathfunc.h" #ifdef FORCE_FLOAT #include "lmm_float.h" @@ -54,95 +63,194 @@ using namespace std; - +using namespace Eigen; //in this file, X, Y are already transformed (i.e. UtX and UtY) -void VC::CopyFromParam (PARAM &cPar) -{ - file_out=cPar.file_out; - - // v_sigma2=cPar.v_sigma2; - - time_UtX=0.0; - time_opt=0.0; +void VC::CopyFromParam (PARAM &cPar) +{ + a_mode=cPar.a_mode; + + file_cat=cPar.file_cat; + file_beta=cPar.file_beta; + file_cor=cPar.file_cor; - v_traceG=cPar.v_traceG; - - return; + setSnps=cPar.setSnps; + + file_out=cPar.file_out; + path_out=cPar.path_out; + + //v_sigma2=cPar.v_sigma2; + + time_UtX=0.0; + time_opt=0.0; + + v_traceG=cPar.v_traceG; + + ni_total=cPar.ni_total; + ns_total=cPar.ns_total; + ns_test=cPar.ns_test; + + crt=cPar.crt; + window_cm=cPar.window_cm; + window_bp=cPar.window_bp; + window_ns=cPar.window_ns; + + n_vc=cPar.n_vc; + + return; } -void VC::CopyToParam (PARAM &cPar) +void VC::CopyToParam (PARAM &cPar) { cPar.time_UtX=time_UtX; - cPar.time_opt=time_opt; - - cPar.v_sigma2=v_sigma2; - cPar.v_se_sigma2=v_se_sigma2; + cPar.time_opt=time_opt; + cPar.v_pve=v_pve; cPar.v_se_pve=v_se_pve; + cPar.v_sigma2=v_sigma2; + cPar.v_se_sigma2=v_se_sigma2; + cPar.pve_total=pve_total; + cPar.se_pve_total=se_pve_total; cPar.v_traceG=v_traceG; - + cPar.v_beta=v_beta; cPar.v_se_beta=v_se_beta; - + + cPar.ni_total=ni_total; + cPar.ns_total=ns_total; + cPar.ns_test=ns_test; + + cPar.n_vc=n_vc; + + return; +} + + + +void VC::WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec, const gsl_vector *qvar_vec, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat) +{ + string file_str; + file_str=path_out+"/"+file_out; + file_str+=".qvec.txt"; + + ofstream outfile_q (file_str.c_str(), ofstream::out); + if (!outfile_q) {cout<<"error writing file: "<size; i++) { + outfile_q<size; i++) { + outfile_q<size; i++) { + outfile_q<size1; i++) { + for (size_t j=0; jsize2; j++) { + outfile_s<size1; i++) { + for (size_t j=0; jsize2; j++) { + outfile_s<K)->size1, n_vc=log_sigma2->size-1, n_cvt=(p->W)->size2; - + gsl_matrix *K_temp=gsl_matrix_alloc(n1, n1); gsl_matrix *HiW=gsl_matrix_alloc(n1, n_cvt); gsl_matrix *WtHiW=gsl_matrix_alloc(n_cvt, n_cvt); gsl_matrix *WtHiWi=gsl_matrix_alloc(n_cvt, n_cvt); gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1); - double sigma2; + double sigma2; //calculate H=\sum_i^{k+1} \sigma_i^2 K_i gsl_matrix_set_zero (p->P); for (size_t i=0; iK, 0, n1*i, n1, n1); gsl_matrix_memcpy (K_temp, &K_sub.matrix); } - sigma2=exp(gsl_vector_get (log_sigma2, i) ); + //when unconstrained, update on sigma2 instead of log_sigma2 + if (p->noconstrain) { + sigma2=gsl_vector_get (log_sigma2, i); + } else { + sigma2=exp(gsl_vector_get (log_sigma2, i) ); + } gsl_matrix_scale(K_temp, sigma2); gsl_matrix_add (p->P, K_temp); } //calculate H^{-1} + /* int sig; gsl_permutation * pmt1=gsl_permutation_alloc (n1); - LUDecomp (p->P, pmt1, &sig); + LUDecomp (p->P, pmt1, &sig); LUInvert (p->P, pmt1, K_temp); gsl_permutation_free(pmt1); gsl_matrix_memcpy (p->P, K_temp); + */ + eigenlib_invert(p->P); //calculate P=H^{-1}-H^{-1}W(W^TH^{-1}W)^{-1}W^TH^{-1} - gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, p->P, p->W, 0.0, HiW); - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, p->W, HiW, 0.0, WtHiW); + //gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, p->P, p->W, 0.0, HiW); + //gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, p->W, HiW, 0.0, WtHiW); + + eigenlib_dgemm ("N", "N", 1.0, p->P, p->W, 0.0, HiW); + eigenlib_dgemm ("T", "N", 1.0, p->W, HiW, 0.0, WtHiW); - gsl_permutation * pmt2=gsl_permutation_alloc (n_cvt); - LUDecomp (WtHiW, pmt2, &sig); - LUInvert (WtHiW, pmt2, WtHiWi); - gsl_permutation_free(pmt2); + //gsl_permutation * pmt2=gsl_permutation_alloc (n_cvt); + //LUDecomp (WtHiW, pmt2, &sig); + //LUInvert (WtHiW, pmt2, WtHiWi); + //gsl_permutation_free(pmt2); + eigenlib_invert(WtHiW); + gsl_matrix_memcpy(WtHiWi, WtHiW); + + //gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); + //gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, -1.0, HiW, WtHiWiWtHi, 1.0, p->P); + eigenlib_dgemm ("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); + eigenlib_dgemm ("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P); - gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi); - gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, -1.0, HiW, WtHiWiWtHi, 1.0, p->P); - //calculate Py, KPy, PKPy - gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py); + gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py); + //eigenlib_dgemv("N", 1.0, p->P, p->y, 0.0, p->Py); + double d; for (size_t i=0; iKPy_mat, i); gsl_vector_view PKPy=gsl_matrix_column (p->PKPy_mat, i); @@ -150,11 +258,22 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) if (i==n_vc) { gsl_vector_memcpy (&KPy.vector, p->Py); } else { - gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); + gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); + //seems to be important to use gsl dgemv here instead of eigenlib_dgemv; otherwise gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector); + //eigenlib_dgemv("N", 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector); } - + gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector); + //eigenlib_dgemv("N", 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector); + + //when phenotypes are not normalized well, then some values in the following matrix maybe nan; change that to 0; this seems to only happen when eigenlib_dgemv was used above + for (size_t j=0; jKPy_mat->size1; j++) { + d=gsl_matrix_get (p->KPy_mat, j, i); + if (std::isnan(d)) {gsl_matrix_set (p->KPy_mat, j, i, 0); cout<<"nan appears in "<PKPy_mat, j, i); + if (std::isnan(d)) {gsl_matrix_set (p->PKPy_mat, j, i, 0); cout<<"nan appears in "<K)->size1, n_vc=log_sigma2->size-1; - + double tr, d; //update parameters @@ -199,8 +318,12 @@ int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1) gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); - d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i)); - + if (p->noconstrain) { + d=(-0.5*tr+0.5*d); + } else { + d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i)); + } + gsl_vector_set(dev1, i, d); } @@ -214,32 +337,47 @@ int LogRL_dev2 (const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2) VC_PARAM *p=(VC_PARAM *) params; size_t n_vc=log_sigma2->size-1; - + double d, sigma2_i, sigma2_j; //update parameters UpdateParam (log_sigma2, p); - + //calculate dev2=0.5(yPKPKPy) for (size_t i=0; iKPy_mat, i); - sigma2_i=exp(gsl_vector_get(log_sigma2, i)); + if (p->noconstrain) { + sigma2_i=gsl_vector_get(log_sigma2, i); + } else { + sigma2_i=exp(gsl_vector_get(log_sigma2, i)); + } for (size_t j=i; jPKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); - sigma2_j=exp(gsl_vector_get(log_sigma2, j)); - - d*=-0.5*sigma2_i*sigma2_j; + if (p->noconstrain) { + sigma2_j=gsl_vector_get(log_sigma2, j); + d*=-0.5; + } else { + sigma2_j=exp(gsl_vector_get(log_sigma2, j)); + d*=-0.5*sigma2_i*sigma2_j; + } gsl_matrix_set(dev2, i, j, d); if (j!=i) {gsl_matrix_set(dev2, j, i, d);} - } + } } gsl_matrix_memcpy (p->Hessian, dev2); - + /* + for (size_t i=0; isize1; i++) { + for (size_t j=0; jsize2; j++) { + cout<K)->size1, n_vc=log_sigma2->size-1; - + double tr, d, sigma2_i, sigma2_j; //update parameters UpdateParam (log_sigma2, p); - //calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy - //calculate dev2=0.5(yPKPKPy) + //calculate dev1=(-0.5*trace(PK_i)+0.5*yPK_iPy)*sigma2_i + //calculate dev2=0.5(yPK_iPK_jPy)*sigma2_i*sigma2_j for (size_t i=0; iKPy_mat, i); gsl_blas_ddot(p->Py, &KPy_i.vector, &d); - sigma2_i=exp(gsl_vector_get(log_sigma2, i)); - d=(-0.5*tr+0.5*d)*sigma2_i; - + if (p->noconstrain) { + sigma2_i=gsl_vector_get(log_sigma2, i); + d=(-0.5*tr+0.5*d); + } else { + sigma2_i=exp(gsl_vector_get(log_sigma2, i)); + d=(-0.5*tr+0.5*d)*sigma2_i; + } + gsl_vector_set(dev1, i, d); - + for (size_t j=i; jPKPy_mat, j); gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d); - sigma2_j=exp(gsl_vector_get(log_sigma2, j)); - d*=-0.5*sigma2_i*sigma2_j; + if (p->noconstrain) { + sigma2_j=gsl_vector_get(log_sigma2, j); + d*=-0.5; + } else { + sigma2_j=exp(gsl_vector_get(log_sigma2, j)); + d*=-0.5*sigma2_i*sigma2_j; + } gsl_matrix_set(dev2, i, j, d); if (j!=i) {gsl_matrix_set(dev2, j, i, d);} - } + } } @@ -303,141 +451,2049 @@ int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, g -void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y) + +//read header to determine which column contains which item +bool ReadHeader (const string &line, HEADER &header) { - size_t n1=K->size1, n2=K->size2; - size_t n_vc=n2/n1; - gsl_vector *log_sigma2=gsl_vector_alloc (n_vc+1); - double d, s; + string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID"}; + set rs_set(rs_ptr, rs_ptr+10); + string chr_ptr[]={"chr","CHR"}; + set chr_set(chr_ptr, chr_ptr+2); + string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", "bp", "BP"}; + set pos_set(pos_ptr, pos_ptr+8); + string cm_ptr[]={"cm","CM"}; + set cm_set(cm_ptr, cm_ptr+2); + string a1_ptr[]={"a1","A1","allele1","ALLELE1"}; + set a1_set(a1_ptr, a1_ptr+4); + string a0_ptr[]={"a0","A0","allele0","ALLELE0"}; + set a0_set(a0_ptr, a0_ptr+4); + + string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"}; + set z_set(z_ptr, z_ptr+6); + string beta_ptr[]={"beta","BETA","b","B"}; + set beta_set(beta_ptr, beta_ptr+4); + string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"}; + set sebeta_set(sebeta_ptr, sebeta_ptr+4); + string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"}; + set chisq_set(chisq_ptr, chisq_ptr+4); + string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"}; + set p_set(p_ptr, p_ptr+6); + + string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"}; + set n_set(n_ptr, n_ptr+6); + string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"}; + set nmis_set(nmis_ptr, nmis_ptr+6); + string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"}; + set nobs_set(nobs_ptr, nobs_ptr+4); + + string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq","ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"}; + set af_set(af_ptr, af_ptr+10); + string var_ptr[]={"var","VAR"}; + set var_set(var_ptr, var_ptr+2); + + string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"}; + set ws_set(ws_ptr, ws_ptr+4); + string cor_ptr[]={"cor","COR","r","R"}; + set cor_set(cor_ptr, cor_ptr+4); + + header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; header.cor_col=0; header.coln=0; + + char *ch_ptr; + string type; + size_t n_error=0; + + ch_ptr=strtok ((char *)line.c_str(), " , \t"); + while (ch_ptr!=NULL) { + type=ch_ptr; + if (rs_set.count(type)!=0) { + if (header.rs_col==0) {header.rs_col=header.coln+1;} else {cout<<"error! more than two rs columns in the file."< &setSnps, vector &vec_rs, vector &vec_n, vector &vec_cm, vector &vec_bp, map &mapRS2in, map &mapRS2var) +{ + vec_rs.clear(); + vec_n.clear(); + mapRS2in.clear(); + mapRS2var.clear(); + + igzstream infile (file_cor.c_str(), igzstream::in); + if (!infile) {cout<<"error! fail to open cov file: "<0) {vec_cm.push_back(d_cm);} else {vec_cm.push_back(d_cm);} + if (d_pos>0) {vec_bp.push_back(d_pos);} else {vec_bp.push_back(d_pos);} + + //record mapRS2in and mapRS2var + if (setSnps.size()==0 || setSnps.count(rs)!=0) { + if (mapRS2in.count(rs)==0) { + mapRS2in[rs]=1; + + if (header.var_col!=0) { + mapRS2var[rs]=var_x; + } else if (header.af_col!=0) { + var_x=2.0*af*(1.0-af); + mapRS2var[rs]=var_x; + } else {} + + ns_test++; + + } else { + cout<<"error! more than one snp has the same id "< &mapRS2cat, map &mapRS2in, map &mapRS2var, map &mapRS2nsamp, gsl_vector *q_vec, gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total, size_t &ns_total) +{ + mapRS2nsamp.clear(); + + igzstream infile (file_beta.c_str(), igzstream::in); + if (!infile) {cout<<"error! fail to open beta file: "< vec_q, vec_qvar, vec_s; + for (size_t i=0; isize; i++) { + vec_q.push_back(0.0); + vec_qvar.push_back(0.0); + vec_s.push_back(0.0); + } + + //read header + HEADER header; + !safeGetline(infile, line).eof(); + ReadHeader (line, header); + + if (header.n_col==0 ) { + if (header.nobs_col==0 && header.nmis_col==0) { + cout<<"error! missing sample size in the beta file."<f, 1e-3); + if (header.n_col==0) { + n_total=n_mis+n_obs; + } + + //both z values and beta/se_beta have directions, while chisq/pvalue do not + if (header.z_col!=0) { + zsquare=z*z; + } else if (header.beta_col!=0 && header.sebeta_col!=0) { + z=beta/se_beta; + zsquare=z*z; + } else if (header.chisq_col!=0) { + zsquare=chisq; + } else if (header.p_col!=0) { + zsquare=gsl_cdf_chisq_Qinv (pvalue, 1); + } else {zsquare=0;} + + //if the snp is also present in cor file, then do calculations + if ((header.var_col!=0 || header.af_col!=0 || mapRS2var.count(rs)!=0) && mapRS2in.count(rs)!=0 && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) { + if (mapRS2in.at(rs)>1) { + cout<<"error! more than one snp has the same id "<f, ¶ms, dev1, dev2); + for (size_t i=0; isize; i++) { + gsl_vector_set(q_vec, i, vec_q[i]); + gsl_vector_set(qvar_vec, i, 2.0*vec_qvar[i]); + gsl_vector_set(s_vec, i, vec_s[i]); + } - gsl_permutation * pmt=gsl_permutation_alloc (n_vc+1); - LUDecomp (dev2, pmt, &sig); - LUInvert (dev2, pmt, Hessian); - gsl_permutation_free(pmt); - //save data - v_sigma2.clear(); - for (size_t i=0; ix, i)); - v_sigma2.push_back(d); + infile.clear(); + infile.close(); + + return; +} + + + + + +//read covariance file the second time +//look for rs, n_mis+n_obs, var, window_size, cov +//if window_cm/bp/ns is provided, then use these max values to calibrate estimates +void ReadFile_cor (const string &file_cor, const vector &vec_rs, const vector &vec_n, const vector &vec_cm, const vector &vec_bp, const map &mapRS2cat, const map &mapRS2in, const map &mapRS2var, const map &mapRS2nsamp, const size_t crt, const double &window_cm, const double &window_bp, const double &window_ns, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total, size_t &ns_total, size_t &ns_test, size_t &ns_pair) +{ + igzstream infile (file_cor.c_str(), igzstream::in); + if (!infile) {cout<<"error! fail to open cov file: "< > mat_S, mat_Svar, mat_tmp; + vector vec_qvar, vec_tmp; + vector > > mat3d_Sbin; + + for (size_t i=0; isize1; i++) { + vec_qvar.push_back(0.0); } - v_se_sigma2.clear(); - for (size_t i=0; isize1; i++) { + mat_S.push_back(vec_qvar); + mat_Svar.push_back(vec_qvar); } - s=0; - for (size_t i=0; isize1; i++) { + mat_tmp.push_back(vec_tmp); + } + for (size_t i=0; isize1; i++) { + mat3d_Sbin.push_back(mat_tmp); } - v_se_pve.clear(); - for (size_t i=0; isize1; i++) { + for (size_t j=i; jsize2; j++) { + + //correct mat_S + n=0; var_y=0; var_x=0; mean_y=0; mean_x=0; cov_xy=0; + for (size_t k=0; k0) { + y=1/sqrt(y); + mean_x+=x; mean_y+=y; var_x+=x*x; var_y+=y*y; cov_xy+=x*y; + n++; + } + } + cout<=5) { + mean_x/=n; mean_y/=n; var_x/=n; var_y/=n; cov_xy/=n; + var_x-=mean_x*mean_x; var_y-=mean_y*mean_y; cov_xy-=mean_x*mean_y; + b=cov_xy/var_x; + a=mean_y-b*mean_x; + crt_factor=a/(b*(bin_size+0.5))+1; + if (i==j) { + mat_S[i][j]*=crt_factor; + } else { + mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor; + } + cout<size1; i++) { + d1=gsl_vector_get(qvar_vec, i)+2*vec_qvar[i]; + gsl_vector_set(qvar_vec, i, d1); + for (size_t j=0; jsize2; j++) { + if (i==j) { + gsl_matrix_set(S_mat, i, j, mat_S[i][i]); + gsl_matrix_set(Svar_mat, i, j, 2.0*mat_Svar[i][i]*ns_test*ns_test/(2.0*ns_pair) ); + } else { + gsl_matrix_set(S_mat, i, j, mat_S[i][j]+mat_S[j][i]); + gsl_matrix_set(Svar_mat, i, j, 2.0*(mat_Svar[i][j]+mat_Svar[j][i])*ns_test*ns_test/(2.0*ns_pair) ); + } + } + } + + + + infile.clear(); + infile.close(); return; } - +//copied from lmm.cpp; is used in the following function VCss +//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1 +size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) { + if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<a) {l=a; h=b;} else {l=b; h=a;} + + size_t n=n_cvt+2; + index=(2*n-l+2)*(l-1)/2+h-l; + + return index; +} + + +//use the new method to calculate variance components with summary statistics +//first, use a function CalcS to compute S matrix (where the diagonal elements are part of V(q) ), and then use bootstrap to compute the variance for S, use a set of genotypes, phenotypes, and individual ids, and snp category label +void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *q_vec, const gsl_vector *s_vec, const double df, vector &v_pve, vector &v_se_pve, double &pve_total, double &se_pve_total, vector &v_sigma2, vector &v_se_sigma2, vector &v_enrich, vector &v_se_enrich) { + size_t n_vc=S_mat->size1; + + gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); + + gsl_vector *pve=gsl_vector_alloc (n_vc); + gsl_vector *pve_plus=gsl_vector_alloc (n_vc+1); + gsl_vector *tmp=gsl_vector_alloc (n_vc+1); + gsl_vector *sigma2persnp=gsl_vector_alloc (n_vc); + gsl_vector *enrich=gsl_vector_alloc (n_vc); + gsl_vector *se_pve=gsl_vector_alloc (n_vc); + gsl_vector *se_sigma2persnp=gsl_vector_alloc (n_vc); + gsl_vector *se_enrich=gsl_vector_alloc (n_vc); + + double d; + + //calculate S^{-1}q + gsl_matrix_memcpy (tmp_mat, S_mat); + int sig; + gsl_permutation * pmt=gsl_permutation_alloc (n_vc); + LUDecomp (tmp_mat, pmt, &sig); + LUInvert (tmp_mat, pmt, Si_mat); + + //calculate sigma2snp and pve + gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); + gsl_vector_memcpy(sigma2persnp, pve); + gsl_vector_div(sigma2persnp, s_vec); + + //get qvar_mat + /* + if (n_block==0 || n_block==1) { + double s=1.0; + for (size_t i=0; isize1, n2=K->size2; + size_t n_vc=n2/n1; + + double r=(double)n1/(double)(n1 - W->size2); + double var_y, var_y_new; + double d, tr, s, v; + vector traceG_new; + + //new matrices/vectors + gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); + gsl_vector *y_scale=gsl_vector_alloc (n1); + gsl_matrix *Kry=gsl_matrix_alloc (n1, n_vc); + gsl_matrix *yKrKKry=gsl_matrix_alloc (n_vc, n_vc*(n_vc+1) ); + gsl_vector *KKry=gsl_vector_alloc (n1); + + //old matrices/vectors + gsl_vector *pve=gsl_vector_alloc (n_vc); + gsl_vector *se_pve=gsl_vector_alloc (n_vc); + gsl_vector *q_vec=gsl_vector_alloc (n_vc); + gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + + //center and scale K by W + for (size_t i=0; i1) { + cout<<"total pve = "<size1); + time_start=clock(); + for (size_t i=0; i<1000; i++) { + gsl_blas_dgemv(CblasNoTrans, 1.0, K2, &W_col.vector, 0.0, v); + } + cout<<"standard time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<size1, K->size1); + gsl_matrix *K3=gsl_matrix_alloc(K->size1, K->size1); + + gsl_matrix_memcpy(K2copy, K2); + time_start=clock(); + EigenDecomp(K2copy, K3, v, 0); + cout<<"standard time 0: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<size1); + LUDecomp (K2copy, pmt1, &sigcopy); + LUInvert (K2copy, pmt1, K3); + gsl_permutation_free(pmt1); + cout<<"standard time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<x, i))<<" "; + } + } + cout<f, 1e-3); + } + while (status==GSL_CONTINUE && iterx, ¶ms, dev1, dev2); + /* + for (size_t i=0; isize1; i++) { + for (size_t j=0; jsize2; j++) { + cout<x, i); + } else { + d=exp(gsl_vector_get(s_fdf->x, i)); + } + v_sigma2.push_back(d); + + if (noconstrain) { + d=-1.0*gsl_matrix_get(Hessian, i, i); + } else { + d=-1.0*d*d*gsl_matrix_get(Hessian, i, i); + } + v_se_sigma2.push_back(sqrt(d)); + } + + s=0; + for (size_t i=0; ix, i); + d1=1; + } else { + d1=exp(gsl_vector_get(s_fdf->x, i)); + } + + if (kx, j); + d2=1; + } else { + d2=exp(gsl_vector_get(s_fdf->x, j)); + } + + if (k &indicator_idv, vector &indicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) +{ + igzstream infile (file_geno.c_str(), igzstream::in); + //ifstream infile (file_geno.c_str(), ifstream::in); + if (!infile) {cout<<"error reading genotype file:"<size1; + gsl_vector *geno=gsl_vector_alloc (ni_test); + gsl_vector *geno_miss=gsl_vector_alloc (ni_test); + gsl_vector *wz=gsl_vector_alloc (w->size); + gsl_vector_memcpy (wz, z); + gsl_vector_mul(wz, w); + + for (size_t t=0; t &indicator_idv, vector &indicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz) +{ + ifstream infile (file_bed.c_str(), ios::binary); + if (!infile) {cout<<"error reading bed file:"< b; + + size_t n_miss, ci_total, ci_test; + double d, geno_mean, geno_var; + + size_t ni_test=XWz->size1; + size_t ni_total=indicator_idv.size(); + gsl_vector *geno=gsl_vector_alloc (ni_test); + gsl_vector *wz=gsl_vector_alloc (w->size); + gsl_vector_memcpy (wz, z); + gsl_vector_mul(wz, w); + + int n_bit; + //calculate n_bit and c, the number of bit for each snp + if (ni_total%4==0) {n_bit=ni_total/4;} + else {n_bit=ni_total/4+1; } + + //print the first three majic numbers + for (int i=0; i<3; ++i) { + infile.read(ch,1); + b=ch[0]; + } + + for (size_t t=0; t &indicator_idv, vector > &mindicator_snp, const vector &vec_cat, const gsl_vector *w, const gsl_vector *z, gsl_matrix *XWz) +{ + gsl_matrix_set_zero(XWz); + + igzstream infile (file_mfile.c_str(), igzstream::in); + if (!infile) {cout<<"error! fail to open mfile file: "< &indicator_idv, vector &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) +{ + igzstream infile (file_geno.c_str(), igzstream::in); + //ifstream infile (file_geno.c_str(), ifstream::in); + if (!infile) {cout<<"error reading genotype file:"<size1; + gsl_vector *geno=gsl_vector_alloc (ni_test); + gsl_vector *geno_miss=gsl_vector_alloc (ni_test); + + for (size_t t=0; tsize2; i++) { + gsl_vector_const_view XWz_col=gsl_matrix_const_column(XWz, i); + gsl_blas_ddot (geno, &XWz_col.vector, &d); + gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); + } + + ns_test++; + } + + cout< &indicator_idv, vector &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz) +{ + ifstream infile (file_bed.c_str(), ios::binary); + if (!infile) {cout<<"error reading bed file:"< b; + + size_t n_miss, ci_total, ci_test; + double d, geno_mean, geno_var; + + size_t ni_test=XWz->size1; + size_t ni_total=indicator_idv.size(); + gsl_vector *geno=gsl_vector_alloc (ni_test); + + int n_bit; + + //calculate n_bit and c, the number of bit for each snp + if (ni_total%4==0) {n_bit=ni_total/4;} + else {n_bit=ni_total/4+1; } + + //print the first three majic numbers + for (int i=0; i<3; ++i) { + infile.read(ch,1); + b=ch[0]; + } + + for (size_t t=0; tsize2; i++) { + gsl_vector_const_view XWz_col=gsl_matrix_const_column(XWz, i); + gsl_blas_ddot (geno, &XWz_col.vector, &d); + gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var)); + } + + ns_test++; + } + cout< &indicator_idv, vector > &mindicator_snp, const gsl_matrix *XWz, gsl_matrix *XtXWz) +{ + gsl_matrix_set_zero(XtXWz); + + igzstream infile (file_mfile.c_str(), igzstream::in); + if (!infile) {cout<<"error! fail to open mfile file: "< &vec_cat, const vector &v_pve, vector &v_se_pve, double &pve_total, double &se_pve_total, vector &v_sigma2, vector &v_se_sigma2, vector &v_enrich, vector &v_se_enrich) { + size_t n_vc=XWz->size2, ns_test=w->size, ni_test=XWz->size1; + + //set up matrices + gsl_vector *w_pve=gsl_vector_alloc (ns_test); + gsl_vector *wz=gsl_vector_alloc (ns_test); + gsl_vector *zwz=gsl_vector_alloc (n_vc); + gsl_vector *zz=gsl_vector_alloc (n_vc); + gsl_vector *Xz_pve=gsl_vector_alloc (ni_test); + gsl_vector *WXtXWz=gsl_vector_alloc (ns_test); + + gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc); + + double d, s0, s1, s, s_pve, s_snp; + + //compute wz and zwz + gsl_vector_memcpy (wz, z); + gsl_vector_mul (wz, w); + + gsl_vector_set_zero (zwz); + gsl_vector_set_zero (zz); + for (size_t i=0; isize; i++) { + d=gsl_vector_get (wz, i)*gsl_vector_get (z, i); + d+=gsl_vector_get (zwz, vec_cat[i]); + gsl_vector_set (zwz, vec_cat[i], d); + + d=gsl_vector_get (z, i)*gsl_vector_get (z, i); + d+=gsl_vector_get (zz, vec_cat[i]); + gsl_vector_set (zz, vec_cat[i], d); + } + + //compute wz, ve and Xz_pve + gsl_vector_set_zero (Xz_pve); s_pve=0; s_snp=0; + for (size_t i=0; isize; i++) { + d=v_pve[vec_cat[i]]/gsl_vector_get(s_vec, vec_cat[i]); + gsl_vector_set (w_pve, i, d); + } + + //compute Vq (in qvar_mat) + s0=1-s_pve; + for (size_t i=0; i