From c18588b6d00650b9ce742229fdf1eca7133f58fc Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Thu, 4 May 2017 14:41:32 -0500 Subject: Local updates made by Xiang---shared via email on May 4, 2017, subject: gemma on expression data. --- src/vc.cpp | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 295 insertions(+), 5 deletions(-) (limited to 'src/vc.cpp') diff --git a/src/vc.cpp b/src/vc.cpp index f17a9e9..c0aa40d 100644 --- a/src/vc.cpp +++ b/src/vc.cpp @@ -453,7 +453,7 @@ int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, g //read header to determine which column contains which item -bool ReadHeader_vc (const string &line, HEADER &header) +bool ReadHeader (const string &line, HEADER &header) { string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID"}; set rs_set(rs_ptr, rs_ptr+10); @@ -586,7 +586,7 @@ void ReadFile_cor (const string &file_cor, const set &setSnps, vector &vec_rs, const v HEADER header; !safeGetline(infile, line).eof(); - ReadHeader_vc (line, header); + ReadHeader (line, header); while (!safeGetline(infile, line).eof()) { //do not read cor values this time; upto col_n-1 @@ -1059,6 +1059,25 @@ void ReadFile_cor (const string &file_cor, const vector &vec_rs, const v return; } + + + + +//copied from lmm.cpp; is used in the following function VCss +//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1 +size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) { + if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<a) {l=a; h=b;} else {l=b; h=a;} + + size_t n=n_cvt+2; + index=(2*n-l+2)*(l-1)/2+h-l; + + return index; +} + + //use the new method to calculate variance components with summary statistics //first, use a function CalcS to compute S matrix (where the diagonal elements are part of V(q) ), and then use bootstrap to compute the variance for S, use a set of genotypes, phenotypes, and individual ids, and snp category label void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *q_vec, const gsl_vector *s_vec, const double df, vector &v_pve, vector &v_se_pve, double &pve_total, double &se_pve_total, vector &v_sigma2, vector &v_se_sigma2, vector &v_enrich, vector &v_se_enrich) { @@ -1336,7 +1355,7 @@ void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y gsl_vector_set(q_vec, i, d); } - //compuate yKrKKry, which is used later for confidence interval + //compute yKrKKry, which is used later for confidence interval for (size_t i=0; isize1, n2=K->size2; + size_t n_vc=n2/n1; + + double d, y2_sum, tau_inv, se_tau_inv; + + //new matrices/vectors + gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2); + gsl_vector *y_scale=gsl_vector_alloc (n1); + gsl_vector *y2=gsl_vector_alloc (n1); + gsl_vector *n1_vec=gsl_vector_alloc (n1); + gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc); + gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc); + gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1); + gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1); + + //old matrices/vectors + gsl_vector *pve=gsl_vector_alloc (n_vc); + gsl_vector *se_pve=gsl_vector_alloc (n_vc); + gsl_vector *q_vec=gsl_vector_alloc (n_vc); + gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc); + gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc); + + int sig; + gsl_permutation * pmt=gsl_permutation_alloc (n_vc); + + //center and scale K by W + //and standardize K further so that all diagonal elements are 1 + for (size_t i=0; isize; i++) { + y2_sum+=gsl_vector_get(y2, i); + } + + //compute the n_vc size q vector + for (size_t i=0; isize1; t++) { + gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t); + gsl_vector_add (n1_vec, &Ktmp_col.vector); + } + gsl_vector_add_constant (n1_vec, -1.0); + + //compute S1 + gsl_blas_ddot (n1_vec, y2, &d); + gsl_matrix_set (S1, i, j, 2*d); + if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);} + + //compute S2 + d=0; + for (size_t t=0; tsize; t++) { + d+=gsl_vector_get (n1_vec, t); + } + gsl_matrix_set (S2, i, j, d); + if (i!=j) {gsl_matrix_set (S2, j, i, d);} + + //save information to compute J + gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j); + gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i); + + gsl_vector_memcpy(&K2col1.vector, n1_vec); + if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);} + } + } + + //iterate to solve tau and h's + size_t it=0; + double s=1; + while (abs(s)>1e-3 && it<100) { + //update tau_inv + gsl_blas_ddot (q_vec, pve, &d); + if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;} + tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1)); + if (it>0) {s/=tau_inv;} + + //update S + gsl_matrix_memcpy (S_mat, S2); + gsl_matrix_scale (S_mat, -1*tau_inv); + gsl_matrix_add (S_mat, S1); + + //update h=S^{-1}q + int sig; + gsl_permutation * pmt=gsl_permutation_alloc (n_vc); + LUDecomp (S_mat, pmt, &sig); + LUInvert (S_mat, pmt, Si_mat); + gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve); + + //cout<1) { + cout<<"total pve = "<