about summary refs log tree commit diff
path: root/src/vc.cpp
diff options
context:
space:
mode:
authorPeter Carbonetto2017-05-04 14:41:32 -0500
committerPeter Carbonetto2017-05-04 14:41:32 -0500
commitc18588b6d00650b9ce742229fdf1eca7133f58fc (patch)
tree2a7262fc48dbbdca244d0860c8b5167c69f3c553 /src/vc.cpp
parent452f232cb627d7180bf1c845dff9ddd88af6a600 (diff)
downloadpangemma-c18588b6d00650b9ce742229fdf1eca7133f58fc.tar.gz
Local updates made by Xiang---shared via email on May 4, 2017, subject: gemma on expression data.
Diffstat (limited to 'src/vc.cpp')
-rw-r--r--src/vc.cpp300
1 files changed, 295 insertions, 5 deletions
diff --git a/src/vc.cpp b/src/vc.cpp
index f17a9e9..c0aa40d 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -453,7 +453,7 @@ int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, g
 
 
 //read header to determine which column contains which item
-bool ReadHeader_vc (const string &line, HEADER &header)
+bool ReadHeader (const string &line, HEADER &header)
 {
   string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID"};
   set<string> rs_set(rs_ptr, rs_ptr+10);
@@ -586,7 +586,7 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps, vector<st
 
   //header
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader (line, header);
 
   if (header.n_col==0 ) {
     if (header.nobs_col==0 && header.nmis_col==0) {
@@ -700,7 +700,7 @@ void ReadFile_beta (const bool flag_priorscale, const string &file_beta, const m
   //read header
   HEADER header;
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader (line, header);
 
   if (header.n_col==0 ) {
     if (header.nobs_col==0 && header.nmis_col==0) {
@@ -869,7 +869,7 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, const v
   HEADER header;
 
   !safeGetline(infile, line).eof();
-  ReadHeader_vc (line, header);
+  ReadHeader (line, header);
 
   while (!safeGetline(infile, line).eof()) {
     //do not read cor values this time; upto col_n-1
@@ -1059,6 +1059,25 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, const v
   return;
 }
 
+
+
+
+
+//copied from lmm.cpp; is used in the following function VCss
+//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
+size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
+	if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
+	size_t index;
+	size_t l, h;
+	if (b>a) {l=a; h=b;} else {l=b; h=a;}
+
+	size_t n=n_cvt+2;
+	index=(2*n-l+2)*(l-1)/2+h-l;
+
+	return index;
+}
+
+
 //use the new method to calculate variance components with summary statistics
 //first, use a function CalcS to compute S matrix (where the diagonal elements are part of V(q) ), and then use bootstrap to compute the variance for S, use a set of genotypes, phenotypes, and individual ids, and snp category label
 void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *q_vec, const gsl_vector *s_vec, const double df, vector<double> &v_pve, vector<double> &v_se_pve, double &pve_total, double &se_pve_total, vector<double> &v_sigma2, vector<double> &v_se_sigma2, vector<double> &v_enrich, vector<double> &v_se_enrich) {
@@ -1336,7 +1355,7 @@ void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y
     gsl_vector_set(q_vec, i, d);
   }
 
-  //compuate yKrKKry, which is used later for confidence interval
+  //compute yKrKKry, which is used later for confidence interval
   for (size_t i=0; i<n_vc; i++) {
     gsl_vector_const_view Kry_coli=gsl_matrix_const_column (Kry, i);
     for (size_t j=i; j<n_vc; j++) {
@@ -1842,6 +1861,277 @@ void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K, const gsl_matrix *W,
 
 
 
+//Ks are not scaled;
+void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y)
+{
+  size_t n1=K->size1, n2=K->size2;
+  size_t n_vc=n2/n1;
+
+  double d, y2_sum, tau_inv, se_tau_inv;
+
+  //new matrices/vectors
+  gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2);
+  gsl_vector *y_scale=gsl_vector_alloc (n1);
+  gsl_vector *y2=gsl_vector_alloc (n1);
+  gsl_vector *n1_vec=gsl_vector_alloc (n1);
+  gsl_matrix *Ay=gsl_matrix_alloc (n1, n_vc);
+  gsl_matrix *K2=gsl_matrix_alloc (n1, n_vc*n_vc);
+  gsl_matrix *K_tmp=gsl_matrix_alloc (n1, n1);
+  gsl_matrix *V_mat=gsl_matrix_alloc (n1, n1);
+
+  //old matrices/vectors
+  gsl_vector *pve=gsl_vector_alloc (n_vc);
+  gsl_vector *se_pve=gsl_vector_alloc (n_vc);
+  gsl_vector *q_vec=gsl_vector_alloc (n_vc);
+  gsl_matrix *S1=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *S2=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *J_mat=gsl_matrix_alloc (n_vc, n_vc);
+  gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+
+  int sig;
+  gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+
+  //center and scale K by W
+  //and standardize K further so that all diagonal elements are 1
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_view Kscale_sub = gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+    gsl_matrix_const_view K_sub = gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1);
+    gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix);
+
+    CenterMatrix (&Kscale_sub.matrix, W);
+    StandardizeMatrix (&Kscale_sub.matrix);
+  }
+
+  //center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1)
+  gsl_vector_memcpy (y_scale, y);
+  CenterVector (y_scale, W);
+  //  StandardizeVector (y_scale);
+
+  //compute y^2 and sum(y^2), which is also the variance of y*n1
+  gsl_vector_memcpy (y2, y_scale);
+  gsl_vector_mul (y2, y_scale);
+
+  y2_sum=0;
+  for (size_t i=0; i<y2->size; i++) {
+    y2_sum+=gsl_vector_get(y2, i);
+  }
+
+  //compute the n_vc size q vector
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_const_view Kscale_sub = gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, n1_vec);
+
+    gsl_blas_ddot (n1_vec, y_scale, &d);
+    gsl_vector_set(q_vec, i, d-y2_sum);
+  }
+
+  //compute the n_vc by n_vc S1 and S2 matrix (and eventually S=S1-\tau^{-1}S2)
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_const_view Kscale_sub1 = gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+
+    for (size_t j=i; j<n_vc; j++) {
+      gsl_matrix_const_view Kscale_sub2 = gsl_matrix_const_submatrix (K_scale, 0, n1*j, n1, n1);
+
+      gsl_matrix_memcpy (K_tmp, &Kscale_sub1.matrix);
+      gsl_matrix_mul_elements (K_tmp, &Kscale_sub2.matrix);
+
+      gsl_vector_set_zero(n1_vec);
+      for (size_t t=0; t<K_tmp->size1; t++) {
+	gsl_vector_view Ktmp_col=gsl_matrix_column (K_tmp, t);
+	gsl_vector_add (n1_vec, &Ktmp_col.vector);
+      }
+      gsl_vector_add_constant (n1_vec, -1.0);
+
+      //compute S1
+      gsl_blas_ddot (n1_vec, y2, &d);
+      gsl_matrix_set (S1, i, j, 2*d);
+      if (i!=j) {gsl_matrix_set (S1, j, i, 2*d);}
+
+      //compute S2
+      d=0;
+      for (size_t t=0; t<n1_vec->size; t++) {
+	d+=gsl_vector_get (n1_vec, t);
+      }
+      gsl_matrix_set (S2, i, j, d);
+      if (i!=j) {gsl_matrix_set (S2, j, i, d);}
+
+      //save information to compute J
+      gsl_vector_view K2col1=gsl_matrix_column (K2, n_vc*i+j);
+      gsl_vector_view K2col2=gsl_matrix_column (K2, n_vc*j+i);
+
+      gsl_vector_memcpy(&K2col1.vector, n1_vec);
+      if (i!=j) {gsl_vector_memcpy(&K2col2.vector, n1_vec);}
+    }
+  }
+
+  //iterate to solve tau and h's
+  size_t it=0;
+  double s=1;
+  while (abs(s)>1e-3 && it<100) {
+    //update tau_inv
+    gsl_blas_ddot (q_vec, pve, &d);
+    if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;}
+    tau_inv=y2_sum/(double)n1-d/((double)n1*((double)n1-1));
+    if (it>0) {s/=tau_inv;}
+
+    //update S
+    gsl_matrix_memcpy (S_mat, S2);
+    gsl_matrix_scale (S_mat, -1*tau_inv);
+    gsl_matrix_add (S_mat, S1);
+
+    //update h=S^{-1}q
+    int sig;
+    gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+    LUDecomp (S_mat, pmt, &sig);
+    LUInvert (S_mat, pmt, Si_mat);
+    gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+
+    //cout<<tau_inv<<endl;
+    it++;
+  }
+
+  //compute V matrix and A matrix (K_scale is destroyed, so need to compute V first)
+  gsl_matrix_set_zero (V_mat);
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_matrix_view Kscale_sub = gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+
+    //compute V
+    gsl_matrix_memcpy (K_tmp, &Kscale_sub.matrix);
+    gsl_matrix_scale (K_tmp, gsl_vector_get(pve, i));
+    gsl_matrix_add (V_mat, K_tmp);
+
+    //compute A; the corresponding Kscale is destroyed
+    gsl_matrix_const_view K2_sub = gsl_matrix_const_submatrix (K2, 0, n_vc*i, n1, n_vc);
+    gsl_blas_dgemv (CblasNoTrans, 1.0, &K2_sub.matrix, pve, 0.0, n1_vec);
+
+    for (size_t t=0; t<n1; t++) {
+      gsl_matrix_set (K_scale, t, n1*i+t, gsl_vector_get(n1_vec, t) );
+    }
+
+    //compute Ay
+    gsl_vector_view Ay_col=gsl_matrix_column (Ay, i);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, 0.0, &Ay_col.vector);
+  }
+  gsl_matrix_scale (V_mat, tau_inv);
+
+  //compute J matrix
+  for (size_t i=0; i<n_vc; i++) {
+    gsl_vector_view Ay_col1=gsl_matrix_column (Ay, i);
+    gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, &Ay_col1.vector, 0.0, n1_vec);
+
+    for (size_t j=i; j<n_vc; j++) {
+      gsl_vector_view Ay_col2=gsl_matrix_column (Ay, j);
+
+      gsl_blas_ddot (&Ay_col2.vector, n1_vec, &d);
+      gsl_matrix_set (J_mat, i, j, 2.0*d);
+      if (i!=j) {gsl_matrix_set (J_mat, j, i, 2.0*d);}
+    }
+  }
+
+  //compute H^{-1}JH^{-1} as V(\hat h), where H=S2*tau_inv; this is stored in Var_mat
+  gsl_matrix_memcpy (S_mat, S2);
+  gsl_matrix_scale (S_mat, tau_inv);
+
+  LUDecomp (S_mat, pmt, &sig);
+  LUInvert (S_mat, pmt, Si_mat);
+
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, J_mat, 0.0, S_mat);
+  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, S_mat, Si_mat, 0.0, Var_mat);
+
+  //compute variance for tau_inv
+  gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec);
+  gsl_blas_ddot (y_scale, n1_vec, &d);
+  se_tau_inv=sqrt(2*d)/(double)n1;
+
+  //transform pve back to the original scale and save data
+  v_pve.clear(); v_se_pve.clear();
+  v_sigma2.clear(); v_se_sigma2.clear();
+
+  pve_total=0, se_pve_total=0;
+  for (size_t i=0; i<n_vc; i++) {
+    d=gsl_vector_get (pve, i);
+    pve_total+=d;
+
+    v_pve.push_back(d);
+    v_sigma2.push_back(d*tau_inv/v_traceG[i] );
+
+    d=sqrt(gsl_matrix_get (Var_mat, i, i));
+    v_se_pve.push_back(d);
+    v_se_sigma2.push_back(d*tau_inv/v_traceG[i]);
+
+    //d*=sqrt(var_y/v_traceG[i]-v_sigma2[i]);
+    //v_se_pve.push_back(d/var_y);
+
+    for (size_t j=0; j<n_vc; j++) {
+      se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+    }
+  }
+  v_sigma2.push_back( (1-pve_total)*tau_inv );
+  v_se_sigma2.push_back(sqrt(se_pve_total)*tau_inv );
+  se_pve_total=sqrt(se_pve_total);
+
+  cout<<"sigma2 = ";
+  for (size_t i=0; i<n_vc+1; i++) {
+    cout<<v_sigma2[i]<<" ";
+  }
+  cout<<endl;
+
+  cout<<"se(sigma2) = ";
+  for (size_t i=0; i<n_vc+1; i++) {
+    cout<<v_se_sigma2[i]<<" ";
+  }
+  cout<<endl;
+
+  cout<<"pve = ";
+  for (size_t i=0; i<n_vc; i++) {
+    cout<<v_pve[i]<<" ";
+  }
+  cout<<endl;
+
+  cout<<"se(pve) = ";
+  for (size_t i=0; i<n_vc; i++) {
+    cout<<v_se_pve[i]<<" ";
+  }
+  cout<<endl;
+
+  if (n_vc>1) {
+    cout<<"total pve = "<<pve_total<<endl;
+    cout<<"se(total pve) = "<<se_pve_total<<endl;
+  }
+
+  gsl_permutation_free(pmt);
+
+  gsl_matrix_free(K_scale);
+  gsl_vector_free(y_scale);
+  gsl_vector_free(y2);
+  gsl_vector_free(n1_vec);
+  gsl_matrix_free(Ay);
+  gsl_matrix_free(K2);
+  gsl_matrix_free(K_tmp);
+  gsl_matrix_free(V_mat);
+
+  gsl_vector_free(pve);
+  gsl_vector_free(se_pve);
+  gsl_vector_free(q_vec);
+  gsl_matrix_free(S1);
+  gsl_matrix_free(S2);
+  gsl_matrix_free(S_mat);
+  gsl_matrix_free(Si_mat);
+  gsl_matrix_free(J_mat);
+  gsl_matrix_free(Var_mat);
+
+  return;
+}
+
+
+
+
+
+
+
 //read bimbam mean genotype file and compute XWz
 bool BimbamXwz (const string &file_geno, const int display_pace, vector<int> &indicator_idv, vector<int> &indicator_snp, const vector<size_t> &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz)
 {