aboutsummaryrefslogtreecommitdiff
path: root/src/vc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/vc.cpp')
-rw-r--r--src/vc.cpp2240
1 files changed, 2148 insertions, 92 deletions
diff --git a/src/vc.cpp b/src/vc.cpp
index 77cf746..94bf931 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011 Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@@ -26,8 +26,12 @@
#include <cmath>
#include <iostream>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <bitset>
+#include <vector>
+#include <set>
+#include <map>
+#include <string>
#include <cstring>
#include "gsl/gsl_vector.h"
@@ -39,9 +43,14 @@
#include "gsl/gsl_multiroots.h"
#include "gsl/gsl_min.h"
+#include "Eigen/Dense"
+
+#include "param.h"
#include "io.h"
#include "lapack.h"
+#include "eigenlib.h"
#include "gzstream.h"
+#include "mathfunc.h"
#ifdef FORCE_FLOAT
#include "lmm_float.h"
@@ -54,95 +63,194 @@
using namespace std;
-
+using namespace Eigen;
//in this file, X, Y are already transformed (i.e. UtX and UtY)
-void VC::CopyFromParam (PARAM &cPar)
-{
- file_out=cPar.file_out;
-
- // v_sigma2=cPar.v_sigma2;
-
- time_UtX=0.0;
- time_opt=0.0;
+void VC::CopyFromParam (PARAM &cPar)
+{
+ a_mode=cPar.a_mode;
- v_traceG=cPar.v_traceG;
-
- return;
+ file_cat=cPar.file_cat;
+ file_beta=cPar.file_beta;
+ file_cor=cPar.file_cor;
+
+ setSnps=cPar.setSnps;
+
+ file_out=cPar.file_out;
+ path_out=cPar.path_out;
+
+ //v_sigma2=cPar.v_sigma2;
+
+ time_UtX=0.0;
+ time_opt=0.0;
+
+ v_traceG=cPar.v_traceG;
+
+ ni_total=cPar.ni_total;
+ ns_total=cPar.ns_total;
+ ns_test=cPar.ns_test;
+
+ crt=cPar.crt;
+ window_cm=cPar.window_cm;
+ window_bp=cPar.window_bp;
+ window_ns=cPar.window_ns;
+
+ n_vc=cPar.n_vc;
+
+ return;
}
-void VC::CopyToParam (PARAM &cPar)
+void VC::CopyToParam (PARAM &cPar)
{
cPar.time_UtX=time_UtX;
- cPar.time_opt=time_opt;
-
- cPar.v_sigma2=v_sigma2;
- cPar.v_se_sigma2=v_se_sigma2;
+ cPar.time_opt=time_opt;
+
cPar.v_pve=v_pve;
cPar.v_se_pve=v_se_pve;
+ cPar.v_sigma2=v_sigma2;
+ cPar.v_se_sigma2=v_se_sigma2;
+ cPar.pve_total=pve_total;
+ cPar.se_pve_total=se_pve_total;
cPar.v_traceG=v_traceG;
-
+
cPar.v_beta=v_beta;
cPar.v_se_beta=v_se_beta;
-
+
+ cPar.ni_total=ni_total;
+ cPar.ns_total=ns_total;
+ cPar.ns_test=ns_test;
+
+ cPar.n_vc=n_vc;
+
+ return;
+}
+
+
+
+void VC::WriteFile_qs (const gsl_vector *s_vec, const gsl_vector *q_vec, const gsl_vector *qvar_vec, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat)
+{
+ string file_str;
+ file_str=path_out+"/"+file_out;
+ file_str+=".qvec.txt";
+
+ ofstream outfile_q (file_str.c_str(), ofstream::out);
+ if (!outfile_q) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+
+ for (size_t i=0; i<s_vec->size; i++) {
+ outfile_q<<gsl_vector_get(s_vec, i)<<endl;
+ }
+ for (size_t i=0; i<q_vec->size; i++) {
+ outfile_q<<gsl_vector_get(q_vec, i)<<endl;
+ }
+ for (size_t i=0; i<qvar_vec->size; i++) {
+ outfile_q<<gsl_vector_get(qvar_vec, i)<<endl;
+ }
+
+ outfile_q.clear();
+ outfile_q.close();
+
+ file_str=path_out+"/"+file_out;
+ file_str+=".smat.txt";
+
+ ofstream outfile_s (file_str.c_str(), ofstream::out);
+ if (!outfile_s) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+
+ for (size_t i=0; i<S_mat->size1; i++) {
+ for (size_t j=0; j<S_mat->size2; j++) {
+ outfile_s<<gsl_matrix_get(S_mat, i, j)<<"\t";
+ }
+ outfile_s<<endl;
+ }
+ for (size_t i=0; i<Svar_mat->size1; i++) {
+ for (size_t j=0; j<Svar_mat->size2; j++) {
+ outfile_s<<gsl_matrix_get(Svar_mat, i, j)<<"\t";
+ }
+ outfile_s<<endl;
+ }
+
+ outfile_s.clear();
+ outfile_s.close();
+
return;
}
+
+
+
+
+
void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p)
{
size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1, n_cvt=(p->W)->size2;
-
+
gsl_matrix *K_temp=gsl_matrix_alloc(n1, n1);
gsl_matrix *HiW=gsl_matrix_alloc(n1, n_cvt);
gsl_matrix *WtHiW=gsl_matrix_alloc(n_cvt, n_cvt);
gsl_matrix *WtHiWi=gsl_matrix_alloc(n_cvt, n_cvt);
gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1);
- double sigma2;
+ double sigma2;
//calculate H=\sum_i^{k+1} \sigma_i^2 K_i
gsl_matrix_set_zero (p->P);
for (size_t i=0; i<n_vc+1; i++) {
if (i==n_vc) {
- gsl_matrix_set_identity (K_temp);
+ gsl_matrix_set_identity (K_temp);
} else {
gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
gsl_matrix_memcpy (K_temp, &K_sub.matrix);
}
- sigma2=exp(gsl_vector_get (log_sigma2, i) );
+ //when unconstrained, update on sigma2 instead of log_sigma2
+ if (p->noconstrain) {
+ sigma2=gsl_vector_get (log_sigma2, i);
+ } else {
+ sigma2=exp(gsl_vector_get (log_sigma2, i) );
+ }
gsl_matrix_scale(K_temp, sigma2);
gsl_matrix_add (p->P, K_temp);
}
//calculate H^{-1}
+ /*
int sig;
gsl_permutation * pmt1=gsl_permutation_alloc (n1);
- LUDecomp (p->P, pmt1, &sig);
+ LUDecomp (p->P, pmt1, &sig);
LUInvert (p->P, pmt1, K_temp);
gsl_permutation_free(pmt1);
gsl_matrix_memcpy (p->P, K_temp);
+ */
+ eigenlib_invert(p->P);
//calculate P=H^{-1}-H^{-1}W(W^TH^{-1}W)^{-1}W^TH^{-1}
- gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, p->P, p->W, 0.0, HiW);
- gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, p->W, HiW, 0.0, WtHiW);
+ //gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, p->P, p->W, 0.0, HiW);
+ //gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, p->W, HiW, 0.0, WtHiW);
+
+ eigenlib_dgemm ("N", "N", 1.0, p->P, p->W, 0.0, HiW);
+ eigenlib_dgemm ("T", "N", 1.0, p->W, HiW, 0.0, WtHiW);
- gsl_permutation * pmt2=gsl_permutation_alloc (n_cvt);
- LUDecomp (WtHiW, pmt2, &sig);
- LUInvert (WtHiW, pmt2, WtHiWi);
- gsl_permutation_free(pmt2);
+ //gsl_permutation * pmt2=gsl_permutation_alloc (n_cvt);
+ //LUDecomp (WtHiW, pmt2, &sig);
+ //LUInvert (WtHiW, pmt2, WtHiWi);
+ //gsl_permutation_free(pmt2);
+ eigenlib_invert(WtHiW);
+ gsl_matrix_memcpy(WtHiWi, WtHiW);
+
+ //gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
+ //gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
+ eigenlib_dgemm ("N", "T", 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
+ eigenlib_dgemm ("N", "N", -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
- gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, WtHiWi, HiW, 0.0, WtHiWiWtHi);
- gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, -1.0, HiW, WtHiWiWtHi, 1.0, p->P);
-
//calculate Py, KPy, PKPy
- gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
+ //eigenlib_dgemv("N", 1.0, p->P, p->y, 0.0, p->Py);
+ double d;
for (size_t i=0; i<n_vc+1; i++) {
gsl_vector_view KPy=gsl_matrix_column (p->KPy_mat, i);
gsl_vector_view PKPy=gsl_matrix_column (p->PKPy_mat, i);
@@ -150,11 +258,22 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p)
if (i==n_vc) {
gsl_vector_memcpy (&KPy.vector, p->Py);
} else {
- gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
+ gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
+ //seems to be important to use gsl dgemv here instead of eigenlib_dgemv; otherwise
gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector);
+ //eigenlib_dgemv("N", 1.0, &K_sub.matrix, p->Py, 0.0, &KPy.vector);
}
-
+
gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector);
+ //eigenlib_dgemv("N", 1.0, p->P, &KPy.vector, 0.0, &PKPy.vector);
+
+ //when phenotypes are not normalized well, then some values in the following matrix maybe nan; change that to 0; this seems to only happen when eigenlib_dgemv was used above
+ for (size_t j=0; j<p->KPy_mat->size1; j++) {
+ d=gsl_matrix_get (p->KPy_mat, j, i);
+ if (std::isnan(d)) {gsl_matrix_set (p->KPy_mat, j, i, 0); cout<<"nan appears in "<<i<<" "<<j<<endl;}
+ d=gsl_matrix_get (p->PKPy_mat, j, i);
+ if (std::isnan(d)) {gsl_matrix_set (p->PKPy_mat, j, i, 0); cout<<"nan appears in "<<i<<" "<<j<<endl;}
+ }
}
gsl_matrix_free (K_temp);
@@ -173,7 +292,7 @@ int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1)
VC_PARAM *p=(VC_PARAM *) params;
size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1;
-
+
double tr, d;
//update parameters
@@ -199,8 +318,12 @@ int LogRL_dev1 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1)
gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
- d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i));
-
+ if (p->noconstrain) {
+ d=(-0.5*tr+0.5*d);
+ } else {
+ d=(-0.5*tr+0.5*d)*exp(gsl_vector_get(log_sigma2, i));
+ }
+
gsl_vector_set(dev1, i, d);
}
@@ -214,32 +337,47 @@ int LogRL_dev2 (const gsl_vector *log_sigma2, void *params, gsl_matrix *dev2)
VC_PARAM *p=(VC_PARAM *) params;
size_t n_vc=log_sigma2->size-1;
-
+
double d, sigma2_i, sigma2_j;
//update parameters
UpdateParam (log_sigma2, p);
-
+
//calculate dev2=0.5(yPKPKPy)
for (size_t i=0; i<n_vc+1; i++) {
gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
- sigma2_i=exp(gsl_vector_get(log_sigma2, i));
+ if (p->noconstrain) {
+ sigma2_i=gsl_vector_get(log_sigma2, i);
+ } else {
+ sigma2_i=exp(gsl_vector_get(log_sigma2, i));
+ }
for (size_t j=i; j<n_vc+1; j++) {
gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j);
gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
- sigma2_j=exp(gsl_vector_get(log_sigma2, j));
-
- d*=-0.5*sigma2_i*sigma2_j;
+ if (p->noconstrain) {
+ sigma2_j=gsl_vector_get(log_sigma2, j);
+ d*=-0.5;
+ } else {
+ sigma2_j=exp(gsl_vector_get(log_sigma2, j));
+ d*=-0.5*sigma2_i*sigma2_j;
+ }
gsl_matrix_set(dev2, i, j, d);
if (j!=i) {gsl_matrix_set(dev2, j, i, d);}
- }
+ }
}
gsl_matrix_memcpy (p->Hessian, dev2);
-
+ /*
+ for (size_t i=0; i<dev2->size1; i++) {
+ for (size_t j=0; j<dev2->size2; j++) {
+ cout<<gsl_matrix_get (dev2, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
return GSL_SUCCESS;
}
@@ -250,14 +388,14 @@ int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, g
VC_PARAM *p=(VC_PARAM *) params;
size_t n1=(p->K)->size1, n_vc=log_sigma2->size-1;
-
+
double tr, d, sigma2_i, sigma2_j;
//update parameters
UpdateParam (log_sigma2, p);
- //calculate dev1=-0.5*trace(PK_i)+0.5*yPKPy
- //calculate dev2=0.5(yPKPKPy)
+ //calculate dev1=(-0.5*trace(PK_i)+0.5*yPK_iPy)*sigma2_i
+ //calculate dev2=0.5(yPK_iPK_jPy)*sigma2_i*sigma2_j
for (size_t i=0; i<n_vc+1; i++) {
if (i==n_vc) {
tr=0;
@@ -277,21 +415,31 @@ int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, g
gsl_vector_view KPy_i=gsl_matrix_column (p->KPy_mat, i);
gsl_blas_ddot(p->Py, &KPy_i.vector, &d);
- sigma2_i=exp(gsl_vector_get(log_sigma2, i));
- d=(-0.5*tr+0.5*d)*sigma2_i;
-
+ if (p->noconstrain) {
+ sigma2_i=gsl_vector_get(log_sigma2, i);
+ d=(-0.5*tr+0.5*d);
+ } else {
+ sigma2_i=exp(gsl_vector_get(log_sigma2, i));
+ d=(-0.5*tr+0.5*d)*sigma2_i;
+ }
+
gsl_vector_set(dev1, i, d);
-
+
for (size_t j=i; j<n_vc+1; j++) {
gsl_vector_view PKPy_j=gsl_matrix_column (p->PKPy_mat, j);
gsl_blas_ddot(&KPy_i.vector, &PKPy_j.vector, &d);
- sigma2_j=exp(gsl_vector_get(log_sigma2, j));
- d*=-0.5*sigma2_i*sigma2_j;
+ if (p->noconstrain) {
+ sigma2_j=gsl_vector_get(log_sigma2, j);
+ d*=-0.5;
+ } else {
+ sigma2_j=exp(gsl_vector_get(log_sigma2, j));
+ d*=-0.5*sigma2_i*sigma2_j;
+ }
gsl_matrix_set(dev2, i, j, d);
if (j!=i) {gsl_matrix_set(dev2, j, i, d);}
- }
+ }
}
@@ -303,13 +451,1195 @@ int LogRL_dev12 (const gsl_vector *log_sigma2, void *params, gsl_vector *dev1, g
-void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y)
+
+//read header to determine which column contains which item
+bool ReadHeader (const string &line, HEADER &header)
+{
+ string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID"};
+ set<string> rs_set(rs_ptr, rs_ptr+10);
+ string chr_ptr[]={"chr","CHR"};
+ set<string> chr_set(chr_ptr, chr_ptr+2);
+ string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", "bp", "BP"};
+ set<string> pos_set(pos_ptr, pos_ptr+8);
+ string cm_ptr[]={"cm","CM"};
+ set<string> cm_set(cm_ptr, cm_ptr+2);
+ string a1_ptr[]={"a1","A1","allele1","ALLELE1"};
+ set<string> a1_set(a1_ptr, a1_ptr+4);
+ string a0_ptr[]={"a0","A0","allele0","ALLELE0"};
+ set<string> a0_set(a0_ptr, a0_ptr+4);
+
+ string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
+ set<string> z_set(z_ptr, z_ptr+6);
+ string beta_ptr[]={"beta","BETA","b","B"};
+ set<string> beta_set(beta_ptr, beta_ptr+4);
+ string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
+ set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
+ string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
+ set<string> chisq_set(chisq_ptr, chisq_ptr+4);
+ string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
+ set<string> p_set(p_ptr, p_ptr+6);
+
+ string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
+ set<string> n_set(n_ptr, n_ptr+6);
+ string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
+ set<string> nmis_set(nmis_ptr, nmis_ptr+6);
+ string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
+ set<string> nobs_set(nobs_ptr, nobs_ptr+4);
+
+ string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq","ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"};
+ set<string> af_set(af_ptr, af_ptr+10);
+ string var_ptr[]={"var","VAR"};
+ set<string> var_set(var_ptr, var_ptr+2);
+
+ string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
+ set<string> ws_set(ws_ptr, ws_ptr+4);
+ string cor_ptr[]={"cor","COR","r","R"};
+ set<string> cor_set(cor_ptr, cor_ptr+4);
+
+ header.rs_col=0; header.chr_col=0; header.pos_col=0; header.a1_col=0; header.a0_col=0; header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0; header.n_col=0; header.nmis_col=0; header.nobs_col=0; header.af_col=0; header.var_col=0; header.ws_col=0; header.cor_col=0; header.coln=0;
+
+ char *ch_ptr;
+ string type;
+ size_t n_error=0;
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ while (ch_ptr!=NULL) {
+ type=ch_ptr;
+ if (rs_set.count(type)!=0) {
+ if (header.rs_col==0) {header.rs_col=header.coln+1;} else {cout<<"error! more than two rs columns in the file."<<endl; n_error++;}
+ } else if (chr_set.count(type)!=0) {
+ if (header.chr_col==0) {header.chr_col=header.coln+1;} else {cout<<"error! more than two chr columns in the file."<<endl; n_error++;}
+ } else if (pos_set.count(type)!=0) {
+ if (header.pos_col==0) {header.pos_col=header.coln+1;} else {cout<<"error! more than two pos columns in the file."<<endl; n_error++;}
+ } else if (cm_set.count(type)!=0) {
+ if (header.cm_col==0) {header.cm_col=header.coln+1;} else {cout<<"error! more than two cm columns in the file."<<endl; n_error++;}
+ } else if (a1_set.count(type)!=0) {
+ if (header.a1_col==0) {header.a1_col=header.coln+1;} else {cout<<"error! more than two allele1 columns in the file."<<endl; n_error++;}
+ } else if (a0_set.count(type)!=0) {
+ if (header.a0_col==0) {header.a0_col=header.coln+1;} else {cout<<"error! more than two allele0 columns in the file."<<endl; n_error++;}
+ } else if (z_set.count(type)!=0) {
+ if (header.z_col==0) {header.z_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
+ } else if (beta_set.count(type)!=0) {
+ if (header.beta_col==0) {header.beta_col=header.coln+1;} else {cout<<"error! more than two beta columns in the file."<<endl; n_error++;}
+ } else if (sebeta_set.count(type)!=0) {
+ if (header.sebeta_col==0) {header.sebeta_col=header.coln+1;} else {cout<<"error! more than two se_beta columns in the file."<<endl; n_error++;}
+ } else if (chisq_set.count(type)!=0) {
+ if (header.chisq_col==0) {header.chisq_col=header.coln+1;} else {cout<<"error! more than two z columns in the file."<<endl; n_error++;}
+ } else if (p_set.count(type)!=0) {
+ if (header.p_col==0) {header.p_col=header.coln+1;} else {cout<<"error! more than two p columns in the file."<<endl; n_error++;}
+ } else if (n_set.count(type)!=0) {
+ if (header.n_col==0) {header.n_col=header.coln+1;} else {cout<<"error! more than two n_total columns in the file."<<endl; n_error++;}
+ } else if (nmis_set.count(type)!=0) {
+ if (header.nmis_col==0) {header.nmis_col=header.coln+1;} else {cout<<"error! more than two n_mis columns in the file."<<endl; n_error++;}
+ } else if (nobs_set.count(type)!=0) {
+ if (header.nobs_col==0) {header.nobs_col=header.coln+1;} else {cout<<"error! more than two n_obs columns in the file."<<endl; n_error++;}
+ } else if (ws_set.count(type)!=0) {
+ if (header.ws_col==0) {header.ws_col=header.coln+1;} else {cout<<"error! more than two window_size columns in the file."<<endl; n_error++;}
+ } else if (af_set.count(type)!=0) {
+ if (header.af_col==0) {header.af_col=header.coln+1;} else {cout<<"error! more than two af columns in the file."<<endl; n_error++;}
+ } else if (cor_set.count(type)!=0) {
+ if (header.cor_col==0) {header.cor_col=header.coln+1;} else {cout<<"error! more than two cor columns in the file."<<endl; n_error++;}
+ } else {}
+
+ ch_ptr=strtok (NULL, " , \t");
+ header.coln++;
+ }
+
+ if (header.cor_col!=0 && header.cor_col!=header.coln) {cout<<"error! the cor column should be the last column."<<endl; n_error++;}
+
+ if (header.rs_col==0) {
+ if (header.chr_col!=0 && header.pos_col!=0) {
+ cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
+ } else {
+ cout<<"error! missing an rs column."<<endl; n_error++;
+ }
+ }
+
+ if (n_error==0) {return true;} else {return false;}
+}
+
+
+
+
+
+
+//read cov file the first time, record mapRS2in, mapRS2var (in case var is not provided in the z file), store vec_n and vec_rs
+void ReadFile_cor (const string &file_cor, const set<string> &setSnps, vector<string> &vec_rs, vector<size_t> &vec_n, vector<double> &vec_cm, vector<double> &vec_bp, map<string, size_t> &mapRS2in, map<string, double> &mapRS2var)
+{
+ vec_rs.clear();
+ vec_n.clear();
+ mapRS2in.clear();
+ mapRS2var.clear();
+
+ igzstream infile (file_cor.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open cov file: "<<file_cor<<endl; return;}
+
+ string line;
+ char *ch_ptr;
+
+ string rs, chr, a1, a0, pos, cm;
+ double af=0, var_x=0, d_pos, d_cm;
+ size_t n_total=0, n_mis=0, n_obs=0, ni_total=0;
+ size_t ns_test=0, ns_total=0;
+
+ HEADER header;
+
+ //header
+ !safeGetline(infile, line).eof();
+ ReadHeader (line, header);
+
+ if (header.n_col==0 ) {
+ if (header.nobs_col==0 && header.nmis_col==0) {
+ cout<<"error! missing sample size in the cor file."<<endl;
+ } else {
+ cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+ }
+ }
+
+ while (!safeGetline(infile, line).eof()) {
+ //do not read cor values this time; upto col_n-1
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+ n_total=0; n_mis=0; n_obs=0; af=0; var_x=0; d_cm=0; d_pos=0;
+ for (size_t i=0; i<header.coln-1; i++) {
+ if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
+ if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
+ if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr; d_pos=atof(ch_ptr);}
+ if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr; d_cm=atof(ch_ptr);}
+ if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
+ if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+
+ if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
+ if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
+ if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+
+ if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
+ if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+
+ ch_ptr=strtok (NULL, " , \t");
+ }
+
+ if (header.rs_col==0) {
+ rs=chr+":"+pos;
+ }
+
+ if (header.n_col==0) {
+ n_total=n_mis+n_obs;
+ }
+
+ //record rs, n
+ vec_rs.push_back(rs);
+ vec_n.push_back(n_total);
+ if (d_cm>0) {vec_cm.push_back(d_cm);} else {vec_cm.push_back(d_cm);}
+ if (d_pos>0) {vec_bp.push_back(d_pos);} else {vec_bp.push_back(d_pos);}
+
+ //record mapRS2in and mapRS2var
+ if (setSnps.size()==0 || setSnps.count(rs)!=0) {
+ if (mapRS2in.count(rs)==0) {
+ mapRS2in[rs]=1;
+
+ if (header.var_col!=0) {
+ mapRS2var[rs]=var_x;
+ } else if (header.af_col!=0) {
+ var_x=2.0*af*(1.0-af);
+ mapRS2var[rs]=var_x;
+ } else {}
+
+ ns_test++;
+
+ } else {
+ cout<<"error! more than one snp has the same id "<<rs<<" in cor file?"<<endl;
+ }
+ }
+
+ //record max pos,
+
+ ni_total=max(ni_total, n_total);
+ ns_total++;
+ }
+
+ // cout<<"## number of analyzed individuals in the reference = "<<ni_total<<endl;
+ // cout<<"## number of analyzed SNPs in the reference = "<<ns_total<<endl;
+
+ infile.close();
+ infile.clear();
+
+ return;
+}
+
+
+
+
+
+
+//read beta file, store mapRS2var if var is provided here, calculate q and var_y
+void ReadFile_beta (const bool flag_priorscale, const string &file_beta, const map<string, size_t> &mapRS2cat, map<string, size_t> &mapRS2in, map<string, double> &mapRS2var, map<string, size_t> &mapRS2nsamp, gsl_vector *q_vec, gsl_vector *qvar_vec, gsl_vector *s_vec, size_t &ni_total, size_t &ns_total)
+{
+ mapRS2nsamp.clear();
+
+ igzstream infile (file_beta.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open beta file: "<<file_beta<<endl; return;}
+
+ string line;
+ char *ch_ptr;
+ string type;
+
+ string rs, chr, a1, a0, pos, cm;
+ double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
+ size_t n_total=0, n_mis=0, n_obs=0;
+ size_t ns_test=0;
+ ns_total=0; ni_total=0;
+
+ vector<double> vec_q, vec_qvar, vec_s;
+ for (size_t i=0; i<q_vec->size; i++) {
+ vec_q.push_back(0.0);
+ vec_qvar.push_back(0.0);
+ vec_s.push_back(0.0);
+ }
+
+ //read header
+ HEADER header;
+ !safeGetline(infile, line).eof();
+ ReadHeader (line, header);
+
+ if (header.n_col==0 ) {
+ if (header.nobs_col==0 && header.nmis_col==0) {
+ cout<<"error! missing sample size in the beta file."<<endl;
+ } else {
+ cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+ }
+ }
+
+ if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) && header.chisq_col==0 && header.p_col==0) {
+ cout<<"error! missing z scores in the beta file."<<endl;
+ }
+
+ if (header.af_col==0 && header.var_col==0 && mapRS2var.size()==0) {
+ cout<<"error! missing allele frequency in the beta file."<<endl;
+ }
+
+ while (!safeGetline(infile, line).eof()) {
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+
+ z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
+ n_total=0; n_mis=0; n_obs=0; af=0; var_x=0;
+ for (size_t i=0; i<header.coln; i++) {
+ if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
+ if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
+ if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
+ if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
+ if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
+ if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+
+ if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
+ if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
+ if (header.sebeta_col!=0 && header.sebeta_col==i+1) {se_beta=atof(ch_ptr);}
+ if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
+ if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
+
+ if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
+ if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
+ if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+
+ if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
+ if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+
+ ch_ptr=strtok (NULL, " , \t");
+ }
+
+ if (header.rs_col==0) {
+ rs=chr+":"+pos;
+ }
+
+ if (header.n_col==0) {
+ n_total=n_mis+n_obs;
+ }
+
+ //both z values and beta/se_beta have directions, while chisq/pvalue do not
+ if (header.z_col!=0) {
+ zsquare=z*z;
+ } else if (header.beta_col!=0 && header.sebeta_col!=0) {
+ z=beta/se_beta;
+ zsquare=z*z;
+ } else if (header.chisq_col!=0) {
+ zsquare=chisq;
+ } else if (header.p_col!=0) {
+ zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
+ } else {zsquare=0;}
+
+ //if the snp is also present in cor file, then do calculations
+ if ((header.var_col!=0 || header.af_col!=0 || mapRS2var.count(rs)!=0) && mapRS2in.count(rs)!=0 && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
+ if (mapRS2in.at(rs)>1) {
+ cout<<"error! more than one snp has the same id "<<rs<<" in beta file?"<<endl;
+ break;
+ }
+
+ if (header.var_col==0) {
+ if (header.af_col!=0) {
+ var_x=2.0*af*(1.0-af);
+ } else {
+ var_x=mapRS2var.at(rs);
+ }
+ }
+
+ if (flag_priorscale) {var_x=1;}
+
+ mapRS2in[rs]++;
+ mapRS2var[rs]=var_x;
+ mapRS2nsamp[rs]=n_total;
+
+ if (mapRS2cat.size()!=0) {
+ vec_q[mapRS2cat.at(rs) ]+=(zsquare-1.0)*var_x/(double)n_total;
+ vec_s[mapRS2cat.at(rs) ]+=var_x;
+ vec_qvar[mapRS2cat.at(rs) ]+=var_x*var_x/((double)n_total*(double)n_total);
+ } else {
+ vec_q[0]+=(zsquare-1.0)*var_x/(double)n_total;
+ vec_s[0]+=var_x;
+ vec_qvar[0]+=var_x*var_x/((double)n_total*(double)n_total);
+ }
+
+ ni_total=max(ni_total, n_total);
+ ns_test++;
+ }
+
+ ns_total++;
+ }
+
+ for (size_t i=0; i<q_vec->size; i++) {
+ gsl_vector_set(q_vec, i, vec_q[i]);
+ gsl_vector_set(qvar_vec, i, 2.0*vec_qvar[i]);
+ gsl_vector_set(s_vec, i, vec_s[i]);
+ }
+
+
+ infile.clear();
+ infile.close();
+
+ return;
+}
+
+
+
+
+
+//read covariance file the second time
+//look for rs, n_mis+n_obs, var, window_size, cov
+//if window_cm/bp/ns is provided, then use these max values to calibrate estimates
+void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, const vector<size_t> &vec_n, const vector<double> &vec_cm, const vector<double> &vec_bp, const map<string, size_t> &mapRS2cat, const map<string, size_t> &mapRS2in, const map<string, double> &mapRS2var, const map<string, size_t> &mapRS2nsamp, const size_t crt, const double &window_cm, const double &window_bp, const double &window_ns, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *qvar_vec, size_t &ni_total, size_t &ns_total, size_t &ns_test, size_t &ns_pair)
+{
+ igzstream infile (file_cor.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open cov file: "<<file_cor<<endl; return;}
+
+ string line;
+ char *ch_ptr;
+
+ string rs1, rs2;
+ double d1, d2, d3, cor, var1, var2;
+ size_t n_nb, nsamp1, nsamp2, n12, bin_size=10, bin;
+
+ vector<vector<double> > mat_S, mat_Svar, mat_tmp;
+ vector<double> vec_qvar, vec_tmp;
+ vector<vector<vector<double> > > mat3d_Sbin;
+
+ for (size_t i=0; i<S_mat->size1; i++) {
+ vec_qvar.push_back(0.0);
+ }
+
+ for (size_t i=0; i<S_mat->size1; i++) {
+ mat_S.push_back(vec_qvar);
+ mat_Svar.push_back(vec_qvar);
+ }
+
+ for (size_t k=0; k<bin_size; k++) {
+ vec_tmp.push_back(0.0);
+ }
+ for (size_t i=0; i<S_mat->size1; i++) {
+ mat_tmp.push_back(vec_tmp);
+ }
+ for (size_t i=0; i<S_mat->size1; i++) {
+ mat3d_Sbin.push_back(mat_tmp);
+ }
+
+ string rs, chr, a1, a0, type, pos, cm;
+ size_t n_total=0, n_mis=0, n_obs=0;
+ double d_pos1, d_pos2, d_pos, d_cm1, d_cm2, d_cm;
+ ns_test=0; ns_total=0; ns_pair=0; ni_total=0;
+
+ //header
+ HEADER header;
+
+ !safeGetline(infile, line).eof();
+ ReadHeader (line, header);
+
+ while (!safeGetline(infile, line).eof()) {
+ //do not read cor values this time; upto col_n-1
+ d_pos1=0; d_cm1=0;
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ for (size_t i=0; i<header.coln-1; i++) {
+ if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
+ if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
+ if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr; d_pos1=atof(ch_ptr);}
+ if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr; d_cm1=atof(ch_ptr); }
+ if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
+ if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+
+ if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
+ if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
+ if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+
+ ch_ptr=strtok (NULL, " , \t");
+ }
+
+ if (header.rs_col==0) {
+ rs=chr+":"+pos;
+ }
+
+ if (header.n_col==0) {
+ n_total=n_mis+n_obs;
+ }
+
+ rs1=rs;
+
+ if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs1)!=0) && mapRS2in.count(rs1)!=0 && mapRS2in.at(rs1)==2) {
+ var1=mapRS2var.at(rs1);
+ nsamp1=mapRS2nsamp.at(rs1);
+ d2=var1*var1;
+
+ if (mapRS2cat.size()!=0) {
+ mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+=(1-1.0/(double)vec_n[ns_total])*d2;
+ mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ]+=d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]);
+ if (crt==1) {
+ mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs1) ][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
+ }
+ } else {
+ //mat_S[0][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
+ mat_S[0][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
+ mat_Svar[0][0]+=d2*d2/((double)vec_n[ns_total]*(double)vec_n[ns_total]);
+ if (crt==1) {
+ mat3d_Sbin[0][0][0]+=(1-1.0/(double)vec_n[ns_total])*d2;
+ }
+ }
+
+ n_nb=0;
+ while(ch_ptr!=NULL) {
+ type=ch_ptr;
+ if (type.compare("NA")!=0 && type.compare("na")!=0 && type.compare("nan")!=0 && type.compare("-nan")!=0) {
+ cor=atof(ch_ptr);
+ rs2=vec_rs[ns_total+n_nb+1];
+ d_pos2=vec_bp[ns_total+n_nb+1];
+ d_cm2=vec_cm[ns_total+n_nb+1];
+ d_pos=abs(d_pos2-d_pos1);
+ d_cm=abs(d_cm2-d_cm1);
+
+ if ( (mapRS2cat.size()==0 || mapRS2cat.count(rs2)!=0) && mapRS2in.count(rs2)!=0 && mapRS2in.at(rs2)==2) {
+ var2=mapRS2var.at(rs2);
+ nsamp2=mapRS2nsamp.at(rs2);
+ d1=cor*cor-1.0/(double)min(vec_n[ns_total], vec_n[ns_total+n_nb+1]);
+ d2=var1*var2;
+ d3=cor*cor/((double)nsamp1*(double)nsamp2);
+ n12=min(vec_n[ns_total], vec_n[ns_total+n_nb+1]);
+
+ //compute bin
+ if (crt==1) {
+ if (window_cm!=0 && d_cm1!=0 && d_cm2!=0) {
+ bin=min( (int)floor(d_cm/window_cm*bin_size), (int)bin_size);
+ } else if (window_bp!=0 && d_pos1!=0 && d_pos2!=0) {
+ bin=min( (int)floor(d_pos/window_bp*bin_size), (int)bin_size);
+ } else if (window_ns!=0) {
+ bin=min( (int)floor(((double)n_nb+1)/window_ns*bin_size), (int)bin_size);
+ }
+ }
+
+ //if (mat_S[0][0]!=mat_S[0][0] && flag_nan==0) {
+ //if (rs1.compare("rs10915560")==0 || rs1.compare("rs241273")==0) {cout<<rs1<<" "<<rs2<<" "<<ns_total<<" "<<n_nb<<" "<<vec_n[ns_total]<<" "<<vec_n[ns_total+n_nb+1]<<" "<<nsamp1<<" "<<nsamp2<<" "<<var1<<" "<<var2<<" "<<cor<<" "<<d1<<" "<<d2<<" "<<d3<<" "<<mat_S[0][0]<<endl; flag_nan++;}
+ if (mapRS2cat.size()!=0) {
+ if (mapRS2cat.at(rs1)==mapRS2cat.at(rs2)) {
+ vec_qvar[mapRS2cat.at(rs1)]+=2*d3*d2;
+ mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=2*d1*d2;
+ mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=2*d2*d2/((double)n12*(double)n12);
+ if (crt==1) {
+ mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+=2*d1*d2;
+ }
+ } else {
+ mat_S[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=d1*d2;
+ mat_Svar[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ]+=d2*d2/((double)n12*(double)n12);
+ if (crt==1) {
+ mat3d_Sbin[mapRS2cat.at(rs1) ][mapRS2cat.at(rs2) ][bin]+=d1*d2;
+ }
+ }
+ } else {
+ vec_qvar[0]+=2*d3*d2;
+ mat_S[0][0]+=2*d1*d2;
+ mat_Svar[0][0]+=2*d2*d2/((double)n12*(double)n12);
+
+ if (crt==1) {
+ mat3d_Sbin[0][0][bin]+=2*d1*d2;
+ }
+ }
+ ns_pair++;
+ }
+ }
+
+ ch_ptr=strtok (NULL, " , \t");
+ n_nb++;
+ }
+ ni_total=max(ni_total, n_total);
+ ns_test++;
+ }
+
+ ns_total++;
+ }
+
+ //use S_bin to fit a rational function y=1/(a+bx)^2, where x=seq(0.5,bin_size-0.5,by=1)
+ //and then compute a correlation factor as a percentage
+ double a, b, x, y, n, var_y, var_x, mean_y, mean_x, cov_xy, crt_factor;
+ if (crt==1) {
+ for (size_t i=0; i<S_mat->size1; i++) {
+ for (size_t j=i; j<S_mat->size2; j++) {
+
+ //correct mat_S
+ n=0; var_y=0; var_x=0; mean_y=0; mean_x=0; cov_xy=0;
+ for (size_t k=0; k<bin_size; k++) {
+ if (j==i) {
+ y=mat3d_Sbin[i][j][k];
+ } else {
+ y=mat3d_Sbin[i][j][k]+mat3d_Sbin[j][i][k];
+ }
+ x=k+0.5;
+ cout<<y<<", ";
+ if (y>0) {
+ y=1/sqrt(y);
+ mean_x+=x; mean_y+=y; var_x+=x*x; var_y+=y*y; cov_xy+=x*y;
+ n++;
+ }
+ }
+ cout<<endl;
+
+ if (n>=5) {
+ mean_x/=n; mean_y/=n; var_x/=n; var_y/=n; cov_xy/=n;
+ var_x-=mean_x*mean_x; var_y-=mean_y*mean_y; cov_xy-=mean_x*mean_y;
+ b=cov_xy/var_x;
+ a=mean_y-b*mean_x;
+ crt_factor=a/(b*(bin_size+0.5))+1;
+ if (i==j) {
+ mat_S[i][j]*=crt_factor;
+ } else {
+ mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor;
+ }
+ cout<<crt_factor<<endl;
+ //correct qvar
+ if (i==j) {
+ vec_qvar[i]*=crt_factor; //=vec_qvar[i]*crt_factor+(ns_test*ns_test-ns_pair*crt_factor)/pow(ni_total, 3.0);
+ }
+ }
+ }
+ }
+ }
+
+ //save to gsl_vector and gsl_matrix: qvar_vec, S_mat, Svar_mat
+ for (size_t i=0; i<S_mat->size1; i++) {
+ d1=gsl_vector_get(qvar_vec, i)+2*vec_qvar[i];
+ gsl_vector_set(qvar_vec, i, d1);
+ for (size_t j=0; j<S_mat->size2; j++) {
+ if (i==j) {
+ gsl_matrix_set(S_mat, i, j, mat_S[i][i]);
+ gsl_matrix_set(Svar_mat, i, j, 2.0*mat_Svar[i][i]*ns_test*ns_test/(2.0*ns_pair) );
+ } else {
+ gsl_matrix_set(S_mat, i, j, mat_S[i][j]+mat_S[j][i]);
+ gsl_matrix_set(Svar_mat, i, j, 2.0*(mat_Svar[i][j]+mat_Svar[j][i])*ns_test*ns_test/(2.0*ns_pair) );
+ }
+ }
+ }
+
+
+
+ infile.clear();
+ infile.close();
+
+ return;
+}
+
+
+
+
+
+//copied from lmm.cpp; is used in the following function VCss
+//map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1
+size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
+ if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
+ size_t index;
+ size_t l, h;
+ if (b>a) {l=a; h=b;} else {l=b; h=a;}
+
+ size_t n=n_cvt+2;
+ index=(2*n-l+2)*(l-1)/2+h-l;
+
+ return index;
+}
+
+
+//use the new method to calculate variance components with summary statistics
+//first, use a function CalcS to compute S matrix (where the diagonal elements are part of V(q) ), and then use bootstrap to compute the variance for S, use a set of genotypes, phenotypes, and individual ids, and snp category label
+void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *q_vec, const gsl_vector *s_vec, const double df, vector<double> &v_pve, vector<double> &v_se_pve, double &pve_total, double &se_pve_total, vector<double> &v_sigma2, vector<double> &v_se_sigma2, vector<double> &v_enrich, vector<double> &v_se_enrich) {
+ size_t n_vc=S_mat->size1;
+
+ gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
+
+ gsl_vector *pve=gsl_vector_alloc (n_vc);
+ gsl_vector *pve_plus=gsl_vector_alloc (n_vc+1);
+ gsl_vector *tmp=gsl_vector_alloc (n_vc+1);
+ gsl_vector *sigma2persnp=gsl_vector_alloc (n_vc);
+ gsl_vector *enrich=gsl_vector_alloc (n_vc);
+ gsl_vector *se_pve=gsl_vector_alloc (n_vc);
+ gsl_vector *se_sigma2persnp=gsl_vector_alloc (n_vc);
+ gsl_vector *se_enrich=gsl_vector_alloc (n_vc);
+
+ double d;
+
+ //calculate S^{-1}q
+ gsl_matrix_memcpy (tmp_mat, S_mat);
+ int sig;
+ gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+ LUDecomp (tmp_mat, pmt, &sig);
+ LUInvert (tmp_mat, pmt, Si_mat);
+
+ //calculate sigma2snp and pve
+ gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+ gsl_vector_memcpy(sigma2persnp, pve);
+ gsl_vector_div(sigma2persnp, s_vec);
+
+ //get qvar_mat
+ /*
+ if (n_block==0 || n_block==1) {
+ double s=1.0;
+ for (size_t i=0; i<n_vc; i++) {
+ d=gsl_vector_get(pve, i);
+ gsl_vector_set(pve_plus, i, d);
+ s-=d;
+ }
+ gsl_vector_set(pve_plus, n_vc, s);
+
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ size_t t_ij=GetabIndex (i+1, j+1, n_vc-2);
+ gsl_matrix_const_view Vsub=gsl_matrix_const_submatrix(V, 0, t_ij*(n_vc+1), n_vc+1, n_vc+1);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, &Vsub.matrix, pve_plus, 0.0, tmp);
+ gsl_blas_ddot (pve_plus, tmp, &d);
+
+ d*=2/(df*df);
+
+ gsl_matrix_set (qvar_mat, i, j, d);
+ if (i!=j) {gsl_matrix_set (qvar_mat, j, i, d);}
+ //cout<<t_ij<<"/"<<d<<" ";
+ }
+ //cout<<endl;
+ }
+ } else {
+ */
+ gsl_matrix_memcpy (qvar_mat, Vq);
+ gsl_matrix_scale (qvar_mat, 1.0/(df*df));
+ //}
+
+ //gsl_matrix_memcpy (qvar_mat, S_mat);
+ //gsl_matrix_scale (qvar_mat, 2/(df*df));
+
+ //calculate variance for these estimates
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ d=gsl_matrix_get(Svar_mat, i, j);
+ d*=gsl_vector_get(pve, i)*gsl_vector_get(pve, j);
+ //cout<<d<<" ";
+
+ d+=gsl_matrix_get(qvar_mat, i, j);
+ gsl_matrix_set(Var_mat, i, j, d);
+ if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);}
+ }
+ //cout<<endl;
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0, tmp_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0, Var_mat);
+
+ for (size_t i=0; i<n_vc; i++) {
+ d=sqrt(gsl_matrix_get(Var_mat, i, i));
+ gsl_vector_set(se_pve, i, d);
+ d/=gsl_vector_get(s_vec, i);
+ gsl_vector_set(se_sigma2persnp, i, d);
+ }
+
+ //compute pve_total, se_pve_total
+ pve_total=0; se_pve_total=0;
+ for (size_t i=0; i<n_vc; i++) {
+ pve_total+=gsl_vector_get(pve, i);
+
+ for (size_t j=0; j<n_vc; j++) {
+ se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+ }
+ }
+ se_pve_total=sqrt(se_pve_total);
+
+ //compute enrichment and its variance
+ double s_pve=0, s_snp=0;
+ for (size_t i=0; i<n_vc; i++) {
+ s_pve+=gsl_vector_get(pve, i);
+ s_snp+=gsl_vector_get(s_vec, i);
+ }
+ gsl_vector_memcpy (enrich, sigma2persnp);
+ gsl_vector_scale (enrich, s_snp/s_pve);
+
+ gsl_matrix_set_identity(tmp_mat);
+
+ double d1;
+ for (size_t i=0; i<n_vc; i++) {
+ d=gsl_vector_get(pve, i)/s_pve;
+ d1=gsl_vector_get(s_vec, i);
+ for (size_t j=0; j<n_vc; j++) {
+ if (i==j) {
+ gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve);
+ } else {
+ gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve);
+ }
+ }
+ }
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0, tmp_mat1);
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0, VarEnrich_mat);
+
+ for (size_t i=0; i<n_vc; i++) {
+ d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
+ gsl_vector_set(se_enrich, i, d);
+ }
+
+ cout<<"pve = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<gsl_vector_get(pve, i)<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(pve) = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<gsl_vector_get(se_pve, i)<<" ";
+ }
+ cout<<endl;
+
+ cout<<"sigma2 per snp = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<gsl_vector_get(sigma2persnp, i)<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(sigma2 per snp) = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<gsl_vector_get(se_sigma2persnp, i)<<" ";
+ }
+ cout<<endl;
+
+ cout<<"enrichment = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<gsl_vector_get(enrich, i)<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(enrichment) = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<gsl_vector_get(se_enrich, i)<<" ";
+ }
+ cout<<endl;
+
+ //save data
+ v_pve.clear(); v_se_pve.clear();
+ v_sigma2.clear(); v_se_sigma2.clear();
+ v_enrich.clear(); v_se_enrich.clear();
+ for (size_t i=0; i<n_vc; i++) {
+ d=gsl_vector_get(pve, i);
+ v_pve.push_back(d);
+ d=gsl_vector_get(se_pve, i);
+ v_se_pve.push_back(d);
+
+ d=gsl_vector_get(sigma2persnp, i);
+ v_sigma2.push_back(d);
+ d=gsl_vector_get(se_sigma2persnp, i);
+ v_se_sigma2.push_back(d);
+
+ d=gsl_vector_get(enrich, i);
+ v_enrich.push_back(d);
+ d=gsl_vector_get(se_enrich, i);
+ v_se_enrich.push_back(d);
+ }
+
+ //delete matrices
+ gsl_matrix_free(Si_mat);
+ gsl_matrix_free(Var_mat);
+ gsl_matrix_free(VarEnrich_mat);
+ gsl_matrix_free(tmp_mat);
+ gsl_matrix_free(tmp_mat1);
+ gsl_matrix_free(qvar_mat);
+
+ gsl_vector_free(pve);
+ gsl_vector_free(pve_plus);
+ gsl_vector_free(tmp);
+ gsl_vector_free(sigma2persnp);
+ gsl_vector_free(enrich);
+ gsl_vector_free(se_pve);
+ gsl_vector_free(se_sigma2persnp);
+ gsl_vector_free(se_enrich);
+
+ return;
+}
+
+
+
+
+
+//Ks are not scaled;
+void VC::CalcVChe (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y)
+{
+ size_t n1=K->size1, n2=K->size2;
+ size_t n_vc=n2/n1;
+
+ double r=(double)n1/(double)(n1 - W->size2);
+ double var_y, var_y_new;
+ double d, tr, s, v;
+ vector<double> traceG_new;
+
+ //new matrices/vectors
+ gsl_matrix *K_scale=gsl_matrix_alloc (n1, n2);
+ gsl_vector *y_scale=gsl_vector_alloc (n1);
+ gsl_matrix *Kry=gsl_matrix_alloc (n1, n_vc);
+ gsl_matrix *yKrKKry=gsl_matrix_alloc (n_vc, n_vc*(n_vc+1) );
+ gsl_vector *KKry=gsl_vector_alloc (n1);
+
+ //old matrices/vectors
+ gsl_vector *pve=gsl_vector_alloc (n_vc);
+ gsl_vector *se_pve=gsl_vector_alloc (n_vc);
+ gsl_vector *q_vec=gsl_vector_alloc (n_vc);
+ gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *S_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+
+ //center and scale K by W
+ for (size_t i=0; i<n_vc; i++) {
+ gsl_matrix_view Kscale_sub = gsl_matrix_submatrix (K_scale, 0, n1*i, n1, n1);
+ gsl_matrix_const_view K_sub = gsl_matrix_const_submatrix (K, 0, n1*i, n1, n1);
+ gsl_matrix_memcpy (&Kscale_sub.matrix, &K_sub.matrix);
+
+ CenterMatrix (&Kscale_sub.matrix, W);
+ d=ScaleMatrix (&Kscale_sub.matrix);
+ traceG_new.push_back(d);
+ }
+
+ //center y by W, and standardize it to have variance 1 (t(y)%*%y/n=1)
+ gsl_vector_memcpy (y_scale, y);
+ CenterVector (y_scale, W);
+
+ var_y=VectorVar (y);
+ var_y_new=VectorVar (y_scale);
+
+ StandardizeVector (y_scale);
+
+ //compute Kry, which is used for confidence interval; also compute q_vec (*n^2)
+ for (size_t i=0; i<n_vc; i++) {
+ gsl_matrix_const_view Kscale_sub = gsl_matrix_const_submatrix (K_scale, 0, n1*i, n1, n1);
+ gsl_vector_view Kry_col=gsl_matrix_column (Kry, i);
+
+ gsl_vector_memcpy (&Kry_col.vector, y_scale);
+ gsl_blas_dgemv(CblasNoTrans, 1.0, &Kscale_sub.matrix, y_scale, -1.0*r, &Kry_col.vector);
+
+ gsl_blas_ddot (&Kry_col.vector, y_scale, &d);
+ gsl_vector_set(q_vec, i, d);
+ }
+
+ //compuate yKrKKry, which is used later for confidence interval
+ for (size_t i=0; i<n_vc; i++) {
+ gsl_vector_const_view Kry_coli=gsl_matrix_const_column (Kry, i);
+ for (size_t j=i; j<n_vc; j++) {
+ gsl_vector_const_view Kry_colj=gsl_matrix_const_column (Kry, j);
+ for (size_t l=0; l<n_vc; l++) {
+ gsl_matrix_const_view Kscale_sub = gsl_matrix_const_submatrix (K_scale, 0, n1*l, n1, n1);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, &Kscale_sub.matrix, &Kry_coli.vector, 0.0, KKry);
+ gsl_blas_ddot (&Kry_colj.vector, KKry, &d);
+ gsl_matrix_set(yKrKKry, i, l*n_vc+j, d);
+ if (i!=j) {gsl_matrix_set(yKrKKry, j, l*n_vc+i, d);}
+ }
+ gsl_blas_ddot (&Kry_coli.vector, &Kry_colj.vector, &d);
+ gsl_matrix_set(yKrKKry, i, n_vc*n_vc+j, d);
+ if (i!=j) {gsl_matrix_set(yKrKKry, j, n_vc*n_vc+i, d);}
+ }
+ }
+
+ //compute Sij (*n^2)
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ tr=0;
+ for (size_t l=0; l<n1; l++) {
+ gsl_vector_const_view Ki_col=gsl_matrix_const_column (K_scale, i*n1+l);
+ gsl_vector_const_view Kj_col=gsl_matrix_const_column (K_scale, j*n1+l);
+ gsl_blas_ddot (&Ki_col.vector, &Kj_col.vector, &d);
+ tr+=d;
+ }
+
+ tr=tr-r*(double)n1;
+ gsl_matrix_set (S_mat, i, j, tr);
+ if (i!=j) {gsl_matrix_set (S_mat, j, i, tr);}
+ }
+ }
+
+ /*
+ cout<<"q_vec = "<<endl;
+ for (size_t i=0; i<q_vec->size; i++) {
+ cout<<gsl_vector_get(q_vec, i)<<" ";
+ }
+ cout<<endl;
+
+ cout<<"S_mat = "<<endl;
+ for (size_t i=0; i<S_mat->size1; i++) {
+ for (size_t j=0; j<S_mat->size2; j++) {
+ cout<<gsl_matrix_get(S_mat, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+
+ //compute S^{-1}q
+ int sig;
+ gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+ LUDecomp (S_mat, pmt, &sig);
+ LUInvert (S_mat, pmt, Si_mat);
+
+ //compute pve (on the transformed scale)
+ gsl_blas_dgemv (CblasNoTrans, 1.0, Si_mat, q_vec, 0.0, pve);
+
+ //compute q_var (*n^4)
+ gsl_matrix_set_zero (qvar_mat);
+ s=1;
+ for (size_t i=0; i<n_vc; i++) {
+ d=gsl_vector_get(pve, i);
+ gsl_matrix_view yKrKKry_sub=gsl_matrix_submatrix(yKrKKry, 0, i*n_vc, n_vc, n_vc);
+ gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix);
+ gsl_matrix_scale(tmp_mat, d);
+ gsl_matrix_add (qvar_mat, tmp_mat);
+ s-=d;
+ }
+ gsl_matrix_view yKrKKry_sub=gsl_matrix_submatrix(yKrKKry, 0, n_vc*n_vc, n_vc, n_vc);
+ gsl_matrix_memcpy (tmp_mat, &yKrKKry_sub.matrix);
+ gsl_matrix_scale(tmp_mat, s);
+ gsl_matrix_add (qvar_mat, tmp_mat);
+
+ gsl_matrix_scale(qvar_mat, 2.0);
+
+ //compute S^{-1}var_qS^{-1}
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, qvar_mat, 0.0, tmp_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0, Var_mat);
+
+ //transform pve back to the original scale and save data
+ v_pve.clear(); v_se_pve.clear();
+ v_sigma2.clear(); v_se_sigma2.clear();
+
+ s=1.0, v=0, pve_total=0, se_pve_total=0;
+ for (size_t i=0; i<n_vc; i++) {
+ d=gsl_vector_get (pve, i);
+ //cout<<var_y<<" "<<var_y_new<<" "<<v_traceG[i]<<" "<<traceG_new[i]<<endl;
+ v_sigma2.push_back(d*var_y_new/traceG_new[i]);
+ v_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y));
+ s-=d;
+ pve_total+=d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y);
+
+ d=sqrt(gsl_matrix_get (Var_mat, i, i));
+ v_se_sigma2.push_back(d*var_y_new/traceG_new[i]);
+ v_se_pve.push_back(d*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y));
+
+ //d*=sqrt(var_y/v_traceG[i]-v_sigma2[i]);
+ //v_se_pve.push_back(d/var_y);
+
+ for (size_t j=0; j<n_vc; j++) {
+ v+=gsl_matrix_get(Var_mat, i, j);
+ se_pve_total+=gsl_matrix_get(Var_mat, i, j)*(var_y_new/traceG_new[i])*(v_traceG[i]/var_y)*(var_y_new/traceG_new[j])*(v_traceG[j]/var_y);
+ }
+ }
+ v_sigma2.push_back(s*r*var_y_new);
+ v_se_sigma2.push_back(sqrt(v)*r*var_y_new);
+ se_pve_total=sqrt(se_pve_total);
+
+ cout<<"sigma2 = ";
+ for (size_t i=0; i<n_vc+1; i++) {
+ cout<<v_sigma2[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(sigma2) = ";
+ for (size_t i=0; i<n_vc+1; i++) {
+ cout<<v_se_sigma2[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"pve = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_pve[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(pve) = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_se_pve[i]<<" ";
+ }
+ cout<<endl;
+
+ if (n_vc>1) {
+ cout<<"total pve = "<<pve_total<<endl;
+ cout<<"se(total pve) = "<<se_pve_total<<endl;
+ }
+
+ gsl_permutation_free(pmt);
+ gsl_matrix_free(K_scale);
+ gsl_vector_free(y_scale);
+ gsl_matrix_free(Kry);
+ gsl_matrix_free(yKrKKry);
+ gsl_vector_free(KKry);
+
+ //old matrices/vectors
+ gsl_vector_free(pve);
+ gsl_vector_free(se_pve);
+ gsl_vector_free(q_vec);
+ gsl_matrix_free(qvar_mat);
+ gsl_matrix_free(tmp_mat);
+ gsl_matrix_free(S_mat);
+ gsl_matrix_free(Si_mat);
+ gsl_matrix_free(Var_mat);
+
+ return;
+}
+
+
+
+
+//reml for log(sigma2) based on the AI algorithm
+void VC::CalcVCreml (bool noconstrain, const gsl_matrix *K, const gsl_matrix *W, const gsl_vector *y)
{
size_t n1=K->size1, n2=K->size2;
size_t n_vc=n2/n1;
gsl_vector *log_sigma2=gsl_vector_alloc (n_vc+1);
double d, s;
+ /*
+ //compare eigenlib vs lapack
+ //dgemm
+ gsl_matrix *K2=gsl_matrix_alloc(K->size1, K->size1);
+
+ clock_t time_start=clock();
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, K, K, 0.0, K2);
+ cout<<"standard time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ for (size_t j=0; j<2; j++) {
+ cout<<gsl_matrix_get(K2, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+
+ time_start=clock();
+ lapack_dgemm ((char *)"N", (char *)"T", 1.0, K, K, 0.0, K2);
+ cout<<"lapack time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ for (size_t j=0; j<2; j++) {
+ cout<<gsl_matrix_get(K2, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+
+ time_start=clock();
+ eigenlib_dgemm((char *)"N", (char *)"T", 1.0, K, K, 0.0, K2);
+ cout<<"eigenlib time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ for (size_t j=0; j<2; j++) {
+ cout<<gsl_matrix_get(K2, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+
+ //dgemv
+ gsl_vector_const_view W_col=gsl_matrix_const_column (K, 0);
+ gsl_vector *v=gsl_vector_alloc (K->size1);
+ time_start=clock();
+ for (size_t i=0; i<1000; i++) {
+ gsl_blas_dgemv(CblasNoTrans, 1.0, K2, &W_col.vector, 0.0, v);
+ }
+ cout<<"standard time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ cout<<gsl_vector_get(v, i)<<endl;
+ }
+
+ time_start=clock();
+ for (size_t i=0; i<1000; i++) {
+ eigenlib_dgemv((char *)"N", 1.0, K2, &W_col.vector, 0.0, v);
+ }
+ cout<<"eigenlib time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ cout<<gsl_vector_get(v, i)<<endl;
+ }
+
+ //eigen
+ gsl_matrix *K2copy=gsl_matrix_alloc(K->size1, K->size1);
+ gsl_matrix *K3=gsl_matrix_alloc(K->size1, K->size1);
+
+ gsl_matrix_memcpy(K2copy, K2);
+ time_start=clock();
+ EigenDecomp(K2copy, K3, v, 0);
+ cout<<"standard time 0: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ cout<<gsl_vector_get(v, i)<<endl;
+ }
+
+ gsl_matrix_memcpy(K2copy, K2);
+ time_start=clock();
+ EigenDecomp(K2copy, K3, v, 1);
+ cout<<"standard time 1: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ cout<<gsl_vector_get(v, i)<<endl;
+ }
+
+ gsl_matrix_memcpy(K2copy, K2);
+ time_start=clock();
+ eigenlib_eigensymm(K2copy, K3, v);
+ cout<<"eigenlib time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ cout<<gsl_vector_get(v, i)<<endl;
+ }
+
+
+
+ //invert
+ gsl_matrix_memcpy(K2copy, K2);
+ time_start=clock();
+ int sigcopy;
+ gsl_permutation * pmt1=gsl_permutation_alloc (K2->size1);
+ LUDecomp (K2copy, pmt1, &sigcopy);
+ LUInvert (K2copy, pmt1, K3);
+ gsl_permutation_free(pmt1);
+ cout<<"standard time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ for (size_t j=0; j<2; j++) {
+ cout<<gsl_matrix_get(K3, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+
+ gsl_matrix_memcpy(K2copy, K2);
+ time_start=clock();
+ eigenlib_invert(K2copy);
+ cout<<"eigen time: "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+ for (size_t i=0; i<2; i++) {
+ for (size_t j=0; j<2; j++) {
+ cout<<gsl_matrix_get(K2copy, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+
//set up params
gsl_matrix *P=gsl_matrix_alloc (n1, n1);
gsl_vector *Py=gsl_vector_alloc (n1);
@@ -318,18 +1648,26 @@ void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector
gsl_vector *dev1=gsl_vector_alloc (n_vc+1);
gsl_matrix *dev2=gsl_matrix_alloc (n_vc+1, n_vc+1);
gsl_matrix *Hessian=gsl_matrix_alloc (n_vc+1, n_vc+1);
- VC_PARAM params={K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian};
+ VC_PARAM params={K, W, y, P, Py, KPy_mat, PKPy_mat, Hessian, noconstrain};
//initialize sigma2/log_sigma2
+ CalcVChe (K, W, y);
+
gsl_blas_ddot (y, y, &s);
s/=(double)n1;
for (size_t i=0; i<n_vc+1; i++) {
+ if (noconstrain) {
+ d=v_sigma2[i];
+ } else {
+ if (v_sigma2[i]<=0) {d=log(0.1);} else {d=log(v_sigma2[i]);}
+ }
+ /*
if (i==n_vc) {
d=s/((double)n_vc+1.0);
} else {
d=s/( ((double)n_vc+1.0)*v_traceG[i]);
}
-
+ */
gsl_vector_set (log_sigma2, i, d);
}
// gsl_vector_set (log_sigma2, 0, 0.38);
@@ -338,7 +1676,11 @@ void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector
cout<<"iteration "<<0<<endl;
cout<<"sigma2 = ";
for (size_t i=0; i<n_vc+1; i++) {
- cout<<exp(gsl_vector_get(log_sigma2, i))<<" ";
+ if (noconstrain) {
+ cout<<gsl_vector_get(log_sigma2, i)<<" ";
+ } else {
+ cout<<exp(gsl_vector_get(log_sigma2, i))<<" ";
+ }
}
cout<<endl;
@@ -349,15 +1691,15 @@ void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector
FDF.f=&LogRL_dev1;
FDF.df=&LogRL_dev2;
FDF.fdf=&LogRL_dev12;
-
- //set up solver
+
+ //set up solver
int status;
int iter=0, max_iter=100;
const gsl_multiroot_fdfsolver_type *T_fdf;
gsl_multiroot_fdfsolver *s_fdf;
T_fdf=gsl_multiroot_fdfsolver_hybridsj;
- s_fdf=gsl_multiroot_fdfsolver_alloc (T_fdf, n_vc+1);
+ s_fdf=gsl_multiroot_fdfsolver_alloc (T_fdf, n_vc+1);
gsl_multiroot_fdfsolver_set (s_fdf, &FDF, log_sigma2);
@@ -370,37 +1712,55 @@ void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector
cout<<"iteration "<<iter<<endl;
cout<<"sigma2 = ";
for (size_t i=0; i<n_vc+1; i++) {
- cout<<exp(gsl_vector_get(s_fdf->x, i))<<" ";
+ if (noconstrain) {
+ cout<<gsl_vector_get(s_fdf->x, i)<<" ";
+ } else {
+ cout<<exp(gsl_vector_get(s_fdf->x, i))<<" ";
+ }
}
cout<<endl;
+ /*
cout<<"derivatives = ";
for (size_t i=0; i<n_vc+1; i++) {
cout<<gsl_vector_get(s_fdf->f, i)<<" ";
}
cout<<endl;
-
- status=gsl_multiroot_test_residual (s_fdf->f, 1e-3);
+ */
+ status=gsl_multiroot_test_residual (s_fdf->f, 1e-3);
}
- while (status==GSL_CONTINUE && iter<max_iter);
-
- //obtain Hessian inverse
- int sig=LogRL_dev12 (s_fdf->f, &params, dev1, dev2);
+ while (status==GSL_CONTINUE && iter<max_iter);
+
+ //obtain Hessian and Hessian inverse
+ int sig=LogRL_dev12 (s_fdf->x, &params, dev1, dev2);
+ /*
+ for (size_t i=0; i<dev2->size1; i++) {
+ for (size_t j=0; j<dev2->size2; j++) {
+ cout<<gsl_matrix_get (dev2, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
gsl_permutation * pmt=gsl_permutation_alloc (n_vc+1);
- LUDecomp (dev2, pmt, &sig);
+ LUDecomp (dev2, pmt, &sig);
LUInvert (dev2, pmt, Hessian);
gsl_permutation_free(pmt);
- //save data
- v_sigma2.clear();
+ //save sigma2 and se_sigma2
+ v_sigma2.clear(); v_se_sigma2.clear();
for (size_t i=0; i<n_vc+1; i++) {
- d=exp(gsl_vector_get(s_fdf->x, i));
+ if (noconstrain) {
+ d=gsl_vector_get(s_fdf->x, i);
+ } else {
+ d=exp(gsl_vector_get(s_fdf->x, i));
+ }
v_sigma2.push_back(d);
- }
- v_se_sigma2.clear();
- for (size_t i=0; i<n_vc+1; i++) {
- d=-1.0*v_sigma2[i]*v_sigma2[i]*gsl_matrix_get(Hessian, i, i);
+ if (noconstrain) {
+ d=-1.0*gsl_matrix_get(Hessian, i, i);
+ } else {
+ d=-1.0*d*d*gsl_matrix_get(Hessian, i, i);
+ }
v_se_sigma2.push_back(sqrt(d));
}
@@ -409,20 +1769,80 @@ void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector
s+=v_traceG[i]*v_sigma2[i];
}
s+=v_sigma2[n_vc];
-
- v_pve.clear();
+
+ //compute pve
+ v_pve.clear(); pve_total=0;
for (size_t i=0; i<n_vc; i++) {
d=v_traceG[i]*v_sigma2[i]/s;
v_pve.push_back(d);
+ pve_total+=d;
}
- v_se_pve.clear();
- for (size_t i=0; i<n_vc; i++) {
- d=v_traceG[i]*(s-v_sigma2[i]*v_traceG[i])/(s*s)*v_se_sigma2[i]*v_se_sigma2[i];
- v_se_pve.push_back(sqrt(d) );
+ //compute se_pve; k=n_vc+1: total
+ double d1, d2;
+ v_se_pve.clear(); se_pve_total=0;
+ for (size_t k=0; k<n_vc+1; k++) {
+ d=0;
+ for (size_t i=0; i<n_vc+1; i++) {
+ if (noconstrain) {
+ d1=gsl_vector_get(s_fdf->x, i);
+ d1=1;
+ } else {
+ d1=exp(gsl_vector_get(s_fdf->x, i));
+ }
+
+ if (k<n_vc) {
+ if (i==k) {
+ d1*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s);
+ } else if (i==n_vc) {
+ d1*=-1*v_traceG[k]*v_sigma2[k]/(s*s);
+ } else {
+ d1*=-1*v_traceG[i]*v_traceG[k]*v_sigma2[k]/(s*s);
+ }
+ } else {
+ if (i==k) {
+ d1*=-1*(s-v_sigma2[n_vc])/(s*s);
+ } else {
+ d1*=v_traceG[i]*v_sigma2[n_vc]/(s*s);
+ }
+ }
+
+ for (size_t j=0; j<n_vc+1; j++) {
+ if (noconstrain) {
+ d2=gsl_vector_get(s_fdf->x, j);
+ d2=1;
+ } else {
+ d2=exp(gsl_vector_get(s_fdf->x, j));
+ }
+
+ if (k<n_vc) {
+ if (j==k) {
+ d2*=v_traceG[k]*(s-v_sigma2[k]*v_traceG[k])/(s*s);
+ } else if (j==n_vc) {
+ d2*=-1*v_traceG[k]*v_sigma2[k]/(s*s);
+ } else {
+ d2*=-1*v_traceG[j]*v_traceG[k]*v_sigma2[k]/(s*s);
+ }
+ } else {
+ if (j==k) {
+ d2*=-1*(s-v_sigma2[n_vc])/(s*s);
+ } else {
+ d2*=v_traceG[j]*v_sigma2[n_vc]/(s*s);
+ }
+ }
+
+ d+=-1.0*d1*d2*gsl_matrix_get(Hessian, i, j);
+ }
+ }
+
+ if (k<n_vc) {
+ v_se_pve.push_back(sqrt(d) );
+ } else {
+ se_pve_total=sqrt(d);
+ }
}
-
- gsl_multiroot_fdfsolver_free(s_fdf);
+
+ gsl_multiroot_fdfsolver_free(s_fdf);
gsl_vector_free(log_sigma2);
gsl_matrix_free(P);
@@ -437,7 +1857,643 @@ void VC::CalcVCreml (const gsl_matrix *K, const gsl_matrix *W, const gsl_vector
}
-
+
+//read bimbam mean genotype file and compute XWz
+bool BimbamXwz (const string &file_geno, const int display_pace, vector<int> &indicator_idv, vector<int> &indicator_snp, const vector<size_t> &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz)
+{
+ igzstream infile (file_geno.c_str(), igzstream::in);
+ //ifstream infile (file_geno.c_str(), ifstream::in);
+ if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+
+ string line;
+ char *ch_ptr;
+
+ size_t n_miss;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test=XWz->size1;
+ gsl_vector *geno=gsl_vector_alloc (ni_test);
+ gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
+ gsl_vector *wz=gsl_vector_alloc (w->size);
+ gsl_vector_memcpy (wz, z);
+ gsl_vector_mul(wz, w);
+
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof();
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ geno_mean=0.0; n_miss=0; geno_var=0.0;
+ gsl_vector_set_all(geno_miss, 0);
+
+ size_t j=0;
+ for (size_t i=0; i<indicator_idv.size(); ++i) {
+ if (indicator_idv[i]==0) {continue;}
+ ch_ptr=strtok (NULL, " , \t");
+ if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(geno_miss, i, 0); n_miss++;}
+ else {
+ d=atof(ch_ptr);
+ gsl_vector_set (geno, j, d);
+ gsl_vector_set (geno_miss, j, 1);
+ geno_mean+=d;
+ geno_var+=d*d;
+ }
+ j++;
+ }
+
+ geno_mean/=(double)(ni_test-n_miss);
+ geno_var+=geno_mean*geno_mean*(double)n_miss;
+ geno_var/=(double)ni_test;
+ geno_var-=geno_mean*geno_mean;
+// geno_var=geno_mean*(1-geno_mean*0.5);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+ }
+
+ gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+ gsl_vector_view XWz_col=gsl_matrix_column(XWz, vec_cat[ns_test]);
+ d=gsl_vector_get (wz, ns_test);
+ gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector);
+
+ ns_test++;
+ }
+
+ cout<<endl;
+
+ gsl_vector_free (geno);
+ gsl_vector_free (geno_miss);
+ gsl_vector_free (wz);
+
+ infile.close();
+ infile.clear();
+
+ return true;
+}
+
+
+
+
+
+
+//read plink bed file and compute XWz
+bool PlinkXwz (const string &file_bed, const int display_pace, vector<int> &indicator_idv, vector<int> &indicator_snp, const vector<size_t> &vec_cat, const gsl_vector *w, const gsl_vector *z, size_t ns_test, gsl_matrix *XWz)
+{
+ ifstream infile (file_bed.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t n_miss, ci_total, ci_test;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test=XWz->size1;
+ size_t ni_total=indicator_idv.size();
+ gsl_vector *geno=gsl_vector_alloc (ni_test);
+ gsl_vector *wz=gsl_vector_alloc (w->size);
+ gsl_vector_memcpy (wz, z);
+ gsl_vector_mul(wz, w);
+
+ int n_bit;
+ //calculate n_bit and c, the number of bit for each snp
+ if (ni_total%4==0) {n_bit=ni_total/4;}
+ else {n_bit=ni_total/4+1; }
+
+ //print the first three majic numbers
+ for (int i=0; i<3; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ }
+
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+
+ //read genotypes
+ geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
+ for (int i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+ if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {gsl_vector_set(geno, ci_test, 2.0); geno_mean+=2.0; geno_var+=4.0; }
+ else {gsl_vector_set(geno, ci_test, 1.0); geno_mean+=1.0; geno_var+=1.0;}
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
+ else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
+ }
+
+ ci_test++;
+ ci_total++;
+ }
+ }
+
+
+ geno_mean/=(double)(ni_test-n_miss);
+ geno_var+=geno_mean*geno_mean*(double)n_miss;
+ geno_var/=(double)ni_test;
+ geno_var-=geno_mean*geno_mean;
+// geno_var=geno_mean*(1-geno_mean*0.5);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ d=gsl_vector_get(geno,i);
+ if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
+ }
+
+ gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+ gsl_vector_view XWz_col=gsl_matrix_column(XWz, vec_cat[ns_test]);
+ d=gsl_vector_get (wz, ns_test);
+ gsl_blas_daxpy (d/sqrt(geno_var), geno, &XWz_col.vector);
+
+ ns_test++;
+ }
+ cout<<endl;
+
+ gsl_vector_free (geno);
+ gsl_vector_free (wz);
+
+ infile.close();
+ infile.clear();
+
+ return true;
+}
+
+
+
+//read multiple genotype files and compute XWz
+bool MFILEXwz (const size_t mfile_mode, const string &file_mfile, const int display_pace, vector<int> &indicator_idv, vector<vector<int> > &mindicator_snp, const vector<size_t> &vec_cat, const gsl_vector *w, const gsl_vector *z, gsl_matrix *XWz)
+{
+ gsl_matrix_set_zero(XWz);
+
+ igzstream infile (file_mfile.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open mfile file: "<<file_mfile<<endl; return false;}
+
+ string file_name;
+ size_t l=0, ns_test=0;
+
+ while (!safeGetline(infile, file_name).eof()) {
+ if (mfile_mode==1) {
+ file_name+=".bed";
+ PlinkXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], vec_cat, w, z, ns_test, XWz);
+ } else {
+ BimbamXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], vec_cat, w, z, ns_test, XWz);
+ }
+
+ l++;
+ }
+
+
+ infile.close();
+ infile.clear();
+
+ return true;
+}
+
+
+
+
+
+
+//read bimbam mean genotype file and compute X_i^TX_jWz
+bool BimbamXtXwz (const string &file_geno, const int display_pace, vector<int> &indicator_idv, vector<int> &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz)
+{
+ igzstream infile (file_geno.c_str(), igzstream::in);
+ //ifstream infile (file_geno.c_str(), ifstream::in);
+ if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+
+ string line;
+ char *ch_ptr;
+
+ size_t n_miss;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test=XWz->size1;
+ gsl_vector *geno=gsl_vector_alloc (ni_test);
+ gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
+
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+ !safeGetline(infile, line).eof();
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ geno_mean=0.0; n_miss=0; geno_var=0.0;
+ gsl_vector_set_all(geno_miss, 0);
+
+ size_t j=0;
+ for (size_t i=0; i<indicator_idv.size(); ++i) {
+ if (indicator_idv[i]==0) {continue;}
+ ch_ptr=strtok (NULL, " , \t");
+ if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(geno_miss, i, 0); n_miss++;}
+ else {
+ d=atof(ch_ptr);
+ gsl_vector_set (geno, j, d);
+ gsl_vector_set (geno_miss, j, 1);
+ geno_mean+=d;
+ geno_var+=d*d;
+ }
+ j++;
+ }
+
+ geno_mean/=(double)(ni_test-n_miss);
+ geno_var+=geno_mean*geno_mean*(double)n_miss;
+ geno_var/=(double)ni_test;
+ geno_var-=geno_mean*geno_mean;
+// geno_var=geno_mean*(1-geno_mean*0.5);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+ }
+
+ gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+ for (size_t i=0; i<XWz->size2; i++) {
+ gsl_vector_const_view XWz_col=gsl_matrix_const_column(XWz, i);
+ gsl_blas_ddot (geno, &XWz_col.vector, &d);
+ gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var));
+ }
+
+ ns_test++;
+ }
+
+ cout<<endl;
+
+ gsl_vector_free (geno);
+ gsl_vector_free (geno_miss);
+
+ infile.close();
+ infile.clear();
+
+ return true;
+}
+
+
+
+
+
+
+//read plink bed file and compute XWz
+bool PlinkXtXwz (const string &file_bed, const int display_pace, vector<int> &indicator_idv, vector<int> &indicator_snp, const gsl_matrix *XWz, size_t ns_test, gsl_matrix *XtXWz)
+{
+ ifstream infile (file_bed.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+
+ char ch[1];
+ bitset<8> b;
+
+ size_t n_miss, ci_total, ci_test;
+ double d, geno_mean, geno_var;
+
+ size_t ni_test=XWz->size1;
+ size_t ni_total=indicator_idv.size();
+ gsl_vector *geno=gsl_vector_alloc (ni_test);
+
+ int n_bit;
+
+ //calculate n_bit and c, the number of bit for each snp
+ if (ni_total%4==0) {n_bit=ni_total/4;}
+ else {n_bit=ni_total/4+1; }
+
+ //print the first three majic numbers
+ for (int i=0; i<3; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ }
+
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+ if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+
+ //read genotypes
+ geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
+ for (int i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+ if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {gsl_vector_set(geno, ci_test, 2.0); geno_mean+=2.0; geno_var+=4.0; }
+ else {gsl_vector_set(geno, ci_test, 1.0); geno_mean+=1.0; geno_var+=1.0;}
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
+ else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }
+ }
+
+ ci_test++;
+ ci_total++;
+ }
+ }
+
+
+ geno_mean/=(double)(ni_test-n_miss);
+ geno_var+=geno_mean*geno_mean*(double)n_miss;
+ geno_var/=(double)ni_test;
+ geno_var-=geno_mean*geno_mean;
+// geno_var=geno_mean*(1-geno_mean*0.5);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ d=gsl_vector_get(geno,i);
+ if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
+ }
+
+ gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+ for (size_t i=0; i<XWz->size2; i++) {
+ gsl_vector_const_view XWz_col=gsl_matrix_const_column(XWz, i);
+ gsl_blas_ddot (geno, &XWz_col.vector, &d);
+ gsl_matrix_set (XtXWz, ns_test, i, d/sqrt(geno_var));
+ }
+
+ ns_test++;
+ }
+ cout<<endl;
+
+ gsl_vector_free (geno);
+
+ infile.close();
+ infile.clear();
+
+ return true;
+}
+
+
+
+//read multiple genotype files and compute XWz
+bool MFILEXtXwz (const size_t mfile_mode, const string &file_mfile, const int display_pace, vector<int> &indicator_idv, vector<vector<int> > &mindicator_snp, const gsl_matrix *XWz, gsl_matrix *XtXWz)
+{
+ gsl_matrix_set_zero(XtXWz);
+
+ igzstream infile (file_mfile.c_str(), igzstream::in);
+ if (!infile) {cout<<"error! fail to open mfile file: "<<file_mfile<<endl; return false;}
+
+ string file_name;
+ size_t l=0, ns_test=0;
+
+ while (!safeGetline(infile, file_name).eof()) {
+ if (mfile_mode==1) {
+ file_name+=".bed";
+ PlinkXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], XWz, ns_test, XtXWz);
+ } else {
+ BimbamXtXwz (file_name, display_pace, indicator_idv, mindicator_snp[l], XWz, ns_test, XtXWz);
+ }
+
+ l++;
+ }
+
+ infile.close();
+ infile.clear();
+
+ return true;
+}
+
+
+//compute confidence intervals from summary statistics
+void CalcCIss(const gsl_matrix *Xz, const gsl_matrix *XWz, const gsl_matrix *XtXWz, const gsl_matrix *S_mat, const gsl_matrix *Svar_mat, const gsl_vector *w, const gsl_vector *z, const gsl_vector *s_vec, const vector<size_t> &vec_cat, const vector<double> &v_pve, vector<double> &v_se_pve, double &pve_total, double &se_pve_total, vector<double> &v_sigma2, vector<double> &v_se_sigma2, vector<double> &v_enrich, vector<double> &v_se_enrich) {
+ size_t n_vc=XWz->size2, ns_test=w->size, ni_test=XWz->size1;
+
+ //set up matrices
+ gsl_vector *w_pve=gsl_vector_alloc (ns_test);
+ gsl_vector *wz=gsl_vector_alloc (ns_test);
+ gsl_vector *zwz=gsl_vector_alloc (n_vc);
+ gsl_vector *zz=gsl_vector_alloc (n_vc);
+ gsl_vector *Xz_pve=gsl_vector_alloc (ni_test);
+ gsl_vector *WXtXWz=gsl_vector_alloc (ns_test);
+
+ gsl_matrix *Si_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *Var_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *tmp_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *tmp_mat1=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *VarEnrich_mat=gsl_matrix_alloc (n_vc, n_vc);
+ gsl_matrix *qvar_mat=gsl_matrix_alloc (n_vc, n_vc);
+
+ double d, s0, s1, s, s_pve, s_snp;
+
+ //compute wz and zwz
+ gsl_vector_memcpy (wz, z);
+ gsl_vector_mul (wz, w);
+
+ gsl_vector_set_zero (zwz);
+ gsl_vector_set_zero (zz);
+ for (size_t i=0; i<w->size; i++) {
+ d=gsl_vector_get (wz, i)*gsl_vector_get (z, i);
+ d+=gsl_vector_get (zwz, vec_cat[i]);
+ gsl_vector_set (zwz, vec_cat[i], d);
+
+ d=gsl_vector_get (z, i)*gsl_vector_get (z, i);
+ d+=gsl_vector_get (zz, vec_cat[i]);
+ gsl_vector_set (zz, vec_cat[i], d);
+ }
+
+ //compute wz, ve and Xz_pve
+ gsl_vector_set_zero (Xz_pve); s_pve=0; s_snp=0;
+ for (size_t i=0; i<n_vc; i++) {
+ s_pve+=v_pve[i];
+ s_snp+=gsl_vector_get(s_vec, i);
+
+ gsl_vector_const_view Xz_col=gsl_matrix_const_column (Xz, i);
+ gsl_blas_daxpy (v_pve[i]/gsl_vector_get(s_vec, i), &Xz_col.vector, Xz_pve);
+ }
+
+ //set up wpve vector
+ for (size_t i=0; i<w->size; i++) {
+ d=v_pve[vec_cat[i]]/gsl_vector_get(s_vec, vec_cat[i]);
+ gsl_vector_set (w_pve, i, d);
+ }
+
+ //compute Vq (in qvar_mat)
+ s0=1-s_pve;
+ for (size_t i=0; i<n_vc; i++) {
+ s0+=gsl_vector_get (zz, i)*v_pve[i]/gsl_vector_get(s_vec, i);
+ }
+
+ for (size_t i=0; i<n_vc; i++) {
+ s1=s0;
+ s1-=gsl_vector_get (zwz, i)*(1-s_pve)/gsl_vector_get(s_vec, i);
+
+ gsl_vector_const_view XWz_col1=gsl_matrix_const_column (XWz, i);
+ gsl_vector_const_view XtXWz_col1=gsl_matrix_const_column (XtXWz, i);
+
+ gsl_vector_memcpy (WXtXWz, &XtXWz_col1.vector);
+ gsl_vector_mul (WXtXWz, w_pve);
+
+ gsl_blas_ddot (Xz_pve, &XWz_col1.vector, &d);
+ s1-=d/gsl_vector_get(s_vec, i);
+
+ for (size_t j=0; j<n_vc; j++) {
+ s=s1;
+
+ s-=gsl_vector_get (zwz, j)*(1-s_pve)/gsl_vector_get(s_vec, j);
+
+ gsl_vector_const_view XWz_col2=gsl_matrix_const_column (XWz, j);
+ gsl_vector_const_view XtXWz_col2=gsl_matrix_const_column (XtXWz, j);
+
+ gsl_blas_ddot (WXtXWz, &XtXWz_col2.vector, &d);
+ s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j));
+
+ gsl_blas_ddot (&XWz_col1.vector, &XWz_col2.vector, &d);
+ s+=d/(gsl_vector_get(s_vec, i)*gsl_vector_get(s_vec, j))*(1-s_pve);
+
+ gsl_blas_ddot (Xz_pve, &XWz_col2.vector, &d);
+ s-=d/gsl_vector_get(s_vec, j);
+
+ gsl_matrix_set (qvar_mat, i, j, s);
+ }
+
+ }
+
+ d=(double)(ni_test-1);
+ gsl_matrix_scale (qvar_mat, 2.0/(d*d*d));
+
+ //cout<<scientific<<gsl_matrix_get(qvar_mat, 0, 0)<<endl;
+
+ //calculate S^{-1}
+ gsl_matrix_memcpy (tmp_mat, S_mat);
+ int sig;
+ gsl_permutation * pmt=gsl_permutation_alloc (n_vc);
+ LUDecomp (tmp_mat, pmt, &sig);
+ LUInvert (tmp_mat, pmt, Si_mat);
+
+ //calculate variance for the estimates
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ d=gsl_matrix_get(Svar_mat, i, j);
+ d*=v_pve[i]*v_pve[j];
+ //cout<<d<<" ";
+
+ d+=gsl_matrix_get(qvar_mat, i, j);
+ gsl_matrix_set(Var_mat, i, j, d);
+ if (i!=j) {gsl_matrix_set(Var_mat, j, i, d);}
+ }
+ //cout<<endl;
+ }
+
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Si_mat, Var_mat, 0.0, tmp_mat);
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Si_mat, 0.0, Var_mat);
+
+ //compute sigma2 per snp, enrich
+ v_sigma2.clear(); v_enrich.clear();
+ for (size_t i=0; i<n_vc; i++) {
+ v_sigma2.push_back(v_pve[i]/gsl_vector_get(s_vec, i) );
+ v_enrich.push_back(v_pve[i]/gsl_vector_get(s_vec, i)*s_snp/s_pve);
+ }
+
+ //compute se_pve, se_sigma2
+ for (size_t i=0; i<n_vc; i++) {
+ d=sqrt(gsl_matrix_get(Var_mat, i, i));
+ v_se_pve.push_back(d);
+ v_se_sigma2.push_back(d/gsl_vector_get(s_vec, i));
+ }
+
+ //compute pve_total, se_pve_total
+ pve_total=0;
+ for (size_t i=0; i<n_vc; i++) {
+ pve_total+=v_pve[i];
+ }
+
+ se_pve_total=0;
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=0; j<n_vc; j++) {
+ se_pve_total+=gsl_matrix_get(Var_mat, i, j);
+ }
+ }
+ se_pve_total=sqrt(se_pve_total);
+
+ //compute se_enrich
+ gsl_matrix_set_identity(tmp_mat);
+
+ double d1;
+ for (size_t i=0; i<n_vc; i++) {
+ d=v_pve[i]/s_pve;
+ d1=gsl_vector_get(s_vec, i);
+ for (size_t j=0; j<n_vc; j++) {
+ if (i==j) {
+ gsl_matrix_set(tmp_mat, i, j, (1-d)/d1*s_snp/s_pve);
+ } else {
+ gsl_matrix_set(tmp_mat, i, j, -1*d/d1*s_snp/s_pve);
+ }
+ }
+ }
+ gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, tmp_mat, Var_mat, 0.0, tmp_mat1);
+ gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, tmp_mat1, tmp_mat, 0.0, VarEnrich_mat);
+
+ for (size_t i=0; i<n_vc; i++) {
+ d=sqrt(gsl_matrix_get(VarEnrich_mat, i, i));
+ v_se_enrich.push_back(d);
+ }
+
+ cout<<"pve = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_pve[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(pve) = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_se_pve[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"sigma2 per snp = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_sigma2[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(sigma2 per snp) = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_se_sigma2[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"enrichment = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_enrich[i]<<" ";
+ }
+ cout<<endl;
+
+ cout<<"se(enrichment) = ";
+ for (size_t i=0; i<n_vc; i++) {
+ cout<<v_se_enrich[i]<<" ";
+ }
+ cout<<endl;
+
+ //delete matrices
+ gsl_matrix_free(Si_mat);
+ gsl_matrix_free(Var_mat);
+ gsl_matrix_free(VarEnrich_mat);
+ gsl_matrix_free(tmp_mat);
+ gsl_matrix_free(tmp_mat1);
+ gsl_matrix_free(qvar_mat);
+
+ gsl_vector_free(w_pve);
+ gsl_vector_free(wz);
+ gsl_vector_free(zwz);
+ gsl_vector_free(WXtXWz);
+ gsl_vector_free(Xz_pve);
+
+ return;
+}