From 943e970c9cbc184dcca679fbe455f48c32242cdc Mon Sep 17 00:00:00 2001 From: xiangzhou Date: Mon, 23 May 2016 17:05:35 -0400 Subject: version 0.95alpha --- src/param.cpp | 878 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 634 insertions(+), 244 deletions(-) (limited to 'src/param.cpp') diff --git a/src/param.cpp b/src/param.cpp index 33b7b48..0a63a16 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -64,7 +64,7 @@ n_accept(0), n_mh(10), geo_mean(2000.0), randseed(-1), -window_cm(0), window_bp(0), window_ns(0), +window_cm(0), window_bp(0), window_ns(0), n_block(200), error(false), ni_subsample(0), n_cvt(1), n_vc(1), time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) @@ -77,19 +77,27 @@ void PARAM::ReadFiles (void) { string file_str; - - if (!file_cat.empty()) { + //read cat file + if (!file_mcat.empty()) { + if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;} + } else if (!file_cat.empty()) { if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;} } - if (!file_var.empty()) { - if (ReadFile_var (file_var, mapRS2var)==false) {error=true;} + //read snp weight files + if (!file_wcat.empty()) { + if (ReadFile_wsnp (file_wcat, n_vc, mapRS2wcat)==false) {error=true;} + } + if (!file_wsnp.empty()) { + if (ReadFile_wsnp (file_wsnp, mapRS2wsnp)==false) {error=true;} } + //count number of kinship files if (!file_mk.empty()) { if (CountFileLines (file_mk, n_vc)==false) {error=true;} } + //read snp set if (!file_snps.empty()) { if (ReadFile_snps (file_snps, setSnps)==false) {error=true;} } else { @@ -184,10 +192,17 @@ void PARAM::ReadFiles (void) //read genotype and phenotype file for plink format if (!file_bfile.empty()) { file_str=file_bfile+".bim"; + snpInfo.clear(); if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} - file_str=file_bfile+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;} + //if both fam file and pheno files are used, use phenotypes inside the pheno file + if (!file_pheno.empty()) { + //phenotype file before genotype file + if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + } else { + file_str=file_bfile+".fam"; + if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;} + } //post-process covariates and phenotypes, obtain ni_test, save all useful covariates ProcessCvtPhen(); @@ -228,6 +243,97 @@ void PARAM::ReadFiles (void) ns_total=indicator_snp.size(); } + + //read genotype file for multiple plink files + if (!file_mbfile.empty()) { + igzstream infile (file_mbfile.c_str(), igzstream::in); + if (!infile) {cout<<"error! fail to open mbfile file: "<1) {cout<<"error! missing level needs to be between 0 and 1. current value = "<0.5) {cout<<"error! maf level needs to be between 0 and 0.5. current value = "<size1, ni_test=G->size1; - double di, dj, tr_KiKj, sum_Ki, sum_Kj, s_Ki, s_Kj, s_KiKj, si, sj, d; +//from an existing n by nd A and K matrices, compute the d by d S matrix (which is not necessary symmetric) +void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, gsl_matrix *S) { + size_t n_vc=S->size1, ni_test=A->size1; + double di, dj, tr_AK, sum_A, sum_K, s_A, s_K, sum_AK, tr_A, tr_K, d; for (size_t i=0; in_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<size2/G->size1, ni_test=G->size1; - gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, n_vc*(n_vc+1)/2*ni_test); - gsl_vector *trKiKjKi=gsl_vector_alloc ( n_vc*n_vc ); + gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, (n_vc*(n_vc+1))/2*ni_test); gsl_vector *trKiKj=gsl_vector_alloc( n_vc*(n_vc+1)/2 ); gsl_vector *trKi=gsl_vector_alloc(n_vc); double d, tr, r=(double)ni_test/(double)(ni_test-1); - size_t t, t_ij, t_il, t_jl, t_ii; + size_t t, t_il, t_jm, t_lm, t_im, t_jl, t_ij; - //compute KiKj for all pairs of i and j (including the identity matrix) + //compute KiKj for all pairs of i and j (not including the identity matrix) t=0; for (size_t i=0; im) { + gsl_blas_ddot (&KiKl_row.vector, &KjKm_row.vector, &d); + tr+=d; + gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d); + tr-=r*d; + gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d); + tr-=r*d; + } else if (i>l && j<=m) { + gsl_blas_ddot (&KiKl_col.vector, &KjKm_col.vector, &d); + tr+=d; + gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d); + tr-=r*d; + gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d); + tr-=r*d; + } else { + gsl_blas_ddot (&KiKl_col.vector, &KjKm_row.vector, &d); + tr+=d; + gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d); + tr-=r*d; + gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d); + tr-=r*d; + } + } - //compute Q - for (size_t i=0; il) { - gsl_blas_ddot (&KiKj_row.vector, &KiKl_row.vector, &d); - tr+=d; - gsl_blas_ddot (&Kj_row.vector, &KiKl_row.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KiKj_col.vector, &d); - tr-=r*d; - } else if (i>j && i<=l) { - gsl_blas_ddot (&KiKj_col.vector, &KiKl_col.vector, &d); - tr+=d; - gsl_blas_ddot (&Kj_row.vector, &KiKl_col.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KiKj_row.vector, &d); - tr-=r*d; - } else { - gsl_blas_ddot (&KiKj_col.vector, &KiKl_row.vector, &d); - tr+=d; - gsl_blas_ddot (&Kj_row.vector, &KiKl_row.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KiKj_row.vector, &d); - tr-=r*d; + tr+=r*r*gsl_vector_get (trKiKj, t_lm); + } else if (l!=n_vc && m==n_vc) { + t_il=GetabIndex (i+1, l+1, n_vc-2); + t_jl=GetabIndex (j+1, l+1, n_vc-2); + tr=0; + for (size_t k=0; ksize1, ni_test=G->size1; - vector > > tr_KiKj, s_KiKj; - vector > sum_Ki, s_Ki, si; +void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) { + size_t n_vc=Svar->size1, ni_test=A->size1, n_cvt=W->size2; + + vector > > trAK, sumAK; + vector > sumA, sumK, trA, trK, sA, sK; vector vec_tmp; double di, dj, d, m, v; + //gsl_matrix *Stmp=gsl_matrix_alloc (n_vc, ni_test*n_vc); + //gsl_matrix *Stmp_sub=gsl_matrix_alloc (n_vc, n_vc); + //initialize and set all elements to zero for (size_t i=0; i &mapRS2wA, const map &mapRS2wK, const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns) { string file_str; gsl_matrix_set_zero (S); gsl_matrix_set_zero (Svar); - gsl_matrix_set_zero (Q); + gsl_vector_set_zero (ns); //compute the kinship matrix G for multiple categories; these matrices are not centered, for convienence of Jacknife sampling - gsl_matrix *G=gsl_matrix_alloc (ni_test, n_vc*ni_test); - gsl_matrix_set_zero (G); - if (!file_bfile.empty() ) { file_str=file_bfile+".bed"; - if (PlinkKin (file_str, indicator_idv, indicator_snp, a_mode-24, d_pace, mapRS2cat, mapRS2var, snpInfo, G)==false) {error=true;} - } else { + if (mapRS2wA.size()==0) { + if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) {error=true;} + } else { + if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) {error=true;} + } + } else if (!file_geno.empty()) { file_str=file_geno; - if (BimbamKin (file_str, indicator_idv, indicator_snp, a_mode-24, d_pace, mapRS2cat, mapRS2var, snpInfo, G)==false) {error=true;} + if (mapRS2wA.size()==0) { + if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) {error=true;} + } else { + if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) {error=true;} + } + } else if (!file_mbfile.empty() ){ + if (mapRS2wA.size()==0) { + if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {error=true;} + } else { + if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {error=true;} + } + } else if (!file_mgeno.empty()) { + if (mapRS2wA.size()==0) { + if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) {error=true;} + } else { + if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) {error=true;} + } } - //center and scale every kinship matrix inside G - double d; - for (size_t i=0; itm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - - //bootstrap: in each iteration, sample individuals and compute S_pmt - size_t n_pmt=100; - vector idv_order, idv_remove; - for (size_t i=0; i(&idv_remove[0]), n_pmt, static_cast(&idv_order[0]), ni_test, sizeof(size_t)); + //center and scale every kinship matrix inside G + for (size_t i=0; isize2, S); //compute Svar and update S with Jacknife - JacknifeGtoS (G, S, Svar); + JackknifeAKtoS (W, A, K, S, Svar); + + //based on G, compute a matrix Q that can be used to calculate the variance of q + //compKtoV (G, V); - gsl_matrix_free(G); return; } @@ -1223,11 +1388,20 @@ void PARAM::WriteVar (const string suffix) outfile.precision(10); - for (size_t i=0; i &setSnps_beta, map &mapRS2wK) +{ + mapRS2wK.clear(); + + vector wsum, wcount; + + for (size_t i=0; i::iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { + if (mapRS2cat.size()==0) { + it->second/=wsum[0]; + } else { + it->second/=wsum[mapRS2cat[it->first]]; + } + } + } + return; +} + + +//pve_flag=0 then do not change pve; pve_flag==1, then change pve to 0 if pve < 0 and pve to 1 if pve > 1 +void PARAM::UpdateWeight (const size_t pve_flag, const map &mapRS2wK, const size_t ni_test, const gsl_vector *ns, map &mapRS2wA) +{ + double d; + vector wsum, wcount; + + for (size_t i=0; i::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { + d=1; + for (size_t i=0; i=1 && pve_flag==1) { + d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i]; + } else if (v_pve[i]<=0 && pve_flag==1) { + d+=0; + } else { + d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i]*v_pve[i]; + } + } + mapRS2wA[it->first]=1/(d*d); + + if (mapRS2cat.size()==0) { + wsum[0]+=mapRS2wA[it->first]; + wcount[0]++; + } else { + wsum[mapRS2cat[it->first]]+=mapRS2wA[it->first]; + wcount[mapRS2cat[it->first]]++; + } + } + + for (size_t i=0; i::iterator it=mapRS2wA.begin(); it!=mapRS2wA.end(); ++it) { + if (mapRS2cat.size()==0) { + it->second/=wsum[0]; + } else { + it->second/=wsum[mapRS2cat[it->first]]; + } + } + return; +} + +// this function updates indicator_snp, and save z-scores and other values into vectors +void PARAM::UpdateSNPnZ (const map &mapRS2wA, const map &mapRS2A1, const map &mapRS2z, gsl_vector *w, gsl_vector *z, vector &vec_cat) +{ + gsl_vector_set_zero (w); + gsl_vector_set_zero (z); + vec_cat.clear(); + + string rs, a1; + size_t c=0; + if (msnpInfo.size()==0) { + for (size_t i=0; i &mapRS2wA) +{ + string rs; + if (msnpInfo.size()==0) { + for (size_t i=0; i